diff mbox series

[v5,8/8] hw/mem/cxl_type3: Add CXL RAS Error Injection Support.

Message ID 20230221152145.9736-9-Jonathan.Cameron@huawei.com
State Superseded
Headers show
Series hw/cxl: RAS error emulation and injection | expand

Commit Message

Jonathan Cameron Feb. 21, 2023, 3:21 p.m. UTC
CXL uses PCI AER Internal errors to signal to the host that an error has
occurred. The host can then read more detailed status from the CXL RAS
capability.

For uncorrectable errors: support multiple injection in one operation
as this is needed to reliably test multiple header logging support in an
OS. The equivalent feature doesn't exist for correctable errors, so only
one error need be injected at a time.

Note:
 - Header content needs to be manually specified in a fashion that
   matches the specification for what can be in the header for each
   error type.

Injection via QMP:
{ "execute": "qmp_capabilities" }
...
{ "execute": "cxl-inject-uncorrectable-errors",
  "arguments": {
    "path": "/machine/peripheral/cxl-pmem0",
    "errors": [
        {
            "type": "cache-address-parity",
            "header": [ 3, 4]
        },
        {
            "type": "cache-data-parity",
            "header": [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31]
        },
        {
            "type": "internal",
            "header": [ 1, 2, 4]
        }
        ]
  }}
...
{ "execute": "cxl-inject-correctable-error",
    "arguments": {
        "path": "/machine/peripheral/cxl-pmem0",
        "type": "physical"
    } }

Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
v5: (Thanks to Dave Jiang for review)
- Spell out Implementation Defined (previously typo as Imdef which did
  not help)
v4:
- Improved QMP help text wth more detail (following request in review
  of the Poison injection series)
---
 hw/cxl/cxl-component-utils.c   |   4 +-
 hw/mem/cxl_type3.c             | 281 +++++++++++++++++++++++++++++++++
 hw/mem/cxl_type3_stubs.c       |  10 ++
 hw/mem/meson.build             |   2 +
 include/hw/cxl/cxl_component.h |  26 +++
 include/hw/cxl/cxl_device.h    |  11 ++
 qapi/cxl.json                  | 118 ++++++++++++++
 qapi/meson.build               |   1 +
 qapi/qapi-schema.json          |   1 +
 9 files changed, 453 insertions(+), 1 deletion(-)

Comments

Dave Jiang Feb. 21, 2023, 3:48 p.m. UTC | #1
On 2/21/23 8:21 AM, Jonathan Cameron wrote:
> CXL uses PCI AER Internal errors to signal to the host that an error has
> occurred. The host can then read more detailed status from the CXL RAS
> capability.
> 
> For uncorrectable errors: support multiple injection in one operation
> as this is needed to reliably test multiple header logging support in an
> OS. The equivalent feature doesn't exist for correctable errors, so only
> one error need be injected at a time.
> 
> Note:
>   - Header content needs to be manually specified in a fashion that
>     matches the specification for what can be in the header for each
>     error type.
> 
> Injection via QMP:
> { "execute": "qmp_capabilities" }
> ...
> { "execute": "cxl-inject-uncorrectable-errors",
>    "arguments": {
>      "path": "/machine/peripheral/cxl-pmem0",
>      "errors": [
>          {
>              "type": "cache-address-parity",
>              "header": [ 3, 4]
>          },
>          {
>              "type": "cache-data-parity",
>              "header": [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31]
>          },
>          {
>              "type": "internal",
>              "header": [ 1, 2, 4]
>          }
>          ]
>    }}
> ...
> { "execute": "cxl-inject-correctable-error",
>      "arguments": {
>          "path": "/machine/peripheral/cxl-pmem0",
>          "type": "physical"
>      } }
> 
> Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>

Reviewed-by: Dave Jiang <dave.jiang@intel.com>

> ---
> v5: (Thanks to Dave Jiang for review)
> - Spell out Implementation Defined (previously typo as Imdef which did
>    not help)
> v4:
> - Improved QMP help text wth more detail (following request in review
>    of the Poison injection series)
> ---
>   hw/cxl/cxl-component-utils.c   |   4 +-
>   hw/mem/cxl_type3.c             | 281 +++++++++++++++++++++++++++++++++
>   hw/mem/cxl_type3_stubs.c       |  10 ++
>   hw/mem/meson.build             |   2 +
>   include/hw/cxl/cxl_component.h |  26 +++
>   include/hw/cxl/cxl_device.h    |  11 ++
>   qapi/cxl.json                  | 118 ++++++++++++++
>   qapi/meson.build               |   1 +
>   qapi/qapi-schema.json          |   1 +
>   9 files changed, 453 insertions(+), 1 deletion(-)
> 
> diff --git a/hw/cxl/cxl-component-utils.c b/hw/cxl/cxl-component-utils.c
> index 737b4764b9..b665d4f565 100644
> --- a/hw/cxl/cxl-component-utils.c
> +++ b/hw/cxl/cxl-component-utils.c
> @@ -142,16 +142,18 @@ static void ras_init_common(uint32_t *reg_state, uint32_t *write_msk)
>        * be handled as RO.
>        */
>       stl_le_p(reg_state + R_CXL_RAS_UNC_ERR_STATUS, 0);
> +    stl_le_p(write_msk + R_CXL_RAS_UNC_ERR_STATUS, 0x1cfff);
>       /* Bits 12-13 and 17-31 reserved in CXL 2.0 */
>       stl_le_p(reg_state + R_CXL_RAS_UNC_ERR_MASK, 0x1cfff);
>       stl_le_p(write_msk + R_CXL_RAS_UNC_ERR_MASK, 0x1cfff);
>       stl_le_p(reg_state + R_CXL_RAS_UNC_ERR_SEVERITY, 0x1cfff);
>       stl_le_p(write_msk + R_CXL_RAS_UNC_ERR_SEVERITY, 0x1cfff);
>       stl_le_p(reg_state + R_CXL_RAS_COR_ERR_STATUS, 0);
> +    stl_le_p(write_msk + R_CXL_RAS_COR_ERR_STATUS, 0x7f);
>       stl_le_p(reg_state + R_CXL_RAS_COR_ERR_MASK, 0x7f);
>       stl_le_p(write_msk + R_CXL_RAS_COR_ERR_MASK, 0x7f);
>       /* CXL switches and devices must set */
> -    stl_le_p(reg_state + R_CXL_RAS_ERR_CAP_CTRL, 0x00);
> +    stl_le_p(reg_state + R_CXL_RAS_ERR_CAP_CTRL, 0x200);
>   }
>   
>   static void hdm_init_common(uint32_t *reg_state, uint32_t *write_msk,
> diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
> index 6cdd988d1d..abe60b362c 100644
> --- a/hw/mem/cxl_type3.c
> +++ b/hw/mem/cxl_type3.c
> @@ -1,6 +1,7 @@
>   #include "qemu/osdep.h"
>   #include "qemu/units.h"
>   #include "qemu/error-report.h"
> +#include "qapi/qapi-commands-cxl.h"
>   #include "hw/mem/memory-device.h"
>   #include "hw/mem/pc-dimm.h"
>   #include "hw/pci/pci.h"
> @@ -323,6 +324,66 @@ static void hdm_decoder_commit(CXLType3Dev *ct3d, int which)
>       ARRAY_FIELD_DP32(cache_mem, CXL_HDM_DECODER0_CTRL, COMMITTED, 1);
>   }
>   
> +static int ct3d_qmp_uncor_err_to_cxl(CxlUncorErrorType qmp_err)
> +{
> +    switch (qmp_err) {
> +    case CXL_UNCOR_ERROR_TYPE_CACHE_DATA_PARITY:
> +        return CXL_RAS_UNC_ERR_CACHE_DATA_PARITY;
> +    case CXL_UNCOR_ERROR_TYPE_CACHE_ADDRESS_PARITY:
> +        return CXL_RAS_UNC_ERR_CACHE_ADDRESS_PARITY;
> +    case CXL_UNCOR_ERROR_TYPE_CACHE_BE_PARITY:
> +        return CXL_RAS_UNC_ERR_CACHE_BE_PARITY;
> +    case CXL_UNCOR_ERROR_TYPE_CACHE_DATA_ECC:
> +        return CXL_RAS_UNC_ERR_CACHE_DATA_ECC;
> +    case CXL_UNCOR_ERROR_TYPE_MEM_DATA_PARITY:
> +        return CXL_RAS_UNC_ERR_MEM_DATA_PARITY;
> +    case CXL_UNCOR_ERROR_TYPE_MEM_ADDRESS_PARITY:
> +        return CXL_RAS_UNC_ERR_MEM_ADDRESS_PARITY;
> +    case CXL_UNCOR_ERROR_TYPE_MEM_BE_PARITY:
> +        return CXL_RAS_UNC_ERR_MEM_BE_PARITY;
> +    case CXL_UNCOR_ERROR_TYPE_MEM_DATA_ECC:
> +        return CXL_RAS_UNC_ERR_MEM_DATA_ECC;
> +    case CXL_UNCOR_ERROR_TYPE_REINIT_THRESHOLD:
> +        return CXL_RAS_UNC_ERR_REINIT_THRESHOLD;
> +    case CXL_UNCOR_ERROR_TYPE_RSVD_ENCODING:
> +        return CXL_RAS_UNC_ERR_RSVD_ENCODING;
> +    case CXL_UNCOR_ERROR_TYPE_POISON_RECEIVED:
> +        return CXL_RAS_UNC_ERR_POISON_RECEIVED;
> +    case CXL_UNCOR_ERROR_TYPE_RECEIVER_OVERFLOW:
> +        return CXL_RAS_UNC_ERR_RECEIVER_OVERFLOW;
> +    case CXL_UNCOR_ERROR_TYPE_INTERNAL:
> +        return CXL_RAS_UNC_ERR_INTERNAL;
> +    case CXL_UNCOR_ERROR_TYPE_CXL_IDE_TX:
> +        return CXL_RAS_UNC_ERR_CXL_IDE_TX;
> +    case CXL_UNCOR_ERROR_TYPE_CXL_IDE_RX:
> +        return CXL_RAS_UNC_ERR_CXL_IDE_RX;
> +    default:
> +        return -EINVAL;
> +    }
> +}
> +
> +static int ct3d_qmp_cor_err_to_cxl(CxlCorErrorType qmp_err)
> +{
> +    switch (qmp_err) {
> +    case CXL_COR_ERROR_TYPE_CACHE_DATA_ECC:
> +        return CXL_RAS_COR_ERR_CACHE_DATA_ECC;
> +    case CXL_COR_ERROR_TYPE_MEM_DATA_ECC:
> +        return CXL_RAS_COR_ERR_MEM_DATA_ECC;
> +    case CXL_COR_ERROR_TYPE_CRC_THRESHOLD:
> +        return CXL_RAS_COR_ERR_CRC_THRESHOLD;
> +    case CXL_COR_ERROR_TYPE_RETRY_THRESHOLD:
> +        return CXL_RAS_COR_ERR_RETRY_THRESHOLD;
> +    case CXL_COR_ERROR_TYPE_CACHE_POISON_RECEIVED:
> +        return CXL_RAS_COR_ERR_CACHE_POISON_RECEIVED;
> +    case CXL_COR_ERROR_TYPE_MEM_POISON_RECEIVED:
> +        return CXL_RAS_COR_ERR_MEM_POISON_RECEIVED;
> +    case CXL_COR_ERROR_TYPE_PHYSICAL:
> +        return CXL_RAS_COR_ERR_PHYSICAL;
> +    default:
> +        return -EINVAL;
> +    }
> +}
> +
>   static void ct3d_reg_write(void *opaque, hwaddr offset, uint64_t value,
>                              unsigned size)
>   {
> @@ -341,6 +402,83 @@ static void ct3d_reg_write(void *opaque, hwaddr offset, uint64_t value,
>           should_commit = FIELD_EX32(value, CXL_HDM_DECODER0_CTRL, COMMIT);
>           which_hdm = 0;
>           break;
> +    case A_CXL_RAS_UNC_ERR_STATUS:
> +    {
> +        uint32_t capctrl = ldl_le_p(cache_mem + R_CXL_RAS_ERR_CAP_CTRL);
> +        uint32_t fe = FIELD_EX32(capctrl, CXL_RAS_ERR_CAP_CTRL, FIRST_ERROR_POINTER);
> +        CXLError *cxl_err;
> +        uint32_t unc_err;
> +
> +        /*
> +         * If single bit written that corresponds to the first error
> +         * pointer being cleared, update the status and header log.
> +         */
> +        if (!QTAILQ_EMPTY(&ct3d->error_list)) {
> +            if ((1 << fe) ^ value) {
> +                CXLError *cxl_next;
> +                /*
> +                 * Software is using wrong flow for multiple header recording
> +                 * Following behavior in PCIe r6.0 and assuming multiple
> +                 * header support. Implementation defined choice to clear all
> +                 * matching records if more than one bit set - which corresponds
> +                 * closest to behavior of hardware not capable of multiple
> +                 * header recording.
> +                 */
> +                QTAILQ_FOREACH_SAFE(cxl_err, &ct3d->error_list, node, cxl_next) {
> +                    if ((1 << cxl_err->type) & value) {
> +                        QTAILQ_REMOVE(&ct3d->error_list, cxl_err, node);
> +                        g_free(cxl_err);
> +                    }
> +                }
> +            } else {
> +                /* Done with previous FE, so drop from list */
> +                cxl_err = QTAILQ_FIRST(&ct3d->error_list);
> +                QTAILQ_REMOVE(&ct3d->error_list, cxl_err, node);
> +                g_free(cxl_err);
> +            }
> +
> +            /*
> +             * If there is another FE, then put that in place and update
> +             * the header log
> +             */
> +            if (!QTAILQ_EMPTY(&ct3d->error_list)) {
> +                uint32_t *header_log = &cache_mem[R_CXL_RAS_ERR_HEADER0];
> +                int i;
> +
> +                cxl_err = QTAILQ_FIRST(&ct3d->error_list);
> +                for (i = 0; i < CXL_RAS_ERR_HEADER_NUM; i++) {
> +                    stl_le_p(header_log + i, cxl_err->header[i]);
> +                }
> +                capctrl = FIELD_DP32(capctrl, CXL_RAS_ERR_CAP_CTRL,
> +                                     FIRST_ERROR_POINTER, cxl_err->type);
> +            } else {
> +                /*
> +                 * If no more errors, then follow recomendation of PCI spec
> +                 * r6.0 6.2.4.2 to set the first error pointer to a status
> +                 * bit that will never be used.
> +                 */
> +                capctrl = FIELD_DP32(capctrl, CXL_RAS_ERR_CAP_CTRL,
> +                                     FIRST_ERROR_POINTER,
> +                                     CXL_RAS_UNC_ERR_CXL_UNUSED);
> +            }
> +            stl_le_p((uint8_t *)cache_mem + A_CXL_RAS_ERR_CAP_CTRL, capctrl);
> +        }
> +        unc_err = 0;
> +        QTAILQ_FOREACH(cxl_err, &ct3d->error_list, node) {
> +            unc_err |= 1 << cxl_err->type;
> +        }
> +        stl_le_p((uint8_t *)cache_mem + offset, unc_err);
> +
> +        return;
> +    }
> +    case A_CXL_RAS_COR_ERR_STATUS:
> +    {
> +        uint32_t rw1c = value;
> +        uint32_t temp = ldl_le_p((uint8_t *)cache_mem + offset);
> +        temp &= ~rw1c;
> +        stl_le_p((uint8_t *)cache_mem + offset, temp);
> +        return;
> +    }
>       default:
>           break;
>       }
> @@ -404,6 +542,8 @@ static void ct3_realize(PCIDevice *pci_dev, Error **errp)
>       unsigned short msix_num = 1;
>       int i, rc;
>   
> +    QTAILQ_INIT(&ct3d->error_list);
> +
>       if (!cxl_setup_memory(ct3d, errp)) {
>           return;
>       }
> @@ -631,6 +771,147 @@ static void set_lsa(CXLType3Dev *ct3d, const void *buf, uint64_t size,
>        */
>   }
>   
> +/* For uncorrectable errors include support for multiple header recording */
> +void qmp_cxl_inject_uncorrectable_errors(const char *path,
> +                                         CXLUncorErrorRecordList *errors,
> +                                         Error **errp)
> +{
> +    Object *obj = object_resolve_path(path, NULL);
> +    static PCIEAERErr err = {};
> +    CXLType3Dev *ct3d;
> +    CXLError *cxl_err;
> +    uint32_t *reg_state;
> +    uint32_t unc_err;
> +    bool first;
> +
> +    if (!obj) {
> +        error_setg(errp, "Unable to resolve path");
> +        return;
> +    }
> +
> +    if (!object_dynamic_cast(obj, TYPE_CXL_TYPE3)) {
> +        error_setg(errp, "Path does not point to a CXL type 3 device");
> +        return;
> +    }
> +
> +    err.status = PCI_ERR_UNC_INTN;
> +    err.source_id = pci_requester_id(PCI_DEVICE(obj));
> +    err.flags = 0;
> +
> +    ct3d = CXL_TYPE3(obj);
> +
> +    first = QTAILQ_EMPTY(&ct3d->error_list);
> +    reg_state = ct3d->cxl_cstate.crb.cache_mem_registers;
> +    while (errors) {
> +        uint32List *header = errors->value->header;
> +        uint8_t header_count = 0;
> +        int cxl_err_code;
> +
> +        cxl_err_code = ct3d_qmp_uncor_err_to_cxl(errors->value->type);
> +        if (cxl_err_code < 0) {
> +            error_setg(errp, "Unknown error code");
> +            return;
> +        }
> +
> +        /* If the error is masked, nothing to do here */
> +        if (!((1 << cxl_err_code) &
> +              ~ldl_le_p(reg_state + R_CXL_RAS_UNC_ERR_MASK))) {
> +            errors = errors->next;
> +            continue;
> +        }
> +
> +        cxl_err = g_malloc0(sizeof(*cxl_err));
> +        if (!cxl_err) {
> +            return;
> +        }
> +
> +        cxl_err->type = cxl_err_code;
> +        while (header && header_count < 32) {
> +            cxl_err->header[header_count++] = header->value;
> +            header = header->next;
> +        }
> +        if (header_count > 32) {
> +            error_setg(errp, "Header must be 32 DWORD or less");
> +            return;
> +        }
> +        QTAILQ_INSERT_TAIL(&ct3d->error_list, cxl_err, node);
> +
> +        errors = errors->next;
> +    }
> +
> +    if (first && !QTAILQ_EMPTY(&ct3d->error_list)) {
> +        uint32_t *cache_mem = ct3d->cxl_cstate.crb.cache_mem_registers;
> +        uint32_t capctrl = ldl_le_p(cache_mem + R_CXL_RAS_ERR_CAP_CTRL);
> +        uint32_t *header_log = &cache_mem[R_CXL_RAS_ERR_HEADER0];
> +        int i;
> +
> +        cxl_err = QTAILQ_FIRST(&ct3d->error_list);
> +        for (i = 0; i < CXL_RAS_ERR_HEADER_NUM; i++) {
> +            stl_le_p(header_log + i, cxl_err->header[i]);
> +        }
> +
> +        capctrl = FIELD_DP32(capctrl, CXL_RAS_ERR_CAP_CTRL,
> +                             FIRST_ERROR_POINTER, cxl_err->type);
> +        stl_le_p(cache_mem + R_CXL_RAS_ERR_CAP_CTRL, capctrl);
> +    }
> +
> +    unc_err = 0;
> +    QTAILQ_FOREACH(cxl_err, &ct3d->error_list, node) {
> +        unc_err |= (1 << cxl_err->type);
> +    }
> +    if (!unc_err) {
> +        return;
> +    }
> +
> +    stl_le_p(reg_state + R_CXL_RAS_UNC_ERR_STATUS, unc_err);
> +    pcie_aer_inject_error(PCI_DEVICE(obj), &err);
> +
> +    return;
> +}
> +
> +void qmp_cxl_inject_correctable_error(const char *path, CxlCorErrorType type,
> +                                      Error **errp)
> +{
> +    static PCIEAERErr err = {};
> +    Object *obj = object_resolve_path(path, NULL);
> +    CXLType3Dev *ct3d;
> +    uint32_t *reg_state;
> +    uint32_t cor_err;
> +    int cxl_err_type;
> +
> +    if (!obj) {
> +        error_setg(errp, "Unable to resolve path");
> +        return;
> +    }
> +    if (!object_dynamic_cast(obj, TYPE_CXL_TYPE3)) {
> +        error_setg(errp, "Path does not point to a CXL type 3 device");
> +        return;
> +    }
> +
> +    err.status = PCI_ERR_COR_INTERNAL;
> +    err.source_id = pci_requester_id(PCI_DEVICE(obj));
> +    err.flags = PCIE_AER_ERR_IS_CORRECTABLE;
> +
> +    ct3d = CXL_TYPE3(obj);
> +    reg_state = ct3d->cxl_cstate.crb.cache_mem_registers;
> +    cor_err = ldl_le_p(reg_state + R_CXL_RAS_COR_ERR_STATUS);
> +
> +    cxl_err_type = ct3d_qmp_cor_err_to_cxl(type);
> +    if (cxl_err_type < 0) {
> +        error_setg(errp, "Invalid COR error");
> +        return;
> +    }
> +    /* If the error is masked, nothting to do here */
> +    if (!((1 << cxl_err_type) & ~ldl_le_p(reg_state + R_CXL_RAS_COR_ERR_MASK))) {
> +        return;
> +    }
> +
> +    cor_err |= (1 << cxl_err_type);
> +    stl_le_p(reg_state + R_CXL_RAS_COR_ERR_STATUS, cor_err);
> +
> +    pcie_aer_inject_error(PCI_DEVICE(obj), &err);
> +}
> +
>   static void ct3_class_init(ObjectClass *oc, void *data)
>   {
>       DeviceClass *dc = DEVICE_CLASS(oc);
> diff --git a/hw/mem/cxl_type3_stubs.c b/hw/mem/cxl_type3_stubs.c
> new file mode 100644
> index 0000000000..b6b51ced54
> --- /dev/null
> +++ b/hw/mem/cxl_type3_stubs.c
> @@ -0,0 +1,10 @@
> +
> +#include "qemu/osdep.h"
> +#include "qapi/qapi-commands-cxl.h"
> +
> +void qmp_cxl_inject_uncorrectable_errors(const char *path,
> +                                         CXLUncorErrorRecordList *errors,
> +                                         Error **errp) {}
> +
> +void qmp_cxl_inject_correctable_error(const char *path, CxlCorErrorType type,
> +                                      Error **errp) {}
> diff --git a/hw/mem/meson.build b/hw/mem/meson.build
> index 609b2b36fc..56c2618b84 100644
> --- a/hw/mem/meson.build
> +++ b/hw/mem/meson.build
> @@ -4,6 +4,8 @@ mem_ss.add(when: 'CONFIG_DIMM', if_true: files('pc-dimm.c'))
>   mem_ss.add(when: 'CONFIG_NPCM7XX', if_true: files('npcm7xx_mc.c'))
>   mem_ss.add(when: 'CONFIG_NVDIMM', if_true: files('nvdimm.c'))
>   mem_ss.add(when: 'CONFIG_CXL_MEM_DEVICE', if_true: files('cxl_type3.c'))
> +softmmu_ss.add(when: 'CONFIG_CXL_MEM_DEVICE', if_false: files('cxl_type3_stubs.c'))
> +softmmu_ss.add(when: 'CONFIG_ALL', if_true: files('cxl_type3_stubs.c'))
>   
>   softmmu_ss.add_all(when: 'CONFIG_MEM_DEVICE', if_true: mem_ss)
>   
> diff --git a/include/hw/cxl/cxl_component.h b/include/hw/cxl/cxl_component.h
> index 692d7a5507..ec4203b83f 100644
> --- a/include/hw/cxl/cxl_component.h
> +++ b/include/hw/cxl/cxl_component.h
> @@ -65,11 +65,37 @@ CXLx_CAPABILITY_HEADER(SNOOP, 0x14)
>   #define CXL_RAS_REGISTERS_OFFSET 0x80
>   #define CXL_RAS_REGISTERS_SIZE   0x58
>   REG32(CXL_RAS_UNC_ERR_STATUS, CXL_RAS_REGISTERS_OFFSET)
> +#define CXL_RAS_UNC_ERR_CACHE_DATA_PARITY 0
> +#define CXL_RAS_UNC_ERR_CACHE_ADDRESS_PARITY 1
> +#define CXL_RAS_UNC_ERR_CACHE_BE_PARITY 2
> +#define CXL_RAS_UNC_ERR_CACHE_DATA_ECC 3
> +#define CXL_RAS_UNC_ERR_MEM_DATA_PARITY 4
> +#define CXL_RAS_UNC_ERR_MEM_ADDRESS_PARITY 5
> +#define CXL_RAS_UNC_ERR_MEM_BE_PARITY 6
> +#define CXL_RAS_UNC_ERR_MEM_DATA_ECC 7
> +#define CXL_RAS_UNC_ERR_REINIT_THRESHOLD 8
> +#define CXL_RAS_UNC_ERR_RSVD_ENCODING 9
> +#define CXL_RAS_UNC_ERR_POISON_RECEIVED 10
> +#define CXL_RAS_UNC_ERR_RECEIVER_OVERFLOW 11
> +#define CXL_RAS_UNC_ERR_INTERNAL 14
> +#define CXL_RAS_UNC_ERR_CXL_IDE_TX 15
> +#define CXL_RAS_UNC_ERR_CXL_IDE_RX 16
> +#define CXL_RAS_UNC_ERR_CXL_UNUSED 63 /* Magic value */
>   REG32(CXL_RAS_UNC_ERR_MASK, CXL_RAS_REGISTERS_OFFSET + 0x4)
>   REG32(CXL_RAS_UNC_ERR_SEVERITY, CXL_RAS_REGISTERS_OFFSET + 0x8)
>   REG32(CXL_RAS_COR_ERR_STATUS, CXL_RAS_REGISTERS_OFFSET + 0xc)
> +#define CXL_RAS_COR_ERR_CACHE_DATA_ECC 0
> +#define CXL_RAS_COR_ERR_MEM_DATA_ECC 1
> +#define CXL_RAS_COR_ERR_CRC_THRESHOLD 2
> +#define CXL_RAS_COR_ERR_RETRY_THRESHOLD 3
> +#define CXL_RAS_COR_ERR_CACHE_POISON_RECEIVED 4
> +#define CXL_RAS_COR_ERR_MEM_POISON_RECEIVED 5
> +#define CXL_RAS_COR_ERR_PHYSICAL 6
>   REG32(CXL_RAS_COR_ERR_MASK, CXL_RAS_REGISTERS_OFFSET + 0x10)
>   REG32(CXL_RAS_ERR_CAP_CTRL, CXL_RAS_REGISTERS_OFFSET + 0x14)
> +    FIELD(CXL_RAS_ERR_CAP_CTRL, FIRST_ERROR_POINTER, 0, 6)
> +REG32(CXL_RAS_ERR_HEADER0, CXL_RAS_REGISTERS_OFFSET + 0x18)
> +#define CXL_RAS_ERR_HEADER_NUM 32
>   /* Offset 0x18 - 0x58 reserved for RAS logs */
>   
>   /* 8.2.5.10 - CXL Security Capability Structure */
> diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h
> index 7e5ad65c1d..d589f78202 100644
> --- a/include/hw/cxl/cxl_device.h
> +++ b/include/hw/cxl/cxl_device.h
> @@ -232,6 +232,14 @@ REG64(CXL_MEM_DEV_STS, 0)
>       FIELD(CXL_MEM_DEV_STS, MBOX_READY, 4, 1)
>       FIELD(CXL_MEM_DEV_STS, RESET_NEEDED, 5, 3)
>   
> +typedef struct CXLError {
> +    QTAILQ_ENTRY(CXLError) node;
> +    int type; /* Error code as per FE definition */
> +    uint32_t header[32];
> +} CXLError;
> +
> +typedef QTAILQ_HEAD(, CXLError) CXLErrorList;
> +
>   struct CXLType3Dev {
>       /* Private */
>       PCIDevice parent_obj;
> @@ -248,6 +256,9 @@ struct CXLType3Dev {
>   
>       /* DOE */
>       DOECap doe_cdat;
> +
> +    /* Error injection */
> +    CXLErrorList error_list;
>   };
>   
>   #define TYPE_CXL_TYPE3 "cxl-type3"
> diff --git a/qapi/cxl.json b/qapi/cxl.json
> new file mode 100644
> index 0000000000..ac7e167fa2
> --- /dev/null
> +++ b/qapi/cxl.json
> @@ -0,0 +1,118 @@
> +# -*- Mode: Python -*-
> +# vim: filetype=python
> +
> +##
> +# = CXL devices
> +##
> +
> +##
> +# @CxlUncorErrorType:
> +#
> +# Type of uncorrectable CXL error to inject. These errors are reported via
> +# an AER uncorrectable internal error with additional information logged at
> +# the CXL device.
> +#
> +# @cache-data-parity: Data error such as data parity or data ECC error CXL.cache
> +# @cache-address-parity: Address parity or other errors associated with the
> +#                        address field on CXL.cache
> +# @cache-be-parity: Byte enable parity or other byte enable errors on CXL.cache
> +# @cache-data-ecc: ECC error on CXL.cache
> +# @mem-data-parity: Data error such as data parity or data ECC error on CXL.mem
> +# @mem-address-parity: Address parity or other errors associated with the
> +#                      address field on CXL.mem
> +# @mem-be-parity: Byte enable parity or other byte enable errors on CXL.mem.
> +# @mem-data-ecc: Data ECC error on CXL.mem.
> +# @reinit-threshold: REINIT threshold hit.
> +# @rsvd-encoding: Received unrecognized encoding.
> +# @poison-received: Received poison from the peer.
> +# @receiver-overflow: Buffer overflows (first 3 bits of header log indicate which)
> +# @internal: Component specific error
> +# @cxl-ide-tx: Integrity and data encryption tx error.
> +# @cxl-ide-rx: Integrity and data encryption rx error.
> +##
> +
> +{ 'enum': 'CxlUncorErrorType',
> +  'data': ['cache-data-parity',
> +           'cache-address-parity',
> +           'cache-be-parity',
> +           'cache-data-ecc',
> +           'mem-data-parity',
> +           'mem-address-parity',
> +           'mem-be-parity',
> +           'mem-data-ecc',
> +           'reinit-threshold',
> +           'rsvd-encoding',
> +           'poison-received',
> +           'receiver-overflow',
> +           'internal',
> +           'cxl-ide-tx',
> +           'cxl-ide-rx'
> +           ]
> + }
> +
> +##
> +# @CXLUncorErrorRecord:
> +#
> +# Record of a single error including header log.
> +#
> +# @type: Type of error
> +# @header: 16 DWORD of header.
> +##
> +{ 'struct': 'CXLUncorErrorRecord',
> +  'data': {
> +      'type': 'CxlUncorErrorType',
> +      'header': [ 'uint32' ]
> +  }
> +}
> +
> +##
> +# @cxl-inject-uncorrectable-errors:
> +#
> +# Command to allow injection of multiple errors in one go. This allows testing
> +# of multiple header log handling in the OS.
> +#
> +# @path: CXL Type 3 device canonical QOM path
> +# @errors: Errors to inject
> +##
> +{ 'command': 'cxl-inject-uncorrectable-errors',
> +  'data': { 'path': 'str',
> +             'errors': [ 'CXLUncorErrorRecord' ] }}
> +
> +##
> +# @CxlCorErrorType:
> +#
> +# Type of CXL correctable error to inject
> +#
> +# @cache-data-ecc: Data ECC error on CXL.cache
> +# @mem-data-ecc: Data ECC error on CXL.mem
> +# @crc-threshold: Component specific and applicable to 68 byte Flit mode only.
> +# @cache-poison-received: Received poison from a peer on CXL.cache.
> +# @mem-poison-received: Received poison from a peer on CXL.mem
> +# @physical: Received error indication from the physical layer.
> +##
> +{ 'enum': 'CxlCorErrorType',
> +  'data': ['cache-data-ecc',
> +           'mem-data-ecc',
> +           'crc-threshold',
> +           'retry-threshold',
> +           'cache-poison-received',
> +           'mem-poison-received',
> +           'physical']
> +}
> +
> +##
> +# @cxl-inject-correctable-error:
> +#
> +# Command to inject a single correctable error.  Multiple error injection
> +# of this error type is not interesting as there is no associated header log.
> +# These errors are reported via AER as a correctable internal error, with
> +# additional detail available from the CXL device.
> +#
> +# @path: CXL Type 3 device canonical QOM path
> +# @type: Type of error.
> +##
> +{ 'command': 'cxl-inject-correctable-error',
> +  'data': { 'path': 'str',
> +            'type': 'CxlCorErrorType'
> +  }
> +}
> diff --git a/qapi/meson.build b/qapi/meson.build
> index fbdb442fdf..73c3c8c31a 100644
> --- a/qapi/meson.build
> +++ b/qapi/meson.build
> @@ -31,6 +31,7 @@ qapi_all_modules = [
>     'compat',
>     'control',
>     'crypto',
> +  'cxl',
>     'dump',
>     'error',
>     'introspect',
> diff --git a/qapi/qapi-schema.json b/qapi/qapi-schema.json
> index f000b90744..079f2a402a 100644
> --- a/qapi/qapi-schema.json
> +++ b/qapi/qapi-schema.json
> @@ -95,3 +95,4 @@
>   { 'include': 'pci.json' }
>   { 'include': 'stats.json' }
>   { 'include': 'virtio.json' }
> +{ 'include': 'cxl.json' }
Philippe Mathieu-Daudé Feb. 21, 2023, 10:15 p.m. UTC | #2
Hi Jonathan,

On 21/2/23 16:21, Jonathan Cameron wrote:
> CXL uses PCI AER Internal errors to signal to the host that an error has
> occurred. The host can then read more detailed status from the CXL RAS
> capability.
> 
> For uncorrectable errors: support multiple injection in one operation
> as this is needed to reliably test multiple header logging support in an
> OS. The equivalent feature doesn't exist for correctable errors, so only
> one error need be injected at a time.
> 
> Note:
>   - Header content needs to be manually specified in a fashion that
>     matches the specification for what can be in the header for each
>     error type.
> 
> Injection via QMP:
> { "execute": "qmp_capabilities" }
> ...
> { "execute": "cxl-inject-uncorrectable-errors",
>    "arguments": {
>      "path": "/machine/peripheral/cxl-pmem0",
>      "errors": [
>          {
>              "type": "cache-address-parity",
>              "header": [ 3, 4]
>          },
>          {
>              "type": "cache-data-parity",
>              "header": [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31]
>          },
>          {
>              "type": "internal",
>              "header": [ 1, 2, 4]
>          }
>          ]
>    }}
> ...
> { "execute": "cxl-inject-correctable-error",
>      "arguments": {
>          "path": "/machine/peripheral/cxl-pmem0",
>          "type": "physical"
>      } }
> 
> Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
> ---
> v5: (Thanks to Dave Jiang for review)
> - Spell out Implementation Defined (previously typo as Imdef which did
>    not help)
> v4:
> - Improved QMP help text wth more detail (following request in review
>    of the Poison injection series)
> ---
>   hw/cxl/cxl-component-utils.c   |   4 +-
>   hw/mem/cxl_type3.c             | 281 +++++++++++++++++++++++++++++++++
>   hw/mem/cxl_type3_stubs.c       |  10 ++
>   hw/mem/meson.build             |   2 +
>   include/hw/cxl/cxl_component.h |  26 +++
>   include/hw/cxl/cxl_device.h    |  11 ++
>   qapi/cxl.json                  | 118 ++++++++++++++
>   qapi/meson.build               |   1 +
>   qapi/qapi-schema.json          |   1 +
>   9 files changed, 453 insertions(+), 1 deletion(-)


> diff --git a/hw/mem/cxl_type3_stubs.c b/hw/mem/cxl_type3_stubs.c
> new file mode 100644
> index 0000000000..b6b51ced54
> --- /dev/null
> +++ b/hw/mem/cxl_type3_stubs.c
> @@ -0,0 +1,10 @@
> +
> +#include "qemu/osdep.h"
> +#include "qapi/qapi-commands-cxl.h"
> +
> +void qmp_cxl_inject_uncorrectable_errors(const char *path,
> +                                         CXLUncorErrorRecordList *errors,
> +                                         Error **errp) {

What about:

     error_setg(errp, "CLX support is not compiled in");

}
> +
> +void qmp_cxl_inject_correctable_error(const char *path, CxlCorErrorType type,
> +                                      Error **errp) {}

Ditto.


> diff --git a/qapi/cxl.json b/qapi/cxl.json
> new file mode 100644
> index 0000000000..ac7e167fa2
> --- /dev/null
> +++ b/qapi/cxl.json
> @@ -0,0 +1,118 @@
> +# -*- Mode: Python -*-
> +# vim: filetype=python
> +
> +##
> +# = CXL devices
> +##
> +
> +##
> +# @CxlUncorErrorType:

Likely missing a "(since 8.0)" mention somewhere.

> +#
> +# Type of uncorrectable CXL error to inject. These errors are reported via
> +# an AER uncorrectable internal error with additional information logged at
> +# the CXL device.
> +#
> +# @cache-data-parity: Data error such as data parity or data ECC error CXL.cache
> +# @cache-address-parity: Address parity or other errors associated with the
> +#                        address field on CXL.cache
> +# @cache-be-parity: Byte enable parity or other byte enable errors on CXL.cache
> +# @cache-data-ecc: ECC error on CXL.cache
> +# @mem-data-parity: Data error such as data parity or data ECC error on CXL.mem
> +# @mem-address-parity: Address parity or other errors associated with the
> +#                      address field on CXL.mem
> +# @mem-be-parity: Byte enable parity or other byte enable errors on CXL.mem.
> +# @mem-data-ecc: Data ECC error on CXL.mem.
> +# @reinit-threshold: REINIT threshold hit.
> +# @rsvd-encoding: Received unrecognized encoding.
> +# @poison-received: Received poison from the peer.
> +# @receiver-overflow: Buffer overflows (first 3 bits of header log indicate which)
> +# @internal: Component specific error
> +# @cxl-ide-tx: Integrity and data encryption tx error.
> +# @cxl-ide-rx: Integrity and data encryption rx error.
> +##
> +
> +{ 'enum': 'CxlUncorErrorType',

Doesn't these need

      'if': 'CONFIG_CXL_MEM_DEVICE',

?

> +  'data': ['cache-data-parity',
> +           'cache-address-parity',
> +           'cache-be-parity',
> +           'cache-data-ecc',
> +           'mem-data-parity',
> +           'mem-address-parity',
> +           'mem-be-parity',
> +           'mem-data-ecc',
> +           'reinit-threshold',
> +           'rsvd-encoding',
> +           'poison-received',
> +           'receiver-overflow',
> +           'internal',
> +           'cxl-ide-tx',
> +           'cxl-ide-rx'
> +           ]
> + }
> +
> +##
> +# @CXLUncorErrorRecord:
> +#
> +# Record of a single error including header log.
> +#
> +# @type: Type of error
> +# @header: 16 DWORD of header.
> +##
> +{ 'struct': 'CXLUncorErrorRecord',
> +  'data': {
> +      'type': 'CxlUncorErrorType',
> +      'header': [ 'uint32' ]
> +  }
> +}
> +
> +##
> +# @cxl-inject-uncorrectable-errors:
> +#
> +# Command to allow injection of multiple errors in one go. This allows testing
> +# of multiple header log handling in the OS.
> +#
> +# @path: CXL Type 3 device canonical QOM path
> +# @errors: Errors to inject
> +##
> +{ 'command': 'cxl-inject-uncorrectable-errors',
> +  'data': { 'path': 'str',
> +             'errors': [ 'CXLUncorErrorRecord' ] }}
> +
> +##
> +# @CxlCorErrorType:
> +#
> +# Type of CXL correctable error to inject
> +#
> +# @cache-data-ecc: Data ECC error on CXL.cache
> +# @mem-data-ecc: Data ECC error on CXL.mem
> +# @crc-threshold: Component specific and applicable to 68 byte Flit mode only.
> +# @cache-poison-received: Received poison from a peer on CXL.cache.
> +# @mem-poison-received: Received poison from a peer on CXL.mem
> +# @physical: Received error indication from the physical layer.
> +##
> +{ 'enum': 'CxlCorErrorType',
> +  'data': ['cache-data-ecc',
> +           'mem-data-ecc',
> +           'crc-threshold',
> +           'retry-threshold',
> +           'cache-poison-received',
> +           'mem-poison-received',
> +           'physical']
> +}
> +
> +##
> +# @cxl-inject-correctable-error:
> +#
> +# Command to inject a single correctable error.  Multiple error injection
> +# of this error type is not interesting as there is no associated header log.
> +# These errors are reported via AER as a correctable internal error, with
> +# additional detail available from the CXL device.
> +#
> +# @path: CXL Type 3 device canonical QOM path
> +# @type: Type of error.
> +##
> +{ 'command': 'cxl-inject-correctable-error',
> +  'data': { 'path': 'str',
> +            'type': 'CxlCorErrorType'
> +  }
> +}
Jonathan Cameron Feb. 22, 2023, 2:53 p.m. UTC | #3
On Tue, 21 Feb 2023 23:15:49 +0100
Philippe Mathieu-Daudé <philmd@linaro.org> wrote:

> Hi Jonathan,
> 
> On 21/2/23 16:21, Jonathan Cameron wrote:
> > CXL uses PCI AER Internal errors to signal to the host that an error has
> > occurred. The host can then read more detailed status from the CXL RAS
> > capability.
> > 
> > For uncorrectable errors: support multiple injection in one operation
> > as this is needed to reliably test multiple header logging support in an
> > OS. The equivalent feature doesn't exist for correctable errors, so only
> > one error need be injected at a time.
> > 
> > Note:
> >   - Header content needs to be manually specified in a fashion that
> >     matches the specification for what can be in the header for each
> >     error type.
> > 
> > Injection via QMP:
> > { "execute": "qmp_capabilities" }
> > ...
> > { "execute": "cxl-inject-uncorrectable-errors",
> >    "arguments": {
> >      "path": "/machine/peripheral/cxl-pmem0",
> >      "errors": [
> >          {
> >              "type": "cache-address-parity",
> >              "header": [ 3, 4]
> >          },
> >          {
> >              "type": "cache-data-parity",
> >              "header": [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31]
> >          },
> >          {
> >              "type": "internal",
> >              "header": [ 1, 2, 4]
> >          }
> >          ]
> >    }}
> > ...
> > { "execute": "cxl-inject-correctable-error",
> >      "arguments": {
> >          "path": "/machine/peripheral/cxl-pmem0",
> >          "type": "physical"
> >      } }
> > 
> > Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>

Hi Philippe,

Thanks for your review.  One question inline.

> > +#
> > +# Type of uncorrectable CXL error to inject. These errors are reported via
> > +# an AER uncorrectable internal error with additional information logged at
> > +# the CXL device.
> > +#
> > +# @cache-data-parity: Data error such as data parity or data ECC error CXL.cache
> > +# @cache-address-parity: Address parity or other errors associated with the
> > +#                        address field on CXL.cache
> > +# @cache-be-parity: Byte enable parity or other byte enable errors on CXL.cache
> > +# @cache-data-ecc: ECC error on CXL.cache
> > +# @mem-data-parity: Data error such as data parity or data ECC error on CXL.mem
> > +# @mem-address-parity: Address parity or other errors associated with the
> > +#                      address field on CXL.mem
> > +# @mem-be-parity: Byte enable parity or other byte enable errors on CXL.mem.
> > +# @mem-data-ecc: Data ECC error on CXL.mem.
> > +# @reinit-threshold: REINIT threshold hit.
> > +# @rsvd-encoding: Received unrecognized encoding.
> > +# @poison-received: Received poison from the peer.
> > +# @receiver-overflow: Buffer overflows (first 3 bits of header log indicate which)
> > +# @internal: Component specific error
> > +# @cxl-ide-tx: Integrity and data encryption tx error.
> > +# @cxl-ide-rx: Integrity and data encryption rx error.
> > +##
> > +
> > +{ 'enum': 'CxlUncorErrorType',  
> 
> Doesn't these need
> 
>       'if': 'CONFIG_CXL_MEM_DEVICE',
> 
> ?

If I make this change I get a bunch of

./qapi/qapi-types-cxl.h:18:13: error: attempt to use poisoned "CONFIG_CXL_MEM_DEVICE"
   18 | #if defined(CONFIG_CXL_MEM_DEVICE)

It's a target specific define (I think) as built alongside PCI_EXPRESS
Only CXL_ACPI is specifically included by x86 and arm64 (out of tree)

To be honest though I don't fully understand the QEMU build system so the reason
for the error might be wrong.

> 
> > +  'data': ['cache-data-parity',
> > +           'cache-address-parity',
> > +           'cache-be-parity',
> > +           'cache-data-ecc',
> > +           'mem-data-parity',
> > +           'mem-address-parity',
> > +           'mem-be-parity',
> > +           'mem-data-ecc',
> > +           'reinit-threshold',
> > +           'rsvd-encoding',
> > +           'poison-received',
> > +           'receiver-overflow',
> > +           'internal',
> > +           'cxl-ide-tx',
> > +           'cxl-ide-rx'
> > +           ]
> > + }
Philippe Mathieu-Daudé Feb. 22, 2023, 3:32 p.m. UTC | #4
On 22/2/23 15:53, Jonathan Cameron wrote:
> On Tue, 21 Feb 2023 23:15:49 +0100
> Philippe Mathieu-Daudé <philmd@linaro.org> wrote:
> 
>> Hi Jonathan,
>>
>> On 21/2/23 16:21, Jonathan Cameron wrote:
>>> CXL uses PCI AER Internal errors to signal to the host that an error has
>>> occurred. The host can then read more detailed status from the CXL RAS
>>> capability.
>>>
>>> For uncorrectable errors: support multiple injection in one operation
>>> as this is needed to reliably test multiple header logging support in an
>>> OS. The equivalent feature doesn't exist for correctable errors, so only
>>> one error need be injected at a time.
>>>
>>> Note:
>>>    - Header content needs to be manually specified in a fashion that
>>>      matches the specification for what can be in the header for each
>>>      error type.
>>>
>>> Injection via QMP:
>>> { "execute": "qmp_capabilities" }
>>> ...
>>> { "execute": "cxl-inject-uncorrectable-errors",
>>>     "arguments": {
>>>       "path": "/machine/peripheral/cxl-pmem0",
>>>       "errors": [
>>>           {
>>>               "type": "cache-address-parity",
>>>               "header": [ 3, 4]
>>>           },
>>>           {
>>>               "type": "cache-data-parity",
>>>               "header": [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31]
>>>           },
>>>           {
>>>               "type": "internal",
>>>               "header": [ 1, 2, 4]
>>>           }
>>>           ]
>>>     }}
>>> ...
>>> { "execute": "cxl-inject-correctable-error",
>>>       "arguments": {
>>>           "path": "/machine/peripheral/cxl-pmem0",
>>>           "type": "physical"
>>>       } }
>>>
>>> Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
> 
> Hi Philippe,
> 
> Thanks for your review.  One question inline.
> 
>>> +#
>>> +# Type of uncorrectable CXL error to inject. These errors are reported via
>>> +# an AER uncorrectable internal error with additional information logged at
>>> +# the CXL device.
>>> +#
>>> +# @cache-data-parity: Data error such as data parity or data ECC error CXL.cache
>>> +# @cache-address-parity: Address parity or other errors associated with the
>>> +#                        address field on CXL.cache
>>> +# @cache-be-parity: Byte enable parity or other byte enable errors on CXL.cache
>>> +# @cache-data-ecc: ECC error on CXL.cache
>>> +# @mem-data-parity: Data error such as data parity or data ECC error on CXL.mem
>>> +# @mem-address-parity: Address parity or other errors associated with the
>>> +#                      address field on CXL.mem
>>> +# @mem-be-parity: Byte enable parity or other byte enable errors on CXL.mem.
>>> +# @mem-data-ecc: Data ECC error on CXL.mem.
>>> +# @reinit-threshold: REINIT threshold hit.
>>> +# @rsvd-encoding: Received unrecognized encoding.
>>> +# @poison-received: Received poison from the peer.
>>> +# @receiver-overflow: Buffer overflows (first 3 bits of header log indicate which)
>>> +# @internal: Component specific error
>>> +# @cxl-ide-tx: Integrity and data encryption tx error.
>>> +# @cxl-ide-rx: Integrity and data encryption rx error.
>>> +##
>>> +
>>> +{ 'enum': 'CxlUncorErrorType',
>>
>> Doesn't these need
>>
>>        'if': 'CONFIG_CXL_MEM_DEVICE',
>>
>> ?
> 
> If I make this change I get a bunch of
> 
> ./qapi/qapi-types-cxl.h:18:13: error: attempt to use poisoned "CONFIG_CXL_MEM_DEVICE"
>     18 | #if defined(CONFIG_CXL_MEM_DEVICE)

Err, I meant the generic CONFIG_CXL, not CONFIG_CXL_MEM_DEVICE.

> It's a target specific define (I think) as built alongside PCI_EXPRESS
> Only CXL_ACPI is specifically included by x86 and arm64 (out of tree)
> 
> To be honest though I don't fully understand the QEMU build system so the reason
> for the error might be wrong.

You need to restrict to system emulation (the 'have_system' check):

-- >8 --
diff --git a/qapi/meson.build b/qapi/meson.build
index fbdb442fdf..643c76d61c 100644
--- a/qapi/meson.build
+++ b/qapi/meson.build
@@ -31,6 +31,7 @@ qapi_all_modules = [
    'compat',
    'control',
    'crypto',
-  'cxl',
    'dump',
    'error',
    'introspect',
@@ -58,6 +59,7 @@ if have_system
      'audio',
      'qdev',
      'pci',
+    'cxl',
      'rdma',
      'rocker',
      'tpm',
---
Jonathan Cameron Feb. 22, 2023, 4:49 p.m. UTC | #5
...

> >>> +# Type of uncorrectable CXL error to inject. These errors are reported via
> >>> +# an AER uncorrectable internal error with additional information logged at
> >>> +# the CXL device.
> >>> +#
> >>> +# @cache-data-parity: Data error such as data parity or data ECC error CXL.cache
> >>> +# @cache-address-parity: Address parity or other errors associated with the
> >>> +#                        address field on CXL.cache
> >>> +# @cache-be-parity: Byte enable parity or other byte enable errors on CXL.cache
> >>> +# @cache-data-ecc: ECC error on CXL.cache
> >>> +# @mem-data-parity: Data error such as data parity or data ECC error on CXL.mem
> >>> +# @mem-address-parity: Address parity or other errors associated with the
> >>> +#                      address field on CXL.mem
> >>> +# @mem-be-parity: Byte enable parity or other byte enable errors on CXL.mem.
> >>> +# @mem-data-ecc: Data ECC error on CXL.mem.
> >>> +# @reinit-threshold: REINIT threshold hit.
> >>> +# @rsvd-encoding: Received unrecognized encoding.
> >>> +# @poison-received: Received poison from the peer.
> >>> +# @receiver-overflow: Buffer overflows (first 3 bits of header log indicate which)
> >>> +# @internal: Component specific error
> >>> +# @cxl-ide-tx: Integrity and data encryption tx error.
> >>> +# @cxl-ide-rx: Integrity and data encryption rx error.
> >>> +##
> >>> +
> >>> +{ 'enum': 'CxlUncorErrorType',  
> >>
> >> Doesn't these need
> >>
> >>        'if': 'CONFIG_CXL_MEM_DEVICE',
> >>
> >> ?  
> > 
> > If I make this change I get a bunch of
> > 
> > ./qapi/qapi-types-cxl.h:18:13: error: attempt to use poisoned "CONFIG_CXL_MEM_DEVICE"
> >     18 | #if defined(CONFIG_CXL_MEM_DEVICE)  
> 
> Err, I meant the generic CONFIG_CXL, not CONFIG_CXL_MEM_DEVICE.
> 
> > It's a target specific define (I think) as built alongside PCI_EXPRESS
> > Only CXL_ACPI is specifically included by x86 and arm64 (out of tree)
> > 
> > To be honest though I don't fully understand the QEMU build system so the reason
> > for the error might be wrong.  
> 
> You need to restrict to system emulation (the 'have_system' check):

This doesn't help - still have 
attempt to used poisoned "CONFIG_CXL"


> 
> -- >8 --  
> diff --git a/qapi/meson.build b/qapi/meson.build
> index fbdb442fdf..643c76d61c 100644
> --- a/qapi/meson.build
> +++ b/qapi/meson.build
> @@ -31,6 +31,7 @@ qapi_all_modules = [
>     'compat',
>     'control',
>     'crypto',
> -  'cxl',
>     'dump',
>     'error',
>     'introspect',
> @@ -58,6 +59,7 @@ if have_system
>       'audio',
>       'qdev',
>       'pci',
> +    'cxl',
>       'rdma',
>       'rocker',
>       'tpm',
> ---
>
Philippe Mathieu-Daudé Feb. 22, 2023, 6:16 p.m. UTC | #6
+Thomas (meson) & Marc-André (conditional QAPI)

On 22/2/23 17:49, Jonathan Cameron wrote:
>>>>> +# Type of uncorrectable CXL error to inject. These errors are reported via
>>>>> +# an AER uncorrectable internal error with additional information logged at
>>>>> +# the CXL device.
>>>>> +#
>>>>> +# @cache-data-parity: Data error such as data parity or data ECC error CXL.cache
>>>>> +# @cache-address-parity: Address parity or other errors associated with the
>>>>> +#                        address field on CXL.cache
>>>>> +# @cache-be-parity: Byte enable parity or other byte enable errors on CXL.cache
>>>>> +# @cache-data-ecc: ECC error on CXL.cache
>>>>> +# @mem-data-parity: Data error such as data parity or data ECC error on CXL.mem
>>>>> +# @mem-address-parity: Address parity or other errors associated with the
>>>>> +#                      address field on CXL.mem
>>>>> +# @mem-be-parity: Byte enable parity or other byte enable errors on CXL.mem.
>>>>> +# @mem-data-ecc: Data ECC error on CXL.mem.
>>>>> +# @reinit-threshold: REINIT threshold hit.
>>>>> +# @rsvd-encoding: Received unrecognized encoding.
>>>>> +# @poison-received: Received poison from the peer.
>>>>> +# @receiver-overflow: Buffer overflows (first 3 bits of header log indicate which)
>>>>> +# @internal: Component specific error
>>>>> +# @cxl-ide-tx: Integrity and data encryption tx error.
>>>>> +# @cxl-ide-rx: Integrity and data encryption rx error.
>>>>> +##
>>>>> +
>>>>> +{ 'enum': 'CxlUncorErrorType',
>>>>
>>>> Doesn't these need
>>>>
>>>>         'if': 'CONFIG_CXL_MEM_DEVICE',
>>>>
>>>> ?
>>>
>>> If I make this change I get a bunch of
>>>
>>> ./qapi/qapi-types-cxl.h:18:13: error: attempt to use poisoned "CONFIG_CXL_MEM_DEVICE"
>>>      18 | #if defined(CONFIG_CXL_MEM_DEVICE)
>>
>> Err, I meant the generic CONFIG_CXL, not CONFIG_CXL_MEM_DEVICE.
>>
>>> It's a target specific define (I think) as built alongside PCI_EXPRESS
>>> Only CXL_ACPI is specifically included by x86 and arm64 (out of tree)
>>>
>>> To be honest though I don't fully understand the QEMU build system so the reason
>>> for the error might be wrong.
>>
>> You need to restrict to system emulation (the 'have_system' check):
> 
> This doesn't help - still have
> attempt to used poisoned "CONFIG_CXL"
> 
> 
>>
>> -- >8 --
>> diff --git a/qapi/meson.build b/qapi/meson.build
>> index fbdb442fdf..643c76d61c 100644
>> --- a/qapi/meson.build
>> +++ b/qapi/meson.build
>> @@ -31,6 +31,7 @@ qapi_all_modules = [
>>      'compat',
>>      'control',
>>      'crypto',
>> -  'cxl',
>>      'dump',
>>      'error',
>>      'introspect',
>> @@ -58,6 +59,7 @@ if have_system
>>        'audio',
>>        'qdev',
>>        'pci',
>> +    'cxl',
>>        'rdma',
>>        'rocker',
>>        'tpm',
>> ---
>>
>
Markus Armbruster Feb. 22, 2023, 6:28 p.m. UTC | #7
Jonathan Cameron <Jonathan.Cameron@huawei.com> writes:

> On Tue, 21 Feb 2023 23:15:49 +0100
> Philippe Mathieu-Daudé <philmd@linaro.org> wrote:
>
>> Hi Jonathan,
>> 
>> On 21/2/23 16:21, Jonathan Cameron wrote:
>> > CXL uses PCI AER Internal errors to signal to the host that an error has
>> > occurred. The host can then read more detailed status from the CXL RAS
>> > capability.
>> > 
>> > For uncorrectable errors: support multiple injection in one operation
>> > as this is needed to reliably test multiple header logging support in an
>> > OS. The equivalent feature doesn't exist for correctable errors, so only
>> > one error need be injected at a time.
>> > 
>> > Note:
>> >   - Header content needs to be manually specified in a fashion that
>> >     matches the specification for what can be in the header for each
>> >     error type.
>> > 
>> > Injection via QMP:
>> > { "execute": "qmp_capabilities" }
>> > ...
>> > { "execute": "cxl-inject-uncorrectable-errors",
>> >    "arguments": {
>> >      "path": "/machine/peripheral/cxl-pmem0",
>> >      "errors": [
>> >          {
>> >              "type": "cache-address-parity",
>> >              "header": [ 3, 4]
>> >          },
>> >          {
>> >              "type": "cache-data-parity",
>> >              "header": [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31]
>> >          },
>> >          {
>> >              "type": "internal",
>> >              "header": [ 1, 2, 4]
>> >          }
>> >          ]
>> >    }}
>> > ...
>> > { "execute": "cxl-inject-correctable-error",
>> >      "arguments": {
>> >          "path": "/machine/peripheral/cxl-pmem0",
>> >          "type": "physical"
>> >      } }
>> > 
>> > Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
>
> Hi Philippe,
>
> Thanks for your review.  One question inline.
>
>> > +#
>> > +# Type of uncorrectable CXL error to inject. These errors are reported via
>> > +# an AER uncorrectable internal error with additional information logged at
>> > +# the CXL device.
>> > +#
>> > +# @cache-data-parity: Data error such as data parity or data ECC error CXL.cache
>> > +# @cache-address-parity: Address parity or other errors associated with the
>> > +#                        address field on CXL.cache
>> > +# @cache-be-parity: Byte enable parity or other byte enable errors on CXL.cache
>> > +# @cache-data-ecc: ECC error on CXL.cache
>> > +# @mem-data-parity: Data error such as data parity or data ECC error on CXL.mem
>> > +# @mem-address-parity: Address parity or other errors associated with the
>> > +#                      address field on CXL.mem
>> > +# @mem-be-parity: Byte enable parity or other byte enable errors on CXL.mem.
>> > +# @mem-data-ecc: Data ECC error on CXL.mem.
>> > +# @reinit-threshold: REINIT threshold hit.
>> > +# @rsvd-encoding: Received unrecognized encoding.
>> > +# @poison-received: Received poison from the peer.
>> > +# @receiver-overflow: Buffer overflows (first 3 bits of header log indicate which)
>> > +# @internal: Component specific error
>> > +# @cxl-ide-tx: Integrity and data encryption tx error.
>> > +# @cxl-ide-rx: Integrity and data encryption rx error.
>> > +##
>> > +
>> > +{ 'enum': 'CxlUncorErrorType',  
>> 
>> Doesn't these need
>> 
>>       'if': 'CONFIG_CXL_MEM_DEVICE',
>> 
>> ?
>
> If I make this change I get a bunch of
>
> ./qapi/qapi-types-cxl.h:18:13: error: attempt to use poisoned "CONFIG_CXL_MEM_DEVICE"
>    18 | #if defined(CONFIG_CXL_MEM_DEVICE)

This means you're trying to use target-dependent stuff in
target-independent code.

Have a look at the thread

    Subject: Can we unpoison CONFIG_FOO macros?
    Message-ID: <87lel9o56z.fsf@pond.sub.org>
    https://lists.gnu.org/archive/html/qemu-devel/2023-02/msg01885.html

and if questions remain, ask them right here.

> It's a target specific define (I think) as built alongside PCI_EXPRESS
> Only CXL_ACPI is specifically included by x86 and arm64 (out of tree)
>
> To be honest though I don't fully understand the QEMU build system so the reason
> for the error might be wrong.

[...]
Thomas Huth Feb. 23, 2023, 6:58 a.m. UTC | #8
On 22/02/2023 19.16, Philippe Mathieu-Daudé wrote:
> +Thomas (meson) & Marc-André (conditional QAPI)

+ Markus

> On 22/2/23 17:49, Jonathan Cameron wrote:
>>>>>> +# Type of uncorrectable CXL error to inject. These errors are 
>>>>>> reported via
>>>>>> +# an AER uncorrectable internal error with additional information 
>>>>>> logged at
>>>>>> +# the CXL device.
>>>>>> +#
>>>>>> +# @cache-data-parity: Data error such as data parity or data ECC 
>>>>>> error CXL.cache
>>>>>> +# @cache-address-parity: Address parity or other errors associated 
>>>>>> with the
>>>>>> +#                        address field on CXL.cache
>>>>>> +# @cache-be-parity: Byte enable parity or other byte enable errors on 
>>>>>> CXL.cache
>>>>>> +# @cache-data-ecc: ECC error on CXL.cache
>>>>>> +# @mem-data-parity: Data error such as data parity or data ECC error 
>>>>>> on CXL.mem
>>>>>> +# @mem-address-parity: Address parity or other errors associated with 
>>>>>> the
>>>>>> +#                      address field on CXL.mem
>>>>>> +# @mem-be-parity: Byte enable parity or other byte enable errors on 
>>>>>> CXL.mem.
>>>>>> +# @mem-data-ecc: Data ECC error on CXL.mem.
>>>>>> +# @reinit-threshold: REINIT threshold hit.
>>>>>> +# @rsvd-encoding: Received unrecognized encoding.
>>>>>> +# @poison-received: Received poison from the peer.
>>>>>> +# @receiver-overflow: Buffer overflows (first 3 bits of header log 
>>>>>> indicate which)
>>>>>> +# @internal: Component specific error
>>>>>> +# @cxl-ide-tx: Integrity and data encryption tx error.
>>>>>> +# @cxl-ide-rx: Integrity and data encryption rx error.
>>>>>> +##
>>>>>> +
>>>>>> +{ 'enum': 'CxlUncorErrorType',
>>>>>
>>>>> Doesn't these need
>>>>>
>>>>>         'if': 'CONFIG_CXL_MEM_DEVICE',
>>>>>
>>>>> ?
>>>>
>>>> If I make this change I get a bunch of
>>>>
>>>> ./qapi/qapi-types-cxl.h:18:13: error: attempt to use poisoned 
>>>> "CONFIG_CXL_MEM_DEVICE"
>>>>      18 | #if defined(CONFIG_CXL_MEM_DEVICE)
>>>
>>> Err, I meant the generic CONFIG_CXL, not CONFIG_CXL_MEM_DEVICE.
>>>
>>>> It's a target specific define (I think) as built alongside PCI_EXPRESS
>>>> Only CXL_ACPI is specifically included by x86 and arm64 (out of tree)
>>>>
>>>> To be honest though I don't fully understand the QEMU build system so 
>>>> the reason
>>>> for the error might be wrong.
>>>
>>> You need to restrict to system emulation (the 'have_system' check):
>>
>> This doesn't help - still have
>> attempt to used poisoned "CONFIG_CXL"

Not sure how the QAPI generator works, but target specific config switches 
can only be used in target specific json files there, so that's 
machine-target.json and misc-target.json currently, as far as I know. Not 
sure how the QAPI generator distinguishes between common and target specific 
code, though ... just by the "-target" suffix? Maybe Markus or Marc-André 
can comment on that.

See also:

  https://lists.gnu.org/archive/html/qemu-devel/2023-02/msg01885.html
  https://lists.gnu.org/archive/html/qemu-devel/2023-02/msg02001.html

  Thomas
Markus Armbruster Feb. 23, 2023, 7:37 a.m. UTC | #9
Thomas Huth <thuth@redhat.com> writes:

> On 22/02/2023 19.16, Philippe Mathieu-Daudé wrote:
>> +Thomas (meson) & Marc-André (conditional QAPI)
>
> + Markus
>
>> On 22/2/23 17:49, Jonathan Cameron wrote:

[...]

>>>>>> Doesn't these need
>>>>>>
>>>>>>         'if': 'CONFIG_CXL_MEM_DEVICE',
>>>>>>
>>>>>> ?
>>>>>
>>>>> If I make this change I get a bunch of
>>>>>
>>>>> ./qapi/qapi-types-cxl.h:18:13: error: attempt to use poisoned "CONFIG_CXL_MEM_DEVICE"
>>>>>      18 | #if defined(CONFIG_CXL_MEM_DEVICE)
>>>>
>>>> Err, I meant the generic CONFIG_CXL, not CONFIG_CXL_MEM_DEVICE.
>>>>
>>>>> It's a target specific define (I think) as built alongside PCI_EXPRESS
>>>>> Only CXL_ACPI is specifically included by x86 and arm64 (out of tree)
>>>>>
>>>>> To be honest though I don't fully understand the QEMU build system so the reason
>>>>> for the error might be wrong.
>>>>
>>>> You need to restrict to system emulation (the 'have_system' check):
>>>
>>> This doesn't help - still have
>>> attempt to used poisoned "CONFIG_CXL"
>
> Not sure how the QAPI generator works, but target specific config switches can only be used in target specific json files there, so that's machine-target.json and misc-target.json currently, as far as I know. Not sure how the QAPI generator distinguishes between common and target specific code, though ... just by the "-target" suffix? Maybe Markus or Marc-André can comment on that.

Whenever you use a poisoned macro in a conditional, all the code
generated for this .json file (we call it a "QAPI schema module")
becomes target-dependent.  The QAPI code generator itself is blissfully
unaware of this.

Since target-dependent code needs to be compiled differently, the build
process needs to be know which modules are target-dependent.  We do this
in one of the stupidest ways that could possibly work: a module is
target-dependent if its name ends with "-target".  There are just two
right now: qapi/machine-target.json and qapi/misc-target.json.

The logic resides in qapi/meson.build.  Look for

    if module.endswith('-target')

Questions?

[...]
Jonathan Cameron Feb. 23, 2023, 2:27 p.m. UTC | #10
On Thu, 23 Feb 2023 08:37:46 +0100
Markus Armbruster <armbru@redhat.com> wrote:

> Thomas Huth <thuth@redhat.com> writes:
> 
> > On 22/02/2023 19.16, Philippe Mathieu-Daudé wrote:  
> >> +Thomas (meson) & Marc-André (conditional QAPI)  
> >
> > + Markus
> >  
> >> On 22/2/23 17:49, Jonathan Cameron wrote:  
> 
> [...]
> 
> >>>>>> Doesn't these need
> >>>>>>
> >>>>>>         'if': 'CONFIG_CXL_MEM_DEVICE',
> >>>>>>
> >>>>>> ?  
> >>>>>
> >>>>> If I make this change I get a bunch of
> >>>>>
> >>>>> ./qapi/qapi-types-cxl.h:18:13: error: attempt to use poisoned "CONFIG_CXL_MEM_DEVICE"
> >>>>>      18 | #if defined(CONFIG_CXL_MEM_DEVICE)  
> >>>>
> >>>> Err, I meant the generic CONFIG_CXL, not CONFIG_CXL_MEM_DEVICE.
> >>>>  
> >>>>> It's a target specific define (I think) as built alongside PCI_EXPRESS
> >>>>> Only CXL_ACPI is specifically included by x86 and arm64 (out of tree)
> >>>>>
> >>>>> To be honest though I don't fully understand the QEMU build system so the reason
> >>>>> for the error might be wrong.  
> >>>>
> >>>> You need to restrict to system emulation (the 'have_system' check):  
> >>>
> >>> This doesn't help - still have
> >>> attempt to used poisoned "CONFIG_CXL"  
> >
> > Not sure how the QAPI generator works, but target specific config switches can only be used in target specific json files there, so that's machine-target.json and misc-target.json currently, as far as I know. Not sure how the QAPI generator distinguishes between common and target specific code, though ... just by the "-target" suffix? Maybe Markus or Marc-André can comment on that.  
> 
> Whenever you use a poisoned macro in a conditional, all the code
> generated for this .json file (we call it a "QAPI schema module")
> becomes target-dependent.  The QAPI code generator itself is blissfully
> unaware of this.
> 
> Since target-dependent code needs to be compiled differently, the build
> process needs to be know which modules are target-dependent.  We do this
> in one of the stupidest ways that could possibly work: a module is
> target-dependent if its name ends with "-target".  There are just two
> right now: qapi/machine-target.json and qapi/misc-target.json.
> 
> The logic resides in qapi/meson.build.  Look for
> 
>     if module.endswith('-target')

Thanks for all the pointers.
> 
> Questions?

Is it sensible to make the cxl stuff all target dependent and do the following?
I like that we can get rid of the stubs if we do this but I'm sure there are
disadvantages. Only alternative I can currently see is continue to have
stubs and not make the qmp commands conditional on them doing anything useful.

Note this is on top of my tree so involves more changes - I'll push it down
into the relevant series.

From 551122103cf1f5bb4de8ee005482c72532181439 Mon Sep 17 00:00:00 2001
From: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Date: Thu, 23 Feb 2023 14:22:53 +0000
Subject: [PATCH] hw/cxl: Make CXL compilation target specific

---
 hw/cxl/cxl-host-stubs.c            | 15 --------
 hw/cxl/meson.build                 |  6 +---
 hw/mem/cxl_type3.c                 |  3 +-
 hw/mem/cxl_type3_stubs.c           | 58 ------------------------------
 hw/mem/meson.build                 |  8 ++---
 qapi/{cxl.json => cxl-target.json} | 37 +++++++++++++------
 qapi/meson.build                   |  2 +-
 qapi/qapi-schema.json              |  2 +-
 8 files changed, 35 insertions(+), 96 deletions(-)

diff --git a/hw/cxl/cxl-host-stubs.c b/hw/cxl/cxl-host-stubs.c
deleted file mode 100644
index cae4afcdde..0000000000
--- a/hw/cxl/cxl-host-stubs.c
+++ /dev/null
@@ -1,15 +0,0 @@
-/*
- * CXL host parameter parsing routine stubs
- *
- * Copyright (c) 2022 Huawei
- */
-#include "qemu/osdep.h"
-#include "qapi/error.h"
-#include "hw/cxl/cxl.h"
-#include "hw/cxl/cxl_host.h"
-
-void cxl_fmws_link_targets(CXLState *stat, Error **errp) {};
-void cxl_machine_init(Object *obj, CXLState *state) {};
-void cxl_hook_up_pxb_registers(PCIBus *bus, CXLState *state, Error **errp) {};
-
-const MemoryRegionOps cfmws_ops;
diff --git a/hw/cxl/meson.build b/hw/cxl/meson.build
index 99ee564ce8..99eeb84268 100644
--- a/hw/cxl/meson.build
+++ b/hw/cxl/meson.build
@@ -1,4 +1,4 @@
-softmmu_ss.add(when: 'CONFIG_CXL',
+specific_ss.add(when: 'CONFIG_CXL',
                if_true: files(
                    'cxl-component-utils.c',
                    'cxl-device-utils.c',
@@ -8,9 +8,5 @@ softmmu_ss.add(when: 'CONFIG_CXL',
                    'cxl-events.c',
                    'cxl-cpmu.c',
                    'switch-mailbox-cci.c',
-               ),
-               if_false: files(
-                   'cxl-host-stubs.c',
                ))
 
-softmmu_ss.add(when: 'CONFIG_ALL', if_true: files('cxl-host-stubs.c'))
diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
index 334ce92f5e..cf20fb81ff 100644
--- a/hw/mem/cxl_type3.c
+++ b/hw/mem/cxl_type3.c
@@ -1,7 +1,8 @@
 #include "qemu/osdep.h"
+#include CONFIG_DEVICES
 #include "qemu/units.h"
 #include "qemu/error-report.h"
-#include "qapi/qapi-commands-cxl.h"
+#include "qapi/qapi-commands-cxl-target.h"
 #include "hw/mem/memory-device.h"
 #include "hw/mem/pc-dimm.h"
 #include "hw/pci/pci.h"
diff --git a/hw/mem/cxl_type3_stubs.c b/hw/mem/cxl_type3_stubs.c
deleted file mode 100644
index 2196bd841c..0000000000
--- a/hw/mem/cxl_type3_stubs.c
+++ /dev/null
@@ -1,58 +0,0 @@
-
-#include "qemu/osdep.h"
-#include "qapi/error.h"
-#include "qapi/qapi-commands-cxl.h"
-
-void qmp_cxl_inject_gen_media_event(const char *path, CxlEventLog log,
-                                    uint8_t flags, uint64_t physaddr,
-                                    uint8_t descriptor, uint8_t type,
-                                    uint8_t transaction_type,
-                                    bool has_channel, uint8_t channel,
-                                    bool has_rank, uint8_t rank,
-                                    bool has_device, uint32_t device,
-                                    const char *component_id,
-                                    Error **errp) {}
-
-void qmp_cxl_inject_dram_event(const char *path, CxlEventLog log, uint8_t flags,
-                               uint64_t physaddr, uint8_t descriptor,
-                               uint8_t type, uint8_t transaction_type,
-                               bool has_channel, uint8_t channel,
-                               bool has_rank, uint8_t rank,
-                               bool has_nibble_mask, uint32_t nibble_mask,
-                               bool has_bank_group, uint8_t bank_group,
-                               bool has_bank, uint8_t bank,
-                               bool has_row, uint32_t row,
-                               bool has_column, uint16_t column,
-                               bool has_correction_mask, uint64List *correction_mask,
-                               Error **errp) {}
-
-void qmp_cxl_inject_memory_module_event(const char *path, CxlEventLog log,
-                                        uint8_t flags, uint8_t type,
-                                        uint8_t health_status,
-                                        uint8_t media_status,
-                                        uint8_t additional_status,
-                                        uint8_t life_used,
-                                        int16_t temperature,
-                                        uint32_t dirty_shutdown_count,
-                                        uint32_t corrected_volatile_error_count,
-                                        uint32_t corrected_persistent_error_count,
-                                        Error **errp) {}
-
-void qmp_cxl_inject_poison(const char *path, uint64_t start, uint64_t length,
-                           Error **errp)
-{
-    error_setg(errp, "CXL Type 3 support is not compiled in");
-}
-
-void qmp_cxl_inject_uncorrectable_errors(const char *path,
-                                         CXLUncorErrorRecordList *errors,
-                                         Error **errp)
-{
-    error_setg(errp, "CXL Type 3 support is not compiled in");
-}
-
-void qmp_cxl_inject_correctable_error(const char *path, CxlCorErrorType type,
-                                      Error **errp)
-{
-    error_setg(errp, "CXL Type 3 support is not compiled in");
-}
diff --git a/hw/mem/meson.build b/hw/mem/meson.build
index 56c2618b84..2bdd24512e 100644
--- a/hw/mem/meson.build
+++ b/hw/mem/meson.build
@@ -3,10 +3,10 @@ mem_ss.add(files('memory-device.c'))
 mem_ss.add(when: 'CONFIG_DIMM', if_true: files('pc-dimm.c'))
 mem_ss.add(when: 'CONFIG_NPCM7XX', if_true: files('npcm7xx_mc.c'))
 mem_ss.add(when: 'CONFIG_NVDIMM', if_true: files('nvdimm.c'))
-mem_ss.add(when: 'CONFIG_CXL_MEM_DEVICE', if_true: files('cxl_type3.c'))
-softmmu_ss.add(when: 'CONFIG_CXL_MEM_DEVICE', if_false: files('cxl_type3_stubs.c'))
-softmmu_ss.add(when: 'CONFIG_ALL', if_true: files('cxl_type3_stubs.c'))
+specific_ss.add(when: 'CONFIG_CXL_MEM_DEVICE', if_true: files('cxl_type3.c'))
+#specific_ss.add(when: 'CONFIG_CXL_MEM_DEVICE', if_false: files('cxl_type3_stubs.c'))
+#specific_ss.add(when: 'CONFIG_ALL', if_true: files('cxl_type3_stubs.c'))
 
-softmmu_ss.add_all(when: 'CONFIG_MEM_DEVICE', if_true: mem_ss)
+specific_ss.add_all(when: 'CONFIG_MEM_DEVICE', if_true: mem_ss)
 
 softmmu_ss.add(when: 'CONFIG_SPARSE_MEM', if_true: files('sparse-mem.c'))
diff --git a/qapi/cxl.json b/qapi/cxl-target.json
similarity index 94%
rename from qapi/cxl.json
rename to qapi/cxl-target.json
index 4c029f2807..b3aecea4f0 100644
--- a/qapi/cxl.json
+++ b/qapi/cxl-target.json
@@ -21,7 +21,8 @@
            'warning',
            'failure',
            'fatal'
-           ]
+           ],
+  'if': 'CONFIG_CXL_MEM_DEVICE'
  }
 
 ##
@@ -49,7 +50,9 @@
             'type': 'uint8', 'transaction-type': 'uint8',
             '*channel': 'uint8', '*rank': 'uint8',
             '*device': 'uint32', '*component-id': 'str'
-            }}
+            },
+  'if': 'CONFIG_CXL_MEM_DEVICE'
+ }
 
 ##
 # @cxl-inject-dram-event:
@@ -82,7 +85,9 @@
             '*channel': 'uint8', '*rank': 'uint8', '*nibble-mask': 'uint32',
             '*bank-group': 'uint8', '*bank': 'uint8', '*row': 'uint32',
             '*column': 'uint16', '*correction-mask': [ 'uint64' ]
-           }}
+           },
+  'if': 'CONFIG_CXL_MEM_DEVICE'
+}
 
 ##
 # @cxl-inject-memory-module-event:
@@ -115,7 +120,9 @@
             'dirty-shutdown-count': 'uint32',
             'corrected-volatile-error-count': 'uint32',
             'corrected-persistent-error-count': 'uint32'
-            }}
+            },
+  'if': 'CONFIG_CXL_MEM_DEVICE'
+}
 
 ##
 # @cxl-inject-poison:
@@ -133,7 +140,9 @@
 # Since: 8.0
 ##
 { 'command': 'cxl-inject-poison',
-  'data': { 'path': 'str', 'start': 'uint64', 'length': 'uint64' }}
+  'data': { 'path': 'str', 'start': 'uint64', 'length': 'uint64' },
+  'if': 'CONFIG_CXL_MEM_DEVICE'
+}
 
 ##
 # @CxlUncorErrorType:
@@ -179,8 +188,9 @@
            'internal',
            'cxl-ide-tx',
            'cxl-ide-rx'
-           ]
- }
+           ],
+  'if': 'CONFIG_CXL_MEM_DEVICE'
+}
 
 ##
 # @CXLUncorErrorRecord:
@@ -196,7 +206,8 @@
   'data': {
       'type': 'CxlUncorErrorType',
       'header': [ 'uint32' ]
-  }
+  },
+  'if': 'CONFIG_CXL_MEM_DEVICE'
 }
 
 ##
@@ -212,7 +223,9 @@
 ##
 { 'command': 'cxl-inject-uncorrectable-errors',
   'data': { 'path': 'str',
-             'errors': [ 'CXLUncorErrorRecord' ] }}
+             'errors': [ 'CXLUncorErrorRecord' ] },
+  'if': 'CONFIG_CXL_MEM_DEVICE'
+}
 
 ##
 # @CxlCorErrorType:
@@ -235,7 +248,8 @@
            'retry-threshold',
            'cache-poison-received',
            'mem-poison-received',
-           'physical']
+           'physical'],
+  'if': 'CONFIG_CXL_MEM_DEVICE'
 }
 
 ##
@@ -254,5 +268,6 @@
 { 'command': 'cxl-inject-correctable-error',
   'data': { 'path': 'str',
             'type': 'CxlCorErrorType'
-  }
+           },
+  'if': 'CONFIG_CXL_MEM_DEVICE'
 }
diff --git a/qapi/meson.build b/qapi/meson.build
index 73c3c8c31a..f5b3f36979 100644
--- a/qapi/meson.build
+++ b/qapi/meson.build
@@ -31,7 +31,7 @@ qapi_all_modules = [
   'compat',
   'control',
   'crypto',
-  'cxl',
+  'cxl-target',
   'dump',
   'error',
   'introspect',
diff --git a/qapi/qapi-schema.json b/qapi/qapi-schema.json
index 079f2a402a..a79d84577f 100644
--- a/qapi/qapi-schema.json
+++ b/qapi/qapi-schema.json
@@ -95,4 +95,4 @@
 { 'include': 'pci.json' }
 { 'include': 'stats.json' }
 { 'include': 'virtio.json' }
-{ 'include': 'cxl.json' }
+{ 'include': 'cxl-target.json' }
Jonathan Cameron Feb. 24, 2023, 5:37 p.m. UTC | #11
On Thu, 23 Feb 2023 14:27:48 +0000
Jonathan Cameron <Jonathan.Cameron@huawei.com> wrote:

> On Thu, 23 Feb 2023 08:37:46 +0100
> Markus Armbruster <armbru@redhat.com> wrote:
> 
> > Thomas Huth <thuth@redhat.com> writes:
> >   
> > > On 22/02/2023 19.16, Philippe Mathieu-Daudé wrote:    
> > >> +Thomas (meson) & Marc-André (conditional QAPI)    
> > >
> > > + Markus
> > >    
> > >> On 22/2/23 17:49, Jonathan Cameron wrote:    
> > 
> > [...]
> >   
> > >>>>>> Doesn't these need
> > >>>>>>
> > >>>>>>         'if': 'CONFIG_CXL_MEM_DEVICE',
> > >>>>>>
> > >>>>>> ?    
> > >>>>>
> > >>>>> If I make this change I get a bunch of
> > >>>>>
> > >>>>> ./qapi/qapi-types-cxl.h:18:13: error: attempt to use poisoned "CONFIG_CXL_MEM_DEVICE"
> > >>>>>      18 | #if defined(CONFIG_CXL_MEM_DEVICE)    
> > >>>>
> > >>>> Err, I meant the generic CONFIG_CXL, not CONFIG_CXL_MEM_DEVICE.
> > >>>>    
> > >>>>> It's a target specific define (I think) as built alongside PCI_EXPRESS
> > >>>>> Only CXL_ACPI is specifically included by x86 and arm64 (out of tree)
> > >>>>>
> > >>>>> To be honest though I don't fully understand the QEMU build system so the reason
> > >>>>> for the error might be wrong.    
> > >>>>
> > >>>> You need to restrict to system emulation (the 'have_system' check):    
> > >>>
> > >>> This doesn't help - still have
> > >>> attempt to used poisoned "CONFIG_CXL"    
> > >
> > > Not sure how the QAPI generator works, but target specific config switches can only be used in target specific json files there, so that's machine-target.json and misc-target.json currently, as far as I know. Not sure how the QAPI generator distinguishes between common and target specific code, though ... just by the "-target" suffix? Maybe Markus or Marc-André can comment on that.    
> > 
> > Whenever you use a poisoned macro in a conditional, all the code
> > generated for this .json file (we call it a "QAPI schema module")
> > becomes target-dependent.  The QAPI code generator itself is blissfully
> > unaware of this.
> > 
> > Since target-dependent code needs to be compiled differently, the build
> > process needs to be know which modules are target-dependent.  We do this
> > in one of the stupidest ways that could possibly work: a module is
> > target-dependent if its name ends with "-target".  There are just two
> > right now: qapi/machine-target.json and qapi/misc-target.json.
> > 
> > The logic resides in qapi/meson.build.  Look for
> > 
> >     if module.endswith('-target')  
> 
> Thanks for all the pointers.
> > 
> > Questions?  
> 
> Is it sensible to make the cxl stuff all target dependent and do the following?
> I like that we can get rid of the stubs if we do this but I'm sure there are
> disadvantages. Only alternative I can currently see is continue to have
> stubs and not make the qmp commands conditional on them doing anything useful.
> 
> Note this is on top of my tree so involves more changes - I'll push it down
> into the relevant series.

I got too focused on getting it to build, and failed to realize that the below
change results in the new commands not being available anywhere. Oops.

Anyhow, my current conclusion is there isn't an easy way to make these conditional
so we should just keep the stubs for non CXL supporting builds.

If I'm wrong on that, let me know.

Thanks,

Jonathan

> 
> From 551122103cf1f5bb4de8ee005482c72532181439 Mon Sep 17 00:00:00 2001
> From: Jonathan Cameron <Jonathan.Cameron@huawei.com>
> Date: Thu, 23 Feb 2023 14:22:53 +0000
> Subject: [PATCH] hw/cxl: Make CXL compilation target specific
> 
> ---
>  hw/cxl/cxl-host-stubs.c            | 15 --------
>  hw/cxl/meson.build                 |  6 +---
>  hw/mem/cxl_type3.c                 |  3 +-
>  hw/mem/cxl_type3_stubs.c           | 58 ------------------------------
>  hw/mem/meson.build                 |  8 ++---
>  qapi/{cxl.json => cxl-target.json} | 37 +++++++++++++------
>  qapi/meson.build                   |  2 +-
>  qapi/qapi-schema.json              |  2 +-
>  8 files changed, 35 insertions(+), 96 deletions(-)
> 
> diff --git a/hw/cxl/cxl-host-stubs.c b/hw/cxl/cxl-host-stubs.c
> deleted file mode 100644
> index cae4afcdde..0000000000
> --- a/hw/cxl/cxl-host-stubs.c
> +++ /dev/null
> @@ -1,15 +0,0 @@
> -/*
> - * CXL host parameter parsing routine stubs
> - *
> - * Copyright (c) 2022 Huawei
> - */
> -#include "qemu/osdep.h"
> -#include "qapi/error.h"
> -#include "hw/cxl/cxl.h"
> -#include "hw/cxl/cxl_host.h"
> -
> -void cxl_fmws_link_targets(CXLState *stat, Error **errp) {};
> -void cxl_machine_init(Object *obj, CXLState *state) {};
> -void cxl_hook_up_pxb_registers(PCIBus *bus, CXLState *state, Error **errp) {};
> -
> -const MemoryRegionOps cfmws_ops;
> diff --git a/hw/cxl/meson.build b/hw/cxl/meson.build
> index 99ee564ce8..99eeb84268 100644
> --- a/hw/cxl/meson.build
> +++ b/hw/cxl/meson.build
> @@ -1,4 +1,4 @@
> -softmmu_ss.add(when: 'CONFIG_CXL',
> +specific_ss.add(when: 'CONFIG_CXL',
>                 if_true: files(
>                     'cxl-component-utils.c',
>                     'cxl-device-utils.c',
> @@ -8,9 +8,5 @@ softmmu_ss.add(when: 'CONFIG_CXL',
>                     'cxl-events.c',
>                     'cxl-cpmu.c',
>                     'switch-mailbox-cci.c',
> -               ),
> -               if_false: files(
> -                   'cxl-host-stubs.c',
>                 ))
>  
> -softmmu_ss.add(when: 'CONFIG_ALL', if_true: files('cxl-host-stubs.c'))
> diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
> index 334ce92f5e..cf20fb81ff 100644
> --- a/hw/mem/cxl_type3.c
> +++ b/hw/mem/cxl_type3.c
> @@ -1,7 +1,8 @@
>  #include "qemu/osdep.h"
> +#include CONFIG_DEVICES
>  #include "qemu/units.h"
>  #include "qemu/error-report.h"
> -#include "qapi/qapi-commands-cxl.h"
> +#include "qapi/qapi-commands-cxl-target.h"
>  #include "hw/mem/memory-device.h"
>  #include "hw/mem/pc-dimm.h"
>  #include "hw/pci/pci.h"
> diff --git a/hw/mem/cxl_type3_stubs.c b/hw/mem/cxl_type3_stubs.c
> deleted file mode 100644
> index 2196bd841c..0000000000
> --- a/hw/mem/cxl_type3_stubs.c
> +++ /dev/null
> @@ -1,58 +0,0 @@
> -
> -#include "qemu/osdep.h"
> -#include "qapi/error.h"
> -#include "qapi/qapi-commands-cxl.h"
> -
> -void qmp_cxl_inject_gen_media_event(const char *path, CxlEventLog log,
> -                                    uint8_t flags, uint64_t physaddr,
> -                                    uint8_t descriptor, uint8_t type,
> -                                    uint8_t transaction_type,
> -                                    bool has_channel, uint8_t channel,
> -                                    bool has_rank, uint8_t rank,
> -                                    bool has_device, uint32_t device,
> -                                    const char *component_id,
> -                                    Error **errp) {}
> -
> -void qmp_cxl_inject_dram_event(const char *path, CxlEventLog log, uint8_t flags,
> -                               uint64_t physaddr, uint8_t descriptor,
> -                               uint8_t type, uint8_t transaction_type,
> -                               bool has_channel, uint8_t channel,
> -                               bool has_rank, uint8_t rank,
> -                               bool has_nibble_mask, uint32_t nibble_mask,
> -                               bool has_bank_group, uint8_t bank_group,
> -                               bool has_bank, uint8_t bank,
> -                               bool has_row, uint32_t row,
> -                               bool has_column, uint16_t column,
> -                               bool has_correction_mask, uint64List *correction_mask,
> -                               Error **errp) {}
> -
> -void qmp_cxl_inject_memory_module_event(const char *path, CxlEventLog log,
> -                                        uint8_t flags, uint8_t type,
> -                                        uint8_t health_status,
> -                                        uint8_t media_status,
> -                                        uint8_t additional_status,
> -                                        uint8_t life_used,
> -                                        int16_t temperature,
> -                                        uint32_t dirty_shutdown_count,
> -                                        uint32_t corrected_volatile_error_count,
> -                                        uint32_t corrected_persistent_error_count,
> -                                        Error **errp) {}
> -
> -void qmp_cxl_inject_poison(const char *path, uint64_t start, uint64_t length,
> -                           Error **errp)
> -{
> -    error_setg(errp, "CXL Type 3 support is not compiled in");
> -}
> -
> -void qmp_cxl_inject_uncorrectable_errors(const char *path,
> -                                         CXLUncorErrorRecordList *errors,
> -                                         Error **errp)
> -{
> -    error_setg(errp, "CXL Type 3 support is not compiled in");
> -}
> -
> -void qmp_cxl_inject_correctable_error(const char *path, CxlCorErrorType type,
> -                                      Error **errp)
> -{
> -    error_setg(errp, "CXL Type 3 support is not compiled in");
> -}
> diff --git a/hw/mem/meson.build b/hw/mem/meson.build
> index 56c2618b84..2bdd24512e 100644
> --- a/hw/mem/meson.build
> +++ b/hw/mem/meson.build
> @@ -3,10 +3,10 @@ mem_ss.add(files('memory-device.c'))
>  mem_ss.add(when: 'CONFIG_DIMM', if_true: files('pc-dimm.c'))
>  mem_ss.add(when: 'CONFIG_NPCM7XX', if_true: files('npcm7xx_mc.c'))
>  mem_ss.add(when: 'CONFIG_NVDIMM', if_true: files('nvdimm.c'))
> -mem_ss.add(when: 'CONFIG_CXL_MEM_DEVICE', if_true: files('cxl_type3.c'))
> -softmmu_ss.add(when: 'CONFIG_CXL_MEM_DEVICE', if_false: files('cxl_type3_stubs.c'))
> -softmmu_ss.add(when: 'CONFIG_ALL', if_true: files('cxl_type3_stubs.c'))
> +specific_ss.add(when: 'CONFIG_CXL_MEM_DEVICE', if_true: files('cxl_type3.c'))
> +#specific_ss.add(when: 'CONFIG_CXL_MEM_DEVICE', if_false: files('cxl_type3_stubs.c'))
> +#specific_ss.add(when: 'CONFIG_ALL', if_true: files('cxl_type3_stubs.c'))
>  
> -softmmu_ss.add_all(when: 'CONFIG_MEM_DEVICE', if_true: mem_ss)
> +specific_ss.add_all(when: 'CONFIG_MEM_DEVICE', if_true: mem_ss)
>  
>  softmmu_ss.add(when: 'CONFIG_SPARSE_MEM', if_true: files('sparse-mem.c'))
> diff --git a/qapi/cxl.json b/qapi/cxl-target.json
> similarity index 94%
> rename from qapi/cxl.json
> rename to qapi/cxl-target.json
> index 4c029f2807..b3aecea4f0 100644
> --- a/qapi/cxl.json
> +++ b/qapi/cxl-target.json
> @@ -21,7 +21,8 @@
>             'warning',
>             'failure',
>             'fatal'
> -           ]
> +           ],
> +  'if': 'CONFIG_CXL_MEM_DEVICE'
>   }
>  
>  ##
> @@ -49,7 +50,9 @@
>              'type': 'uint8', 'transaction-type': 'uint8',
>              '*channel': 'uint8', '*rank': 'uint8',
>              '*device': 'uint32', '*component-id': 'str'
> -            }}
> +            },
> +  'if': 'CONFIG_CXL_MEM_DEVICE'
> + }
>  
>  ##
>  # @cxl-inject-dram-event:
> @@ -82,7 +85,9 @@
>              '*channel': 'uint8', '*rank': 'uint8', '*nibble-mask': 'uint32',
>              '*bank-group': 'uint8', '*bank': 'uint8', '*row': 'uint32',
>              '*column': 'uint16', '*correction-mask': [ 'uint64' ]
> -           }}
> +           },
> +  'if': 'CONFIG_CXL_MEM_DEVICE'
> +}
>  
>  ##
>  # @cxl-inject-memory-module-event:
> @@ -115,7 +120,9 @@
>              'dirty-shutdown-count': 'uint32',
>              'corrected-volatile-error-count': 'uint32',
>              'corrected-persistent-error-count': 'uint32'
> -            }}
> +            },
> +  'if': 'CONFIG_CXL_MEM_DEVICE'
> +}
>  
>  ##
>  # @cxl-inject-poison:
> @@ -133,7 +140,9 @@
>  # Since: 8.0
>  ##
>  { 'command': 'cxl-inject-poison',
> -  'data': { 'path': 'str', 'start': 'uint64', 'length': 'uint64' }}
> +  'data': { 'path': 'str', 'start': 'uint64', 'length': 'uint64' },
> +  'if': 'CONFIG_CXL_MEM_DEVICE'
> +}
>  
>  ##
>  # @CxlUncorErrorType:
> @@ -179,8 +188,9 @@
>             'internal',
>             'cxl-ide-tx',
>             'cxl-ide-rx'
> -           ]
> - }
> +           ],
> +  'if': 'CONFIG_CXL_MEM_DEVICE'
> +}
>  
>  ##
>  # @CXLUncorErrorRecord:
> @@ -196,7 +206,8 @@
>    'data': {
>        'type': 'CxlUncorErrorType',
>        'header': [ 'uint32' ]
> -  }
> +  },
> +  'if': 'CONFIG_CXL_MEM_DEVICE'
>  }
>  
>  ##
> @@ -212,7 +223,9 @@
>  ##
>  { 'command': 'cxl-inject-uncorrectable-errors',
>    'data': { 'path': 'str',
> -             'errors': [ 'CXLUncorErrorRecord' ] }}
> +             'errors': [ 'CXLUncorErrorRecord' ] },
> +  'if': 'CONFIG_CXL_MEM_DEVICE'
> +}
>  
>  ##
>  # @CxlCorErrorType:
> @@ -235,7 +248,8 @@
>             'retry-threshold',
>             'cache-poison-received',
>             'mem-poison-received',
> -           'physical']
> +           'physical'],
> +  'if': 'CONFIG_CXL_MEM_DEVICE'
>  }
>  
>  ##
> @@ -254,5 +268,6 @@
>  { 'command': 'cxl-inject-correctable-error',
>    'data': { 'path': 'str',
>              'type': 'CxlCorErrorType'
> -  }
> +           },
> +  'if': 'CONFIG_CXL_MEM_DEVICE'
>  }
> diff --git a/qapi/meson.build b/qapi/meson.build
> index 73c3c8c31a..f5b3f36979 100644
> --- a/qapi/meson.build
> +++ b/qapi/meson.build
> @@ -31,7 +31,7 @@ qapi_all_modules = [
>    'compat',
>    'control',
>    'crypto',
> -  'cxl',
> +  'cxl-target',
>    'dump',
>    'error',
>    'introspect',
> diff --git a/qapi/qapi-schema.json b/qapi/qapi-schema.json
> index 079f2a402a..a79d84577f 100644
> --- a/qapi/qapi-schema.json
> +++ b/qapi/qapi-schema.json
> @@ -95,4 +95,4 @@
>  { 'include': 'pci.json' }
>  { 'include': 'stats.json' }
>  { 'include': 'virtio.json' }
> -{ 'include': 'cxl.json' }
> +{ 'include': 'cxl-target.json' }
Philippe Mathieu-Daudé Feb. 24, 2023, 7:02 p.m. UTC | #12
On 23/2/23 15:27, Jonathan Cameron wrote:
> On Thu, 23 Feb 2023 08:37:46 +0100
> Markus Armbruster <armbru@redhat.com> wrote:
> 
>> Thomas Huth <thuth@redhat.com> writes:
>>
>>> On 22/02/2023 19.16, Philippe Mathieu-Daudé wrote:
>>>> +Thomas (meson) & Marc-André (conditional QAPI)
>>>
>>> + Markus
>>>   
>>>> On 22/2/23 17:49, Jonathan Cameron wrote:
>>
>> [...]
>>
>>>>>>>> Doesn't these need
>>>>>>>>
>>>>>>>>          'if': 'CONFIG_CXL_MEM_DEVICE',
>>>>>>>>
>>>>>>>> ?
>>>>>>>
>>>>>>> If I make this change I get a bunch of
>>>>>>>
>>>>>>> ./qapi/qapi-types-cxl.h:18:13: error: attempt to use poisoned "CONFIG_CXL_MEM_DEVICE"
>>>>>>>       18 | #if defined(CONFIG_CXL_MEM_DEVICE)
>>>>>>
>>>>>> Err, I meant the generic CONFIG_CXL, not CONFIG_CXL_MEM_DEVICE.
>>>>>>   
>>>>>>> It's a target specific define (I think) as built alongside PCI_EXPRESS
>>>>>>> Only CXL_ACPI is specifically included by x86 and arm64 (out of tree)
>>>>>>>
>>>>>>> To be honest though I don't fully understand the QEMU build system so the reason
>>>>>>> for the error might be wrong.
>>>>>>
>>>>>> You need to restrict to system emulation (the 'have_system' check):
>>>>>
>>>>> This doesn't help - still have
>>>>> attempt to used poisoned "CONFIG_CXL"
>>>
>>> Not sure how the QAPI generator works, but target specific config switches can only be used in target specific json files there, so that's machine-target.json and misc-target.json currently, as far as I know. Not sure how the QAPI generator distinguishes between common and target specific code, though ... just by the "-target" suffix? Maybe Markus or Marc-André can comment on that.
>>
>> Whenever you use a poisoned macro in a conditional, all the code
>> generated for this .json file (we call it a "QAPI schema module")
>> becomes target-dependent.  The QAPI code generator itself is blissfully
>> unaware of this.
>>
>> Since target-dependent code needs to be compiled differently, the build
>> process needs to be know which modules are target-dependent.  We do this
>> in one of the stupidest ways that could possibly work: a module is
>> target-dependent if its name ends with "-target".  There are just two
>> right now: qapi/machine-target.json and qapi/misc-target.json.
>>
>> The logic resides in qapi/meson.build.  Look for
>>
>>      if module.endswith('-target')
> 
> Thanks for all the pointers.
>>
>> Questions?
> 
> Is it sensible to make the cxl stuff all target dependent and do the following?
> I like that we can get rid of the stubs if we do this but I'm sure there are
> disadvantages. Only alternative I can currently see is continue to have
> stubs and not make the qmp commands conditional on them doing anything useful.

I still don't understand what is the target-dependent part of CXL.

IIUC CXL depends on PCIe which isn't target dependent.
Markus Armbruster Feb. 27, 2023, 9:40 a.m. UTC | #13
Philippe Mathieu-Daudé <philmd@linaro.org> writes:

> On 23/2/23 15:27, Jonathan Cameron wrote:
>> On Thu, 23 Feb 2023 08:37:46 +0100
>> Markus Armbruster <armbru@redhat.com> wrote:
>>> Whenever you use a poisoned macro in a conditional, all the code
>>> generated for this .json file (we call it a "QAPI schema module")
>>> becomes target-dependent.  The QAPI code generator itself is blissfully
>>> unaware of this.
>>>
>>> Since target-dependent code needs to be compiled differently, the build
>>> process needs to be know which modules are target-dependent.  We do this
>>> in one of the stupidest ways that could possibly work: a module is
>>> target-dependent if its name ends with "-target".  There are just two
>>> right now: qapi/machine-target.json and qapi/misc-target.json.
>>>
>>> The logic resides in qapi/meson.build.  Look for
>>>
>>>      if module.endswith('-target')
>>
>> Thanks for all the pointers.
>>
>>> Questions?
>>>
>> Is it sensible to make the cxl stuff all target dependent and do the following?
>> I like that we can get rid of the stubs if we do this but I'm sure there are
>> disadvantages. Only alternative I can currently see is continue to have
>> stubs and not make the qmp commands conditional on them doing anything useful.
>
> I still don't understand what is the target-dependent part of CXL.
>
> IIUC CXL depends on PCIe which isn't target dependent.

As far as I can tell, the target-dependent part of CXL is the macro
CONFIG_CXL :)

Consider a device model implemented in perfectly target-independent
code, to be linked only into some qemu-system-TARGET.  How do we do
that?

We put a 'config FOO' section in the appropriate Kconfig, and select it
from the target's Kconfig for the targets that want it.  We add device
model sources to Meson source set softmmu_ss when CONFIG_FOO.

This puts CONFIG_FOO=y into the TARGET-softmmu-config-devices.mak, and
#define CONFIG_FOO 1 into TARGET-softmmu-config-devices.h.  It also puts
#pragma GCC poison CONFIG_FOO into config-poison.h.

Note the two CONFIG_FOO have subtly different meaning:

* The make variable means "there is an enabled target that has FOO
  enabled".  It gets propagated to Meson.

* The C macro means "the current target has FOO enabled".  It therefore
  must not be used in target-independent code.  That's why we poison it
  in config-poison.h.

Note that the device model code has no use for C macro CONFIG_FOO.  It
remains target-independent as it should.

Now consider how to have the QAPI schema provide something for FOO.

If we make it a QAPI schema module of its own, we can arrange for it to
be linked only into the qemu-system-TARGET that have the device model,
just like the device model code.  We haven't tried this for individual
devices, only for whole subsystems like PCI.

If we don't make it a module of its own, we have two choices:

* We use 'if': 'CONFIG_FOO'.  This is actually the C macro.  The module
  becomes target-dependent.  We compile the code generated for the
  module separately for each target.

* We make it unconditional.  The module can remain target-independent.
  The code generated for FOO's QAPI schema is linked unconditionally,
  even when the target doesn't need it.  Any references to handwritten
  FOO code need to be satisfied with stubs.

I dislike both.  Existing usage seems to prefer "unconditional schema".
Sticking to that is okay.
Markus Armbruster Oct. 27, 2023, 4:54 a.m. UTC | #14
I'm trying to fill in QMP documentation holes, and found one in commit
415442a1b4a (this patch).  Details inline.

Jonathan Cameron <Jonathan.Cameron@huawei.com> writes:

> CXL uses PCI AER Internal errors to signal to the host that an error has
> occurred. The host can then read more detailed status from the CXL RAS
> capability.
>
> For uncorrectable errors: support multiple injection in one operation
> as this is needed to reliably test multiple header logging support in an
> OS. The equivalent feature doesn't exist for correctable errors, so only
> one error need be injected at a time.
>
> Note:
>  - Header content needs to be manually specified in a fashion that
>    matches the specification for what can be in the header for each
>    error type.
>
> Injection via QMP:
> { "execute": "qmp_capabilities" }
> ...
> { "execute": "cxl-inject-uncorrectable-errors",
>   "arguments": {
>     "path": "/machine/peripheral/cxl-pmem0",
>     "errors": [
>         {
>             "type": "cache-address-parity",
>             "header": [ 3, 4]
>         },
>         {
>             "type": "cache-data-parity",
>             "header": [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31]
>         },
>         {
>             "type": "internal",
>             "header": [ 1, 2, 4]
>         }
>         ]
>   }}
> ...
> { "execute": "cxl-inject-correctable-error",
>     "arguments": {
>         "path": "/machine/peripheral/cxl-pmem0",
>         "type": "physical"
>     } }
>
> Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>

[...]

> diff --git a/qapi/cxl.json b/qapi/cxl.json
> new file mode 100644
> index 0000000000..ac7e167fa2
> --- /dev/null
> +++ b/qapi/cxl.json
> @@ -0,0 +1,118 @@
> +# -*- Mode: Python -*-
> +# vim: filetype=python
> +
> +##
> +# = CXL devices
> +##
> +
> +##
> +# @CxlUncorErrorType:
> +#
> +# Type of uncorrectable CXL error to inject. These errors are reported via
> +# an AER uncorrectable internal error with additional information logged at
> +# the CXL device.
> +#
> +# @cache-data-parity: Data error such as data parity or data ECC error CXL.cache
> +# @cache-address-parity: Address parity or other errors associated with the
> +#                        address field on CXL.cache
> +# @cache-be-parity: Byte enable parity or other byte enable errors on CXL.cache
> +# @cache-data-ecc: ECC error on CXL.cache
> +# @mem-data-parity: Data error such as data parity or data ECC error on CXL.mem
> +# @mem-address-parity: Address parity or other errors associated with the
> +#                      address field on CXL.mem
> +# @mem-be-parity: Byte enable parity or other byte enable errors on CXL.mem.
> +# @mem-data-ecc: Data ECC error on CXL.mem.
> +# @reinit-threshold: REINIT threshold hit.
> +# @rsvd-encoding: Received unrecognized encoding.
> +# @poison-received: Received poison from the peer.
> +# @receiver-overflow: Buffer overflows (first 3 bits of header log indicate which)
> +# @internal: Component specific error
> +# @cxl-ide-tx: Integrity and data encryption tx error.
> +# @cxl-ide-rx: Integrity and data encryption rx error.
> +##
> +
> +{ 'enum': 'CxlUncorErrorType',
> +  'data': ['cache-data-parity',
> +           'cache-address-parity',
> +           'cache-be-parity',
> +           'cache-data-ecc',
> +           'mem-data-parity',
> +           'mem-address-parity',
> +           'mem-be-parity',
> +           'mem-data-ecc',
> +           'reinit-threshold',
> +           'rsvd-encoding',
> +           'poison-received',
> +           'receiver-overflow',
> +           'internal',
> +           'cxl-ide-tx',
> +           'cxl-ide-rx'
> +           ]
> + }
> +
> +##
> +# @CXLUncorErrorRecord:
> +#
> +# Record of a single error including header log.
> +#
> +# @type: Type of error
> +# @header: 16 DWORD of header.
> +##
> +{ 'struct': 'CXLUncorErrorRecord',
> +  'data': {
> +      'type': 'CxlUncorErrorType',
> +      'header': [ 'uint32' ]
> +  }
> +}
> +
> +##
> +# @cxl-inject-uncorrectable-errors:
> +#
> +# Command to allow injection of multiple errors in one go. This allows testing
> +# of multiple header log handling in the OS.
> +#
> +# @path: CXL Type 3 device canonical QOM path
> +# @errors: Errors to inject
> +##
> +{ 'command': 'cxl-inject-uncorrectable-errors',
> +  'data': { 'path': 'str',
> +             'errors': [ 'CXLUncorErrorRecord' ] }}
> +
> +##
> +# @CxlCorErrorType:
> +#
> +# Type of CXL correctable error to inject
> +#
> +# @cache-data-ecc: Data ECC error on CXL.cache
> +# @mem-data-ecc: Data ECC error on CXL.mem

Missing:

   # @retry-threshold: ...

I need suitable description text.  Can you help me?

> +# @crc-threshold: Component specific and applicable to 68 byte Flit mode only.
> +# @cache-poison-received: Received poison from a peer on CXL.cache.
> +# @mem-poison-received: Received poison from a peer on CXL.mem
> +# @physical: Received error indication from the physical layer.
> +##
> +{ 'enum': 'CxlCorErrorType',
> +  'data': ['cache-data-ecc',
> +           'mem-data-ecc',
> +           'crc-threshold',
> +           'retry-threshold',
> +           'cache-poison-received',
> +           'mem-poison-received',
> +           'physical']
> +}
> +
> +##
> +# @cxl-inject-correctable-error:
> +#
> +# Command to inject a single correctable error.  Multiple error injection
> +# of this error type is not interesting as there is no associated header log.
> +# These errors are reported via AER as a correctable internal error, with
> +# additional detail available from the CXL device.
> +#
> +# @path: CXL Type 3 device canonical QOM path
> +# @type: Type of error.
> +##
> +{ 'command': 'cxl-inject-correctable-error',
> +  'data': { 'path': 'str',
> +            'type': 'CxlCorErrorType'
> +  }
> +}

[...]
Jonathan Cameron Oct. 31, 2023, 5:55 p.m. UTC | #15
On Fri, 27 Oct 2023 06:54:39 +0200
Markus Armbruster <armbru@redhat.com> wrote:

> I'm trying to fill in QMP documentation holes, and found one in commit
> 415442a1b4a (this patch).  Details inline.
> 
> Jonathan Cameron <Jonathan.Cameron@huawei.com> writes:
> 
> > CXL uses PCI AER Internal errors to signal to the host that an error has
> > occurred. The host can then read more detailed status from the CXL RAS
> > capability.
> >
> > For uncorrectable errors: support multiple injection in one operation
> > as this is needed to reliably test multiple header logging support in an
> > OS. The equivalent feature doesn't exist for correctable errors, so only
> > one error need be injected at a time.
> >
> > Note:
> >  - Header content needs to be manually specified in a fashion that
> >    matches the specification for what can be in the header for each
> >    error type.
> >
> > Injection via QMP:
> > { "execute": "qmp_capabilities" }
> > ...
> > { "execute": "cxl-inject-uncorrectable-errors",
> >   "arguments": {
> >     "path": "/machine/peripheral/cxl-pmem0",
> >     "errors": [
> >         {
> >             "type": "cache-address-parity",
> >             "header": [ 3, 4]
> >         },
> >         {
> >             "type": "cache-data-parity",
> >             "header": [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31]
> >         },
> >         {
> >             "type": "internal",
> >             "header": [ 1, 2, 4]
> >         }
> >         ]
> >   }}
> > ...
> > { "execute": "cxl-inject-correctable-error",
> >     "arguments": {
> >         "path": "/machine/peripheral/cxl-pmem0",
> >         "type": "physical"
> >     } }
> >
> > Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>  
> 
> [...]
> 
> > diff --git a/qapi/cxl.json b/qapi/cxl.json
> > new file mode 100644
> > index 0000000000..ac7e167fa2
> > --- /dev/null
> > +++ b/qapi/cxl.json
> > @@ -0,0 +1,118 @@
> > +# -*- Mode: Python -*-
> > +# vim: filetype=python
> > +
> > +##
> > +# = CXL devices
> > +##
> > +
> > +##
> > +# @CxlUncorErrorType:
> > +#
> > +# Type of uncorrectable CXL error to inject. These errors are reported via
> > +# an AER uncorrectable internal error with additional information logged at
> > +# the CXL device.
> > +#
> > +# @cache-data-parity: Data error such as data parity or data ECC error CXL.cache
> > +# @cache-address-parity: Address parity or other errors associated with the
> > +#                        address field on CXL.cache
> > +# @cache-be-parity: Byte enable parity or other byte enable errors on CXL.cache
> > +# @cache-data-ecc: ECC error on CXL.cache
> > +# @mem-data-parity: Data error such as data parity or data ECC error on CXL.mem
> > +# @mem-address-parity: Address parity or other errors associated with the
> > +#                      address field on CXL.mem
> > +# @mem-be-parity: Byte enable parity or other byte enable errors on CXL.mem.
> > +# @mem-data-ecc: Data ECC error on CXL.mem.
> > +# @reinit-threshold: REINIT threshold hit.
> > +# @rsvd-encoding: Received unrecognized encoding.
> > +# @poison-received: Received poison from the peer.
> > +# @receiver-overflow: Buffer overflows (first 3 bits of header log indicate which)
> > +# @internal: Component specific error
> > +# @cxl-ide-tx: Integrity and data encryption tx error.
> > +# @cxl-ide-rx: Integrity and data encryption rx error.
> > +##
> > +
> > +{ 'enum': 'CxlUncorErrorType',
> > +  'data': ['cache-data-parity',
> > +           'cache-address-parity',
> > +           'cache-be-parity',
> > +           'cache-data-ecc',
> > +           'mem-data-parity',
> > +           'mem-address-parity',
> > +           'mem-be-parity',
> > +           'mem-data-ecc',
> > +           'reinit-threshold',
> > +           'rsvd-encoding',
> > +           'poison-received',
> > +           'receiver-overflow',
> > +           'internal',
> > +           'cxl-ide-tx',
> > +           'cxl-ide-rx'
> > +           ]
> > + }
> > +
> > +##
> > +# @CXLUncorErrorRecord:
> > +#
> > +# Record of a single error including header log.
> > +#
> > +# @type: Type of error
> > +# @header: 16 DWORD of header.
> > +##
> > +{ 'struct': 'CXLUncorErrorRecord',
> > +  'data': {
> > +      'type': 'CxlUncorErrorType',
> > +      'header': [ 'uint32' ]
> > +  }
> > +}
> > +
> > +##
> > +# @cxl-inject-uncorrectable-errors:
> > +#
> > +# Command to allow injection of multiple errors in one go. This allows testing
> > +# of multiple header log handling in the OS.
> > +#
> > +# @path: CXL Type 3 device canonical QOM path
> > +# @errors: Errors to inject
> > +##
> > +{ 'command': 'cxl-inject-uncorrectable-errors',
> > +  'data': { 'path': 'str',
> > +             'errors': [ 'CXLUncorErrorRecord' ] }}
> > +
> > +##
> > +# @CxlCorErrorType:
> > +#
> > +# Type of CXL correctable error to inject
> > +#
> > +# @cache-data-ecc: Data ECC error on CXL.cache
> > +# @mem-data-ecc: Data ECC error on CXL.mem  
> 
> Missing:
> 
>    # @retry-threshold: ...
> 
> I need suitable description text.  Can you help me?

Spec says:
"Retry Threshold Hit. (NUM_RETRY>=MAX_NUM_RETRY).
See Section 4.2.8.5.1 for the definitions of NUM_RETRY and MAX_NUM_RETRY."

Following the reference:
"NUM_RETRY: This counter is used to count the number of RETRY.Req requests
sent to retry the same flit. The counter remains enabled during the whole retry
sequence (state is not RETRY_LOCAL_NORMAL). It is reset to 0 at initialization. It is
also reset to 0 when a RETRY.Ack sequence is received with the Empty bit set or
whenever the LRSM state is RETRY_LOCAL_NORMAL and an error-free retryable flit
is received. The counter is incremented whenever the LRSM state changes from
RETRY_LLRREQ to RETRY_LOCAL_IDLE. If the counter reaches a threshold (called
MAX_NUM_RETRY), then the local retry state machine transitions to the
RETRY_PHY_REINIT. The NUM_RETRY counter is also reset when the Physical layer
exits from LTSSM recovery state (the LRSM transition through RETRY_PHY_REINIT
to RETRY_LLRREQ)."

So based on my failure to understand much of that beyond it has something
to do with low level retries, maybe just

"Number of times the retry threshold was hit."

Thanks for tidying this up!
?


> 
> > +# @crc-threshold: Component specific and applicable to 68 byte Flit mode only.
> > +# @cache-poison-received: Received poison from a peer on CXL.cache.
> > +# @mem-poison-received: Received poison from a peer on CXL.mem
> > +# @physical: Received error indication from the physical layer.
> > +##
> > +{ 'enum': 'CxlCorErrorType',
> > +  'data': ['cache-data-ecc',
> > +           'mem-data-ecc',
> > +           'crc-threshold',
> > +           'retry-threshold',
> > +           'cache-poison-received',
> > +           'mem-poison-received',
> > +           'physical']
> > +}
> > +
> > +##
> > +# @cxl-inject-correctable-error:
> > +#
> > +# Command to inject a single correctable error.  Multiple error injection
> > +# of this error type is not interesting as there is no associated header log.
> > +# These errors are reported via AER as a correctable internal error, with
> > +# additional detail available from the CXL device.
> > +#
> > +# @path: CXL Type 3 device canonical QOM path
> > +# @type: Type of error.
> > +##
> > +{ 'command': 'cxl-inject-correctable-error',
> > +  'data': { 'path': 'str',
> > +            'type': 'CxlCorErrorType'
> > +  }
> > +}  
> 
> [...]
> 
>
Markus Armbruster Nov. 2, 2023, 6:47 a.m. UTC | #16
Jonathan Cameron <Jonathan.Cameron@Huawei.com> writes:

> On Fri, 27 Oct 2023 06:54:39 +0200
> Markus Armbruster <armbru@redhat.com> wrote:
>
>> I'm trying to fill in QMP documentation holes, and found one in commit
>> 415442a1b4a (this patch).  Details inline.
>> 
>> Jonathan Cameron <Jonathan.Cameron@huawei.com> writes:
>> 
>> > CXL uses PCI AER Internal errors to signal to the host that an error has
>> > occurred. The host can then read more detailed status from the CXL RAS
>> > capability.
>> >
>> > For uncorrectable errors: support multiple injection in one operation
>> > as this is needed to reliably test multiple header logging support in an
>> > OS. The equivalent feature doesn't exist for correctable errors, so only
>> > one error need be injected at a time.
>> >
>> > Note:
>> >  - Header content needs to be manually specified in a fashion that
>> >    matches the specification for what can be in the header for each
>> >    error type.
>> >
>> > Injection via QMP:
>> > { "execute": "qmp_capabilities" }
>> > ...
>> > { "execute": "cxl-inject-uncorrectable-errors",
>> >   "arguments": {
>> >     "path": "/machine/peripheral/cxl-pmem0",
>> >     "errors": [
>> >         {
>> >             "type": "cache-address-parity",
>> >             "header": [ 3, 4]
>> >         },
>> >         {
>> >             "type": "cache-data-parity",
>> >             "header": [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31]
>> >         },
>> >         {
>> >             "type": "internal",
>> >             "header": [ 1, 2, 4]
>> >         }
>> >         ]
>> >   }}
>> > ...
>> > { "execute": "cxl-inject-correctable-error",
>> >     "arguments": {
>> >         "path": "/machine/peripheral/cxl-pmem0",
>> >         "type": "physical"
>> >     } }
>> >
>> > Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>  
>> 
>> [...]
>> 
>> > diff --git a/qapi/cxl.json b/qapi/cxl.json
>> > new file mode 100644
>> > index 0000000000..ac7e167fa2
>> > --- /dev/null
>> > +++ b/qapi/cxl.json
>> > @@ -0,0 +1,118 @@
>> > +# -*- Mode: Python -*-
>> > +# vim: filetype=python
>> > +
>> > +##
>> > +# = CXL devices
>> > +##
>> > +
>> > +##
>> > +# @CxlUncorErrorType:
>> > +#
>> > +# Type of uncorrectable CXL error to inject. These errors are reported via
>> > +# an AER uncorrectable internal error with additional information logged at
>> > +# the CXL device.
>> > +#
>> > +# @cache-data-parity: Data error such as data parity or data ECC error CXL.cache
>> > +# @cache-address-parity: Address parity or other errors associated with the
>> > +#                        address field on CXL.cache
>> > +# @cache-be-parity: Byte enable parity or other byte enable errors on CXL.cache
>> > +# @cache-data-ecc: ECC error on CXL.cache
>> > +# @mem-data-parity: Data error such as data parity or data ECC error on CXL.mem
>> > +# @mem-address-parity: Address parity or other errors associated with the
>> > +#                      address field on CXL.mem
>> > +# @mem-be-parity: Byte enable parity or other byte enable errors on CXL.mem.
>> > +# @mem-data-ecc: Data ECC error on CXL.mem.
>> > +# @reinit-threshold: REINIT threshold hit.
>> > +# @rsvd-encoding: Received unrecognized encoding.
>> > +# @poison-received: Received poison from the peer.
>> > +# @receiver-overflow: Buffer overflows (first 3 bits of header log indicate which)
>> > +# @internal: Component specific error
>> > +# @cxl-ide-tx: Integrity and data encryption tx error.
>> > +# @cxl-ide-rx: Integrity and data encryption rx error.
>> > +##
>> > +
>> > +{ 'enum': 'CxlUncorErrorType',
>> > +  'data': ['cache-data-parity',
>> > +           'cache-address-parity',
>> > +           'cache-be-parity',
>> > +           'cache-data-ecc',
>> > +           'mem-data-parity',
>> > +           'mem-address-parity',
>> > +           'mem-be-parity',
>> > +           'mem-data-ecc',
>> > +           'reinit-threshold',
>> > +           'rsvd-encoding',
>> > +           'poison-received',
>> > +           'receiver-overflow',
>> > +           'internal',
>> > +           'cxl-ide-tx',
>> > +           'cxl-ide-rx'
>> > +           ]
>> > + }
>> > +
>> > +##
>> > +# @CXLUncorErrorRecord:
>> > +#
>> > +# Record of a single error including header log.
>> > +#
>> > +# @type: Type of error
>> > +# @header: 16 DWORD of header.
>> > +##
>> > +{ 'struct': 'CXLUncorErrorRecord',
>> > +  'data': {
>> > +      'type': 'CxlUncorErrorType',
>> > +      'header': [ 'uint32' ]
>> > +  }
>> > +}
>> > +
>> > +##
>> > +# @cxl-inject-uncorrectable-errors:
>> > +#
>> > +# Command to allow injection of multiple errors in one go. This allows testing
>> > +# of multiple header log handling in the OS.
>> > +#
>> > +# @path: CXL Type 3 device canonical QOM path
>> > +# @errors: Errors to inject
>> > +##
>> > +{ 'command': 'cxl-inject-uncorrectable-errors',
>> > +  'data': { 'path': 'str',
>> > +             'errors': [ 'CXLUncorErrorRecord' ] }}
>> > +
>> > +##
>> > +# @CxlCorErrorType:
>> > +#
>> > +# Type of CXL correctable error to inject
>> > +#
>> > +# @cache-data-ecc: Data ECC error on CXL.cache
>> > +# @mem-data-ecc: Data ECC error on CXL.mem  
>> 
>> Missing:
>> 
>>    # @retry-threshold: ...
>> 
>> I need suitable description text.  Can you help me?
>
> Spec says:
> "Retry Threshold Hit. (NUM_RETRY>=MAX_NUM_RETRY).
> See Section 4.2.8.5.1 for the definitions of NUM_RETRY and MAX_NUM_RETRY."
>
> Following the reference:
> "NUM_RETRY: This counter is used to count the number of RETRY.Req requests
> sent to retry the same flit. The counter remains enabled during the whole retry
> sequence (state is not RETRY_LOCAL_NORMAL). It is reset to 0 at initialization. It is
> also reset to 0 when a RETRY.Ack sequence is received with the Empty bit set or
> whenever the LRSM state is RETRY_LOCAL_NORMAL and an error-free retryable flit
> is received. The counter is incremented whenever the LRSM state changes from
> RETRY_LLRREQ to RETRY_LOCAL_IDLE. If the counter reaches a threshold (called
> MAX_NUM_RETRY), then the local retry state machine transitions to the
> RETRY_PHY_REINIT. The NUM_RETRY counter is also reset when the Physical layer
> exits from LTSSM recovery state (the LRSM transition through RETRY_PHY_REINIT
> to RETRY_LLRREQ)."
>
> So based on my failure to understand much of that beyond it has something
> to do with low level retries, maybe just
>
> "Number of times the retry threshold was hit."

Sold!  Thanks for your help.

> Thanks for tidying this up!

You're welcome!

I intend post the patch as part of a series filling in documentation
holes all over the place.  Will take some time, I'm afraid.

[...]
diff mbox series

Patch

diff --git a/hw/cxl/cxl-component-utils.c b/hw/cxl/cxl-component-utils.c
index 737b4764b9..b665d4f565 100644
--- a/hw/cxl/cxl-component-utils.c
+++ b/hw/cxl/cxl-component-utils.c
@@ -142,16 +142,18 @@  static void ras_init_common(uint32_t *reg_state, uint32_t *write_msk)
      * be handled as RO.
      */
     stl_le_p(reg_state + R_CXL_RAS_UNC_ERR_STATUS, 0);
+    stl_le_p(write_msk + R_CXL_RAS_UNC_ERR_STATUS, 0x1cfff);
     /* Bits 12-13 and 17-31 reserved in CXL 2.0 */
     stl_le_p(reg_state + R_CXL_RAS_UNC_ERR_MASK, 0x1cfff);
     stl_le_p(write_msk + R_CXL_RAS_UNC_ERR_MASK, 0x1cfff);
     stl_le_p(reg_state + R_CXL_RAS_UNC_ERR_SEVERITY, 0x1cfff);
     stl_le_p(write_msk + R_CXL_RAS_UNC_ERR_SEVERITY, 0x1cfff);
     stl_le_p(reg_state + R_CXL_RAS_COR_ERR_STATUS, 0);
+    stl_le_p(write_msk + R_CXL_RAS_COR_ERR_STATUS, 0x7f);
     stl_le_p(reg_state + R_CXL_RAS_COR_ERR_MASK, 0x7f);
     stl_le_p(write_msk + R_CXL_RAS_COR_ERR_MASK, 0x7f);
     /* CXL switches and devices must set */
-    stl_le_p(reg_state + R_CXL_RAS_ERR_CAP_CTRL, 0x00);
+    stl_le_p(reg_state + R_CXL_RAS_ERR_CAP_CTRL, 0x200);
 }
 
 static void hdm_init_common(uint32_t *reg_state, uint32_t *write_msk,
diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
index 6cdd988d1d..abe60b362c 100644
--- a/hw/mem/cxl_type3.c
+++ b/hw/mem/cxl_type3.c
@@ -1,6 +1,7 @@ 
 #include "qemu/osdep.h"
 #include "qemu/units.h"
 #include "qemu/error-report.h"
+#include "qapi/qapi-commands-cxl.h"
 #include "hw/mem/memory-device.h"
 #include "hw/mem/pc-dimm.h"
 #include "hw/pci/pci.h"
@@ -323,6 +324,66 @@  static void hdm_decoder_commit(CXLType3Dev *ct3d, int which)
     ARRAY_FIELD_DP32(cache_mem, CXL_HDM_DECODER0_CTRL, COMMITTED, 1);
 }
 
+static int ct3d_qmp_uncor_err_to_cxl(CxlUncorErrorType qmp_err)
+{
+    switch (qmp_err) {
+    case CXL_UNCOR_ERROR_TYPE_CACHE_DATA_PARITY:
+        return CXL_RAS_UNC_ERR_CACHE_DATA_PARITY;
+    case CXL_UNCOR_ERROR_TYPE_CACHE_ADDRESS_PARITY:
+        return CXL_RAS_UNC_ERR_CACHE_ADDRESS_PARITY;
+    case CXL_UNCOR_ERROR_TYPE_CACHE_BE_PARITY:
+        return CXL_RAS_UNC_ERR_CACHE_BE_PARITY;
+    case CXL_UNCOR_ERROR_TYPE_CACHE_DATA_ECC:
+        return CXL_RAS_UNC_ERR_CACHE_DATA_ECC;
+    case CXL_UNCOR_ERROR_TYPE_MEM_DATA_PARITY:
+        return CXL_RAS_UNC_ERR_MEM_DATA_PARITY;
+    case CXL_UNCOR_ERROR_TYPE_MEM_ADDRESS_PARITY:
+        return CXL_RAS_UNC_ERR_MEM_ADDRESS_PARITY;
+    case CXL_UNCOR_ERROR_TYPE_MEM_BE_PARITY:
+        return CXL_RAS_UNC_ERR_MEM_BE_PARITY;
+    case CXL_UNCOR_ERROR_TYPE_MEM_DATA_ECC:
+        return CXL_RAS_UNC_ERR_MEM_DATA_ECC;
+    case CXL_UNCOR_ERROR_TYPE_REINIT_THRESHOLD:
+        return CXL_RAS_UNC_ERR_REINIT_THRESHOLD;
+    case CXL_UNCOR_ERROR_TYPE_RSVD_ENCODING:
+        return CXL_RAS_UNC_ERR_RSVD_ENCODING;
+    case CXL_UNCOR_ERROR_TYPE_POISON_RECEIVED:
+        return CXL_RAS_UNC_ERR_POISON_RECEIVED;
+    case CXL_UNCOR_ERROR_TYPE_RECEIVER_OVERFLOW:
+        return CXL_RAS_UNC_ERR_RECEIVER_OVERFLOW;
+    case CXL_UNCOR_ERROR_TYPE_INTERNAL:
+        return CXL_RAS_UNC_ERR_INTERNAL;
+    case CXL_UNCOR_ERROR_TYPE_CXL_IDE_TX:
+        return CXL_RAS_UNC_ERR_CXL_IDE_TX;
+    case CXL_UNCOR_ERROR_TYPE_CXL_IDE_RX:
+        return CXL_RAS_UNC_ERR_CXL_IDE_RX;
+    default:
+        return -EINVAL;
+    }
+}
+
+static int ct3d_qmp_cor_err_to_cxl(CxlCorErrorType qmp_err)
+{
+    switch (qmp_err) {
+    case CXL_COR_ERROR_TYPE_CACHE_DATA_ECC:
+        return CXL_RAS_COR_ERR_CACHE_DATA_ECC;
+    case CXL_COR_ERROR_TYPE_MEM_DATA_ECC:
+        return CXL_RAS_COR_ERR_MEM_DATA_ECC;
+    case CXL_COR_ERROR_TYPE_CRC_THRESHOLD:
+        return CXL_RAS_COR_ERR_CRC_THRESHOLD;
+    case CXL_COR_ERROR_TYPE_RETRY_THRESHOLD:
+        return CXL_RAS_COR_ERR_RETRY_THRESHOLD;
+    case CXL_COR_ERROR_TYPE_CACHE_POISON_RECEIVED:
+        return CXL_RAS_COR_ERR_CACHE_POISON_RECEIVED;
+    case CXL_COR_ERROR_TYPE_MEM_POISON_RECEIVED:
+        return CXL_RAS_COR_ERR_MEM_POISON_RECEIVED;
+    case CXL_COR_ERROR_TYPE_PHYSICAL:
+        return CXL_RAS_COR_ERR_PHYSICAL;
+    default:
+        return -EINVAL;
+    }
+}
+
 static void ct3d_reg_write(void *opaque, hwaddr offset, uint64_t value,
                            unsigned size)
 {
@@ -341,6 +402,83 @@  static void ct3d_reg_write(void *opaque, hwaddr offset, uint64_t value,
         should_commit = FIELD_EX32(value, CXL_HDM_DECODER0_CTRL, COMMIT);
         which_hdm = 0;
         break;
+    case A_CXL_RAS_UNC_ERR_STATUS:
+    {
+        uint32_t capctrl = ldl_le_p(cache_mem + R_CXL_RAS_ERR_CAP_CTRL);
+        uint32_t fe = FIELD_EX32(capctrl, CXL_RAS_ERR_CAP_CTRL, FIRST_ERROR_POINTER);
+        CXLError *cxl_err;
+        uint32_t unc_err;
+
+        /*
+         * If single bit written that corresponds to the first error
+         * pointer being cleared, update the status and header log.
+         */
+        if (!QTAILQ_EMPTY(&ct3d->error_list)) {
+            if ((1 << fe) ^ value) {
+                CXLError *cxl_next;
+                /*
+                 * Software is using wrong flow for multiple header recording
+                 * Following behavior in PCIe r6.0 and assuming multiple
+                 * header support. Implementation defined choice to clear all
+                 * matching records if more than one bit set - which corresponds
+                 * closest to behavior of hardware not capable of multiple
+                 * header recording.
+                 */
+                QTAILQ_FOREACH_SAFE(cxl_err, &ct3d->error_list, node, cxl_next) {
+                    if ((1 << cxl_err->type) & value) {
+                        QTAILQ_REMOVE(&ct3d->error_list, cxl_err, node);
+                        g_free(cxl_err);
+                    }
+                }
+            } else {
+                /* Done with previous FE, so drop from list */
+                cxl_err = QTAILQ_FIRST(&ct3d->error_list);
+                QTAILQ_REMOVE(&ct3d->error_list, cxl_err, node);
+                g_free(cxl_err);
+            }
+
+            /*
+             * If there is another FE, then put that in place and update
+             * the header log
+             */
+            if (!QTAILQ_EMPTY(&ct3d->error_list)) {
+                uint32_t *header_log = &cache_mem[R_CXL_RAS_ERR_HEADER0];
+                int i;
+
+                cxl_err = QTAILQ_FIRST(&ct3d->error_list);
+                for (i = 0; i < CXL_RAS_ERR_HEADER_NUM; i++) {
+                    stl_le_p(header_log + i, cxl_err->header[i]);
+                }
+                capctrl = FIELD_DP32(capctrl, CXL_RAS_ERR_CAP_CTRL,
+                                     FIRST_ERROR_POINTER, cxl_err->type);
+            } else {
+                /*
+                 * If no more errors, then follow recomendation of PCI spec
+                 * r6.0 6.2.4.2 to set the first error pointer to a status
+                 * bit that will never be used.
+                 */
+                capctrl = FIELD_DP32(capctrl, CXL_RAS_ERR_CAP_CTRL,
+                                     FIRST_ERROR_POINTER,
+                                     CXL_RAS_UNC_ERR_CXL_UNUSED);
+            }
+            stl_le_p((uint8_t *)cache_mem + A_CXL_RAS_ERR_CAP_CTRL, capctrl);
+        }
+        unc_err = 0;
+        QTAILQ_FOREACH(cxl_err, &ct3d->error_list, node) {
+            unc_err |= 1 << cxl_err->type;
+        }
+        stl_le_p((uint8_t *)cache_mem + offset, unc_err);
+
+        return;
+    }
+    case A_CXL_RAS_COR_ERR_STATUS:
+    {
+        uint32_t rw1c = value;
+        uint32_t temp = ldl_le_p((uint8_t *)cache_mem + offset);
+        temp &= ~rw1c;
+        stl_le_p((uint8_t *)cache_mem + offset, temp);
+        return;
+    }
     default:
         break;
     }
@@ -404,6 +542,8 @@  static void ct3_realize(PCIDevice *pci_dev, Error **errp)
     unsigned short msix_num = 1;
     int i, rc;
 
+    QTAILQ_INIT(&ct3d->error_list);
+
     if (!cxl_setup_memory(ct3d, errp)) {
         return;
     }
@@ -631,6 +771,147 @@  static void set_lsa(CXLType3Dev *ct3d, const void *buf, uint64_t size,
      */
 }
 
+/* For uncorrectable errors include support for multiple header recording */
+void qmp_cxl_inject_uncorrectable_errors(const char *path,
+                                         CXLUncorErrorRecordList *errors,
+                                         Error **errp)
+{
+    Object *obj = object_resolve_path(path, NULL);
+    static PCIEAERErr err = {};
+    CXLType3Dev *ct3d;
+    CXLError *cxl_err;
+    uint32_t *reg_state;
+    uint32_t unc_err;
+    bool first;
+
+    if (!obj) {
+        error_setg(errp, "Unable to resolve path");
+        return;
+    }
+
+    if (!object_dynamic_cast(obj, TYPE_CXL_TYPE3)) {
+        error_setg(errp, "Path does not point to a CXL type 3 device");
+        return;
+    }
+
+    err.status = PCI_ERR_UNC_INTN;
+    err.source_id = pci_requester_id(PCI_DEVICE(obj));
+    err.flags = 0;
+
+    ct3d = CXL_TYPE3(obj);
+
+    first = QTAILQ_EMPTY(&ct3d->error_list);
+    reg_state = ct3d->cxl_cstate.crb.cache_mem_registers;
+    while (errors) {
+        uint32List *header = errors->value->header;
+        uint8_t header_count = 0;
+        int cxl_err_code;
+
+        cxl_err_code = ct3d_qmp_uncor_err_to_cxl(errors->value->type);
+        if (cxl_err_code < 0) {
+            error_setg(errp, "Unknown error code");
+            return;
+        }
+
+        /* If the error is masked, nothing to do here */
+        if (!((1 << cxl_err_code) &
+              ~ldl_le_p(reg_state + R_CXL_RAS_UNC_ERR_MASK))) {
+            errors = errors->next;
+            continue;
+        }
+
+        cxl_err = g_malloc0(sizeof(*cxl_err));
+        if (!cxl_err) {
+            return;
+        }
+
+        cxl_err->type = cxl_err_code;
+        while (header && header_count < 32) {
+            cxl_err->header[header_count++] = header->value;
+            header = header->next;
+        }
+        if (header_count > 32) {
+            error_setg(errp, "Header must be 32 DWORD or less");
+            return;
+        }
+        QTAILQ_INSERT_TAIL(&ct3d->error_list, cxl_err, node);
+
+        errors = errors->next;
+    }
+
+    if (first && !QTAILQ_EMPTY(&ct3d->error_list)) {
+        uint32_t *cache_mem = ct3d->cxl_cstate.crb.cache_mem_registers;
+        uint32_t capctrl = ldl_le_p(cache_mem + R_CXL_RAS_ERR_CAP_CTRL);
+        uint32_t *header_log = &cache_mem[R_CXL_RAS_ERR_HEADER0];
+        int i;
+
+        cxl_err = QTAILQ_FIRST(&ct3d->error_list);
+        for (i = 0; i < CXL_RAS_ERR_HEADER_NUM; i++) {
+            stl_le_p(header_log + i, cxl_err->header[i]);
+        }
+
+        capctrl = FIELD_DP32(capctrl, CXL_RAS_ERR_CAP_CTRL,
+                             FIRST_ERROR_POINTER, cxl_err->type);
+        stl_le_p(cache_mem + R_CXL_RAS_ERR_CAP_CTRL, capctrl);
+    }
+
+    unc_err = 0;
+    QTAILQ_FOREACH(cxl_err, &ct3d->error_list, node) {
+        unc_err |= (1 << cxl_err->type);
+    }
+    if (!unc_err) {
+        return;
+    }
+
+    stl_le_p(reg_state + R_CXL_RAS_UNC_ERR_STATUS, unc_err);
+    pcie_aer_inject_error(PCI_DEVICE(obj), &err);
+
+    return;
+}
+
+void qmp_cxl_inject_correctable_error(const char *path, CxlCorErrorType type,
+                                      Error **errp)
+{
+    static PCIEAERErr err = {};
+    Object *obj = object_resolve_path(path, NULL);
+    CXLType3Dev *ct3d;
+    uint32_t *reg_state;
+    uint32_t cor_err;
+    int cxl_err_type;
+
+    if (!obj) {
+        error_setg(errp, "Unable to resolve path");
+        return;
+    }
+    if (!object_dynamic_cast(obj, TYPE_CXL_TYPE3)) {
+        error_setg(errp, "Path does not point to a CXL type 3 device");
+        return;
+    }
+
+    err.status = PCI_ERR_COR_INTERNAL;
+    err.source_id = pci_requester_id(PCI_DEVICE(obj));
+    err.flags = PCIE_AER_ERR_IS_CORRECTABLE;
+
+    ct3d = CXL_TYPE3(obj);
+    reg_state = ct3d->cxl_cstate.crb.cache_mem_registers;
+    cor_err = ldl_le_p(reg_state + R_CXL_RAS_COR_ERR_STATUS);
+
+    cxl_err_type = ct3d_qmp_cor_err_to_cxl(type);
+    if (cxl_err_type < 0) {
+        error_setg(errp, "Invalid COR error");
+        return;
+    }
+    /* If the error is masked, nothting to do here */
+    if (!((1 << cxl_err_type) & ~ldl_le_p(reg_state + R_CXL_RAS_COR_ERR_MASK))) {
+        return;
+    }
+
+    cor_err |= (1 << cxl_err_type);
+    stl_le_p(reg_state + R_CXL_RAS_COR_ERR_STATUS, cor_err);
+
+    pcie_aer_inject_error(PCI_DEVICE(obj), &err);
+}
+
 static void ct3_class_init(ObjectClass *oc, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(oc);
diff --git a/hw/mem/cxl_type3_stubs.c b/hw/mem/cxl_type3_stubs.c
new file mode 100644
index 0000000000..b6b51ced54
--- /dev/null
+++ b/hw/mem/cxl_type3_stubs.c
@@ -0,0 +1,10 @@ 
+
+#include "qemu/osdep.h"
+#include "qapi/qapi-commands-cxl.h"
+
+void qmp_cxl_inject_uncorrectable_errors(const char *path,
+                                         CXLUncorErrorRecordList *errors,
+                                         Error **errp) {}
+
+void qmp_cxl_inject_correctable_error(const char *path, CxlCorErrorType type,
+                                      Error **errp) {}
diff --git a/hw/mem/meson.build b/hw/mem/meson.build
index 609b2b36fc..56c2618b84 100644
--- a/hw/mem/meson.build
+++ b/hw/mem/meson.build
@@ -4,6 +4,8 @@  mem_ss.add(when: 'CONFIG_DIMM', if_true: files('pc-dimm.c'))
 mem_ss.add(when: 'CONFIG_NPCM7XX', if_true: files('npcm7xx_mc.c'))
 mem_ss.add(when: 'CONFIG_NVDIMM', if_true: files('nvdimm.c'))
 mem_ss.add(when: 'CONFIG_CXL_MEM_DEVICE', if_true: files('cxl_type3.c'))
+softmmu_ss.add(when: 'CONFIG_CXL_MEM_DEVICE', if_false: files('cxl_type3_stubs.c'))
+softmmu_ss.add(when: 'CONFIG_ALL', if_true: files('cxl_type3_stubs.c'))
 
 softmmu_ss.add_all(when: 'CONFIG_MEM_DEVICE', if_true: mem_ss)
 
diff --git a/include/hw/cxl/cxl_component.h b/include/hw/cxl/cxl_component.h
index 692d7a5507..ec4203b83f 100644
--- a/include/hw/cxl/cxl_component.h
+++ b/include/hw/cxl/cxl_component.h
@@ -65,11 +65,37 @@  CXLx_CAPABILITY_HEADER(SNOOP, 0x14)
 #define CXL_RAS_REGISTERS_OFFSET 0x80
 #define CXL_RAS_REGISTERS_SIZE   0x58
 REG32(CXL_RAS_UNC_ERR_STATUS, CXL_RAS_REGISTERS_OFFSET)
+#define CXL_RAS_UNC_ERR_CACHE_DATA_PARITY 0
+#define CXL_RAS_UNC_ERR_CACHE_ADDRESS_PARITY 1
+#define CXL_RAS_UNC_ERR_CACHE_BE_PARITY 2
+#define CXL_RAS_UNC_ERR_CACHE_DATA_ECC 3
+#define CXL_RAS_UNC_ERR_MEM_DATA_PARITY 4
+#define CXL_RAS_UNC_ERR_MEM_ADDRESS_PARITY 5
+#define CXL_RAS_UNC_ERR_MEM_BE_PARITY 6
+#define CXL_RAS_UNC_ERR_MEM_DATA_ECC 7
+#define CXL_RAS_UNC_ERR_REINIT_THRESHOLD 8
+#define CXL_RAS_UNC_ERR_RSVD_ENCODING 9
+#define CXL_RAS_UNC_ERR_POISON_RECEIVED 10
+#define CXL_RAS_UNC_ERR_RECEIVER_OVERFLOW 11
+#define CXL_RAS_UNC_ERR_INTERNAL 14
+#define CXL_RAS_UNC_ERR_CXL_IDE_TX 15
+#define CXL_RAS_UNC_ERR_CXL_IDE_RX 16
+#define CXL_RAS_UNC_ERR_CXL_UNUSED 63 /* Magic value */
 REG32(CXL_RAS_UNC_ERR_MASK, CXL_RAS_REGISTERS_OFFSET + 0x4)
 REG32(CXL_RAS_UNC_ERR_SEVERITY, CXL_RAS_REGISTERS_OFFSET + 0x8)
 REG32(CXL_RAS_COR_ERR_STATUS, CXL_RAS_REGISTERS_OFFSET + 0xc)
+#define CXL_RAS_COR_ERR_CACHE_DATA_ECC 0
+#define CXL_RAS_COR_ERR_MEM_DATA_ECC 1
+#define CXL_RAS_COR_ERR_CRC_THRESHOLD 2
+#define CXL_RAS_COR_ERR_RETRY_THRESHOLD 3
+#define CXL_RAS_COR_ERR_CACHE_POISON_RECEIVED 4
+#define CXL_RAS_COR_ERR_MEM_POISON_RECEIVED 5
+#define CXL_RAS_COR_ERR_PHYSICAL 6
 REG32(CXL_RAS_COR_ERR_MASK, CXL_RAS_REGISTERS_OFFSET + 0x10)
 REG32(CXL_RAS_ERR_CAP_CTRL, CXL_RAS_REGISTERS_OFFSET + 0x14)
+    FIELD(CXL_RAS_ERR_CAP_CTRL, FIRST_ERROR_POINTER, 0, 6)
+REG32(CXL_RAS_ERR_HEADER0, CXL_RAS_REGISTERS_OFFSET + 0x18)
+#define CXL_RAS_ERR_HEADER_NUM 32
 /* Offset 0x18 - 0x58 reserved for RAS logs */
 
 /* 8.2.5.10 - CXL Security Capability Structure */
diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h
index 7e5ad65c1d..d589f78202 100644
--- a/include/hw/cxl/cxl_device.h
+++ b/include/hw/cxl/cxl_device.h
@@ -232,6 +232,14 @@  REG64(CXL_MEM_DEV_STS, 0)
     FIELD(CXL_MEM_DEV_STS, MBOX_READY, 4, 1)
     FIELD(CXL_MEM_DEV_STS, RESET_NEEDED, 5, 3)
 
+typedef struct CXLError {
+    QTAILQ_ENTRY(CXLError) node;
+    int type; /* Error code as per FE definition */
+    uint32_t header[32];
+} CXLError;
+
+typedef QTAILQ_HEAD(, CXLError) CXLErrorList;
+
 struct CXLType3Dev {
     /* Private */
     PCIDevice parent_obj;
@@ -248,6 +256,9 @@  struct CXLType3Dev {
 
     /* DOE */
     DOECap doe_cdat;
+
+    /* Error injection */
+    CXLErrorList error_list;
 };
 
 #define TYPE_CXL_TYPE3 "cxl-type3"
diff --git a/qapi/cxl.json b/qapi/cxl.json
new file mode 100644
index 0000000000..ac7e167fa2
--- /dev/null
+++ b/qapi/cxl.json
@@ -0,0 +1,118 @@ 
+# -*- Mode: Python -*-
+# vim: filetype=python
+
+##
+# = CXL devices
+##
+
+##
+# @CxlUncorErrorType:
+#
+# Type of uncorrectable CXL error to inject. These errors are reported via
+# an AER uncorrectable internal error with additional information logged at
+# the CXL device.
+#
+# @cache-data-parity: Data error such as data parity or data ECC error CXL.cache
+# @cache-address-parity: Address parity or other errors associated with the
+#                        address field on CXL.cache
+# @cache-be-parity: Byte enable parity or other byte enable errors on CXL.cache
+# @cache-data-ecc: ECC error on CXL.cache
+# @mem-data-parity: Data error such as data parity or data ECC error on CXL.mem
+# @mem-address-parity: Address parity or other errors associated with the
+#                      address field on CXL.mem
+# @mem-be-parity: Byte enable parity or other byte enable errors on CXL.mem.
+# @mem-data-ecc: Data ECC error on CXL.mem.
+# @reinit-threshold: REINIT threshold hit.
+# @rsvd-encoding: Received unrecognized encoding.
+# @poison-received: Received poison from the peer.
+# @receiver-overflow: Buffer overflows (first 3 bits of header log indicate which)
+# @internal: Component specific error
+# @cxl-ide-tx: Integrity and data encryption tx error.
+# @cxl-ide-rx: Integrity and data encryption rx error.
+##
+
+{ 'enum': 'CxlUncorErrorType',
+  'data': ['cache-data-parity',
+           'cache-address-parity',
+           'cache-be-parity',
+           'cache-data-ecc',
+           'mem-data-parity',
+           'mem-address-parity',
+           'mem-be-parity',
+           'mem-data-ecc',
+           'reinit-threshold',
+           'rsvd-encoding',
+           'poison-received',
+           'receiver-overflow',
+           'internal',
+           'cxl-ide-tx',
+           'cxl-ide-rx'
+           ]
+ }
+
+##
+# @CXLUncorErrorRecord:
+#
+# Record of a single error including header log.
+#
+# @type: Type of error
+# @header: 16 DWORD of header.
+##
+{ 'struct': 'CXLUncorErrorRecord',
+  'data': {
+      'type': 'CxlUncorErrorType',
+      'header': [ 'uint32' ]
+  }
+}
+
+##
+# @cxl-inject-uncorrectable-errors:
+#
+# Command to allow injection of multiple errors in one go. This allows testing
+# of multiple header log handling in the OS.
+#
+# @path: CXL Type 3 device canonical QOM path
+# @errors: Errors to inject
+##
+{ 'command': 'cxl-inject-uncorrectable-errors',
+  'data': { 'path': 'str',
+             'errors': [ 'CXLUncorErrorRecord' ] }}
+
+##
+# @CxlCorErrorType:
+#
+# Type of CXL correctable error to inject
+#
+# @cache-data-ecc: Data ECC error on CXL.cache
+# @mem-data-ecc: Data ECC error on CXL.mem
+# @crc-threshold: Component specific and applicable to 68 byte Flit mode only.
+# @cache-poison-received: Received poison from a peer on CXL.cache.
+# @mem-poison-received: Received poison from a peer on CXL.mem
+# @physical: Received error indication from the physical layer.
+##
+{ 'enum': 'CxlCorErrorType',
+  'data': ['cache-data-ecc',
+           'mem-data-ecc',
+           'crc-threshold',
+           'retry-threshold',
+           'cache-poison-received',
+           'mem-poison-received',
+           'physical']
+}
+
+##
+# @cxl-inject-correctable-error:
+#
+# Command to inject a single correctable error.  Multiple error injection
+# of this error type is not interesting as there is no associated header log.
+# These errors are reported via AER as a correctable internal error, with
+# additional detail available from the CXL device.
+#
+# @path: CXL Type 3 device canonical QOM path
+# @type: Type of error.
+##
+{ 'command': 'cxl-inject-correctable-error',
+  'data': { 'path': 'str',
+            'type': 'CxlCorErrorType'
+  }
+}
diff --git a/qapi/meson.build b/qapi/meson.build
index fbdb442fdf..73c3c8c31a 100644
--- a/qapi/meson.build
+++ b/qapi/meson.build
@@ -31,6 +31,7 @@  qapi_all_modules = [
   'compat',
   'control',
   'crypto',
+  'cxl',
   'dump',
   'error',
   'introspect',
diff --git a/qapi/qapi-schema.json b/qapi/qapi-schema.json
index f000b90744..079f2a402a 100644
--- a/qapi/qapi-schema.json
+++ b/qapi/qapi-schema.json
@@ -95,3 +95,4 @@ 
 { 'include': 'pci.json' }
 { 'include': 'stats.json' }
 { 'include': 'virtio.json' }
+{ 'include': 'cxl.json' }