diff mbox series

hw/cxl: Fix msix_notify: Assertion `vector < dev->msix_entries_nr`

Message ID 20241212085534.2669377-1-lizhijian@fujitsu.com (mailing list archive)
State New, archived
Headers show
Series hw/cxl: Fix msix_notify: Assertion `vector < dev->msix_entries_nr` | expand

Commit Message

Li Zhijian Dec. 12, 2024, 8:55 a.m. UTC
This assertion always happens when we sanitize the CXL memory device.
$ echo 1 > /sys/bus/cxl/devices/mem0/security/sanitize

It is incorrect to register an MSIX number beyond the device's capability.

Expand the device's MSIX to 10 and introduce the `request_msix_number()`
helper function to dynamically request an available MSIX number.

Fixes: 43efb0bfad2b ("hw/cxl/mbox: Wire up interrupts for background completion")
Signed-off-by: Li Zhijian <lizhijian@fujitsu.com>
---
 hw/cxl/cxl-device-utils.c   |  3 ++-
 hw/mem/cxl_type3.c          | 15 ++++++++++++++-
 include/hw/cxl/cxl_device.h |  2 ++
 3 files changed, 18 insertions(+), 2 deletions(-)

Comments

Jonathan Cameron Dec. 12, 2024, 12:02 p.m. UTC | #1
On Thu, 12 Dec 2024 16:55:33 +0800
Li Zhijian via <qemu-devel@nongnu.org> wrote:

> This assertion always happens when we sanitize the CXL memory device.
> $ echo 1 > /sys/bus/cxl/devices/mem0/security/sanitize
> 
> It is incorrect to register an MSIX number beyond the device's capability.
> 
> Expand the device's MSIX to 10 and introduce the `request_msix_number()`
> helper function to dynamically request an available MSIX number.
> 
> Fixes: 43efb0bfad2b ("hw/cxl/mbox: Wire up interrupts for background completion")
> Signed-off-by: Li Zhijian <lizhijian@fujitsu.com>

Hi.

Thanks for testing + the fix.

This looks like a  mess up by me due to reordering patches. 
In the first instance, the fix should just be to increase msi_n (keep it minimal)

The refactor to use an allocator may makes sense as a follow up, but needs
to be used universally for allocation of each msix, not just for the later
ones.  However, it may be simpler to just use an enum with a _MAX final
entry to ensure we allocate the right overall number.  These are fixed
numbers and a restricted resource, so dynamic allocator is probably unnecessary.

Longer term we need to spend some time on automated tests so this sort of silly
bug doesn't happen in future :(

Thanks,

Jonathan


> ---
>  hw/cxl/cxl-device-utils.c   |  3 ++-
>  hw/mem/cxl_type3.c          | 15 ++++++++++++++-
>  include/hw/cxl/cxl_device.h |  2 ++
>  3 files changed, 18 insertions(+), 2 deletions(-)
> 
> diff --git a/hw/cxl/cxl-device-utils.c b/hw/cxl/cxl-device-utils.c
> index 035d034f6d..8e52af6813 100644
> --- a/hw/cxl/cxl-device-utils.c
> +++ b/hw/cxl/cxl-device-utils.c
> @@ -354,8 +354,9 @@ static void device_reg_init_common(CXLDeviceState *cxl_dstate)
>  
>  static void mailbox_reg_init_common(CXLDeviceState *cxl_dstate)
>  {
> -    const uint8_t msi_n = 9;
> +    uint8_t msi_n = cxl_request_msi_number();
>  
> +    assert(msi_n > 0);
>      /* 2048 payload size */
>      ARRAY_FIELD_DP32(cxl_dstate->mbox_reg_state32, CXL_DEV_MAILBOX_CAP,
>                       PAYLOAD_SIZE, CXL_MAILBOX_PAYLOAD_SHIFT);
> diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
> index 5cf754b38f..dbb1368736 100644
> --- a/hw/mem/cxl_type3.c
> +++ b/hw/mem/cxl_type3.c
> @@ -835,6 +835,19 @@ static DOEProtocol doe_cdat_prot[] = {
>      { }
>  };
>  
> +#define CT3_MSIX_NUM 10
> +unsigned short cxl_request_msi_number(void)
> +{
> +    const unsigned short start = 6;
> +    static unsigned short next = start;
> +
> +    if (next + 1 >= CT3_MSIX_NUM) {
> +        return -1;
> +    }
> +
> +    return ++next;
> +}
> +
>  static void ct3_realize(PCIDevice *pci_dev, Error **errp)
>  {
>      ERRP_GUARD();
> @@ -843,7 +856,7 @@ static void ct3_realize(PCIDevice *pci_dev, Error **errp)
>      ComponentRegisters *regs = &cxl_cstate->crb;
>      MemoryRegion *mr = &regs->component_registers;
>      uint8_t *pci_conf = pci_dev->config;
> -    unsigned short msix_num = 6;
> +    unsigned short msix_num = CT3_MSIX_NUM;
>      int i, rc;
>      uint16_t count;
>  
> diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h
> index 561b375dc8..622265f50e 100644
> --- a/include/hw/cxl/cxl_device.h
> +++ b/include/hw/cxl/cxl_device.h
> @@ -680,4 +680,6 @@ void ct3_clear_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa,
>                                     uint64_t len);
>  bool ct3_test_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa,
>                                    uint64_t len);
> +unsigned short cxl_request_msi_number(void);
> +
>  #endif
Zhijian Li (Fujitsu)" via Dec. 13, 2024, 8:15 a.m. UTC | #2
>From: Jonathan Cameron <Jonathan.Cameron@huawei.com>
>Sent: Thursday, December 12, 2024 20:02
>
>On Thu, 12 Dec 2024 16:55:33 +0800
>Li Zhijian via <qemu-devel@nongnu.org> wrote:
>
>> This assertion always happens when we sanitize the CXL memory device.
>> $ echo 1 > /sys/bus/cxl/devices/mem0/security/sanitize
>>
>> It is incorrect to register an MSIX number beyond the device's capability.
>>
>> Expand the device's MSIX to 10 and introduce the `request_msix_number()`
>> helper function to dynamically request an available MSIX number.
>>
>> Fixes: 43efb0bfad2b ("hw/cxl/mbox: Wire up interrupts for background completion")
>> Signed-off-by: Li Zhijian <lizhijian@fujitsu.com>
>
>Hi.
>
>Thanks for testing + the fix.
>
>This looks like a  mess up by me due to reordering patches.
>In the first instance, the fix should just be to increase msi_n (keep it minimal)
>

It sounds good to me.


>The refactor to use an allocator may makes sense as a follow up, but needs
>to be used universally for allocation of each msix, not just for the later
>ones.  However, it may be simpler to just use an enum with a _MAX final
>entry to ensure we allocate the right overall number.  These are fixed
>numbers and a restricted resource, so dynamic allocator is probably unnecessary.

Agreed.


>
>Longer term we need to spend some time on automated tests so this sort of silly
>bug doesn't happen in future :(

Agree, I will also consider augmenting the tests for CXL.

Thanks
Zhijian

>
>Thanks,
>
>Jonathan


> ---
>  hw/cxl/cxl-device-utils.c   |  3 ++-
>  hw/mem/cxl_type3.c          | 15 ++++++++++++++-
>  include/hw/cxl/cxl_device.h |  2 ++
>  3 files changed, 18 insertions(+), 2 deletions(-)
>
> diff --git a/hw/cxl/cxl-device-utils.c b/hw/cxl/cxl-device-utils.c
> index 035d034f6d..8e52af6813 100644
> --- a/hw/cxl/cxl-device-utils.c
> +++ b/hw/cxl/cxl-device-utils.c
> @@ -354,8 +354,9 @@ static void device_reg_init_common(CXLDeviceState *cxl_dstate)
>
>  static void mailbox_reg_init_common(CXLDeviceState *cxl_dstate)
>  {
> -    const uint8_t msi_n = 9;
> +    uint8_t msi_n = cxl_request_msi_number();
>
> +    assert(msi_n > 0);
>      /* 2048 payload size */
>      ARRAY_FIELD_DP32(cxl_dstate->mbox_reg_state32, CXL_DEV_MAILBOX_CAP,
>                       PAYLOAD_SIZE, CXL_MAILBOX_PAYLOAD_SHIFT);
> diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
> index 5cf754b38f..dbb1368736 100644
> --- a/hw/mem/cxl_type3.c
> +++ b/hw/mem/cxl_type3.c
> @@ -835,6 +835,19 @@ static DOEProtocol doe_cdat_prot[] = {
>      { }
>  };
>
> +#define CT3_MSIX_NUM 10
> +unsigned short cxl_request_msi_number(void)
> +{
> +    const unsigned short start = 6;
> +    static unsigned short next = start;
> +
> +    if (next + 1 >= CT3_MSIX_NUM) {
> +        return -1;
> +    }
> +
> +    return ++next;
> +}
> +
>  static void ct3_realize(PCIDevice *pci_dev, Error **errp)
>  {
>      ERRP_GUARD();
> @@ -843,7 +856,7 @@ static void ct3_realize(PCIDevice *pci_dev, Error **errp)
>      ComponentRegisters *regs = &cxl_cstate->crb;
>      MemoryRegion *mr = &regs->component_registers;
>      uint8_t *pci_conf = pci_dev->config;
> -    unsigned short msix_num = 6;
> +    unsigned short msix_num = CT3_MSIX_NUM;
>      int i, rc;
>      uint16_t count;
>
> diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h
> index 561b375dc8..622265f50e 100644
> --- a/include/hw/cxl/cxl_device.h
> +++ b/include/hw/cxl/cxl_device.h
> @@ -680,4 +680,6 @@ void ct3_clear_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa,
>                                     uint64_t len);
>  bool ct3_test_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa,
>                                    uint64_t len);
> +unsigned short cxl_request_msi_number(void);
> +
>  #endif
diff mbox series

Patch

diff --git a/hw/cxl/cxl-device-utils.c b/hw/cxl/cxl-device-utils.c
index 035d034f6d..8e52af6813 100644
--- a/hw/cxl/cxl-device-utils.c
+++ b/hw/cxl/cxl-device-utils.c
@@ -354,8 +354,9 @@  static void device_reg_init_common(CXLDeviceState *cxl_dstate)
 
 static void mailbox_reg_init_common(CXLDeviceState *cxl_dstate)
 {
-    const uint8_t msi_n = 9;
+    uint8_t msi_n = cxl_request_msi_number();
 
+    assert(msi_n > 0);
     /* 2048 payload size */
     ARRAY_FIELD_DP32(cxl_dstate->mbox_reg_state32, CXL_DEV_MAILBOX_CAP,
                      PAYLOAD_SIZE, CXL_MAILBOX_PAYLOAD_SHIFT);
diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
index 5cf754b38f..dbb1368736 100644
--- a/hw/mem/cxl_type3.c
+++ b/hw/mem/cxl_type3.c
@@ -835,6 +835,19 @@  static DOEProtocol doe_cdat_prot[] = {
     { }
 };
 
+#define CT3_MSIX_NUM 10
+unsigned short cxl_request_msi_number(void)
+{
+    const unsigned short start = 6;
+    static unsigned short next = start;
+
+    if (next + 1 >= CT3_MSIX_NUM) {
+        return -1;
+    }
+
+    return ++next;
+}
+
 static void ct3_realize(PCIDevice *pci_dev, Error **errp)
 {
     ERRP_GUARD();
@@ -843,7 +856,7 @@  static void ct3_realize(PCIDevice *pci_dev, Error **errp)
     ComponentRegisters *regs = &cxl_cstate->crb;
     MemoryRegion *mr = &regs->component_registers;
     uint8_t *pci_conf = pci_dev->config;
-    unsigned short msix_num = 6;
+    unsigned short msix_num = CT3_MSIX_NUM;
     int i, rc;
     uint16_t count;
 
diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h
index 561b375dc8..622265f50e 100644
--- a/include/hw/cxl/cxl_device.h
+++ b/include/hw/cxl/cxl_device.h
@@ -680,4 +680,6 @@  void ct3_clear_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa,
                                    uint64_t len);
 bool ct3_test_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa,
                                   uint64_t len);
+unsigned short cxl_request_msi_number(void);
+
 #endif