diff mbox series

[3/4] hw/cxl: Add scan media cmd support

Message ID 20230908073152.4386-4-dave@stgolabs.net
State New, archived
Headers show
Series hw/cxl: Support for scan media | expand

Commit Message

Davidlohr Bueso Sept. 8, 2023, 7:31 a.m. UTC
Implement the poison list backup functionality by extending the
qmp poison injection interface to add chunks beyond the poison
limit and use it to recreate the poison list upon overflow.

To this end two new lists are added, bkp and results, which are
drained by moving elements from the backup to the results, which
are consumed by the respective get scan media results cmd.

Signed-off-by: Davidlohr Bueso <dave@stgolabs.net>
---
 hw/cxl/cxl-mailbox-utils.c  | 161 ++++++++++++++++++++++++++++++++++--
 hw/mem/cxl_type3.c          |  22 +++--
 include/hw/cxl/cxl_device.h |   9 ++
 3 files changed, 178 insertions(+), 14 deletions(-)

Comments

Jonathan Cameron Sept. 12, 2023, 11:57 a.m. UTC | #1
On Fri,  8 Sep 2023 00:31:51 -0700
Davidlohr Bueso <dave@stgolabs.net> wrote:

> Implement the poison list backup functionality by extending the
> qmp poison injection interface to add chunks beyond the poison
> limit and use it to recreate the poison list upon overflow.
> 
> To this end two new lists are added, bkp and results, which are
> drained by moving elements from the backup to the results, which
> are consumed by the respective get scan media results cmd.
> 
> Signed-off-by: Davidlohr Bueso <dave@stgolabs.net>

Hi Davidlohr,

A few comments inline, but all are superficial so I'll start carrying this
on my tree.  Note I haven't yet tidied anything I commented on up.

Jonathan

> ---
>  hw/cxl/cxl-mailbox-utils.c  | 161 ++++++++++++++++++++++++++++++++++--
>  hw/mem/cxl_type3.c          |  22 +++--
>  include/hw/cxl/cxl_device.h |   9 ++
>  3 files changed, 178 insertions(+), 14 deletions(-)
> 
> diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
> index 3073222060ab..399ac03962db 100644
> --- a/hw/cxl/cxl-mailbox-utils.c
> +++ b/hw/cxl/cxl-mailbox-utils.c
> @@ -81,6 +81,7 @@ enum {
>          #define INJECT_POISON          0x1
>          #define CLEAR_POISON           0x2
>          #define GET_SCAN_MEDIA_CAPABILITIES 0x3
> +        #define SCAN_MEDIA             0x4
>      DCD_CONFIG  = 0x48,
>          #define GET_DC_CONFIG          0x0
>          #define GET_DYN_CAP_EXT_LIST   0x1
> @@ -1037,6 +1038,10 @@ static CXLRetCode cmd_media_get_poison_list(const struct cxl_cmd *cmd,
>          out->flags = (1 << 1);
>          stq_le_p(&out->overflow_timestamp, ct3d->poison_list_overflow_ts);
>      }
> +    if (scan_media_running(cci)) {
> +        out->flags |= (1 << 2);

Define needed for the flag.

> +    }
> +
>      stw_le_p(&out->count, record_count);
>      *len_out = out_pl_len;
>      return CXL_MBOX_SUCCESS;
> @@ -1065,6 +1070,16 @@ static CXLRetCode cmd_media_inject_poison(const struct cxl_cmd *cmd,
>              return CXL_MBOX_SUCCESS;
>          }
>      }
> +    /*
> +     * Freeze the list if there is an on-going scan media operation.
> +     */
> +    if (scan_media_running(cci)) {
> +        /*
> +         * XXX: Spec is ambiguous - is this case considered
> +         * a successful return despite not adding to the list?
> +         */

As below. Can't see this one getting clarified.

> +        goto success;
> +    }
>  
>      if (ct3d->poison_list_cnt == CXL_POISON_LIST_LIMIT) {
>          return CXL_MBOX_INJECT_POISON_LIMIT;
> @@ -1080,6 +1095,7 @@ static CXLRetCode cmd_media_inject_poison(const struct cxl_cmd *cmd,
>       */
>      QLIST_INSERT_HEAD(poison_list, p, node);
>      ct3d->poison_list_cnt++;
> +success:
>      *len_out = 0;
>  
>      return CXL_MBOX_SUCCESS;
> @@ -1123,6 +1139,17 @@ static CXLRetCode cmd_media_clear_poison(const struct cxl_cmd *cmd,
>          }
>      }
>  
> +    /*
> +     * Freeze the list if there is an on-going scan media operation.

Single line comment.

> +     */
> +    if (scan_media_running(cci)) {
> +        /*
> +         * XXX: Spec is ambiguous - is this case considered
> +         * a successful return despite not removing from the list?
> +         */

I think we are in impdef territory here, though happy to join the conversation
if you want to raise it in CXL consortium.  Would need an errata though so
I suspect it will stay impdef with likely reasoning being.

OS should only use scan_media if it know poison list is overflowed.
It should know it issued scan media.
It should not do anything so silly has hit the device with a poison clear
in parallel to that.  Hence doesn't matter that this corner is impdef.

I'd like to see a comment to say it is though!

> +        goto success;
> +    }
> +
>      QLIST_FOREACH(ent, poison_list, node) {
>          /*
>           * Test for contained in entry. Simpler than general case
> @@ -1133,7 +1160,7 @@ static CXLRetCode cmd_media_clear_poison(const struct cxl_cmd *cmd,
>          }
>      }
>      if (!ent) {
> -        return CXL_MBOX_SUCCESS;
> +        goto success;

Is this a bug fix?  I think the explicit setting of len_out came in somewhere but don't
think it is necessary for commands that have no output payload.  If it is, then we should
set it at the top of the function as they never have a payload.

>      }
>  
>      QLIST_REMOVE(ent, node);
> @@ -1170,6 +1197,7 @@ static CXLRetCode cmd_media_clear_poison(const struct cxl_cmd *cmd,
>      }
>      /* Any fragments have been added, free original entry */
>      g_free(ent);
> +success:
>      *len_out = 0;
>  
>      return CXL_MBOX_SUCCESS;
> @@ -1225,6 +1253,124 @@ cmd_media_get_scan_media_capabilities(const struct cxl_cmd *cmd,
>      return CXL_MBOX_SUCCESS;
>  }
>  
> +static void __do_scan_media(CXLType3Dev *ct3d)
> +{
> +    CXLPoison *ent;
> +    unsigned int results_cnt = 0;
> +
> +    QLIST_FOREACH(ent, &ct3d->scan_media_results, node) {
> +        results_cnt++;
> +    }
> +
> +    /* only scan media may clear the overflow */
> +    if (ct3d->poison_list_overflowed &&
> +        ct3d->poison_list_cnt == results_cnt) {
> +        cxl_clear_poison_list_overflowed(ct3d);
> +    }
> +}
> +
> +/*
> + * CXL r3.0 section 8.2.9.8.4.5: Scan Media
> + */
> +static CXLRetCode cmd_media_scan_media(const struct cxl_cmd *cmd,
> +                                       uint8_t *payload_in,
> +                                       size_t len_in,
> +                                       uint8_t *payload_out,
> +                                       size_t *len_out,
> +                                       CXLCCI *cci)
> +{
> +    struct scan_media_pl {
> +        uint64_t pa;
> +        uint64_t length;
> +        uint8_t flags;
> +    } QEMU_PACKED;
> +
> +    struct scan_media_pl *in = (void *)payload_in;
> +    CXLType3Dev *ct3d = CXL_TYPE3(cci->d);
> +    CXLDeviceState *cxl_dstate = &ct3d->cxl_dstate;
> +    uint64_t query_start;
> +    uint64_t query_length;
> +    CXLPoison *ent, *next;
> +
> +    query_start = ldq_le_p(&in->pa);
> +    /* 64 byte alignment required */
> +    if (query_start & 0x3f) {
> +        return CXL_MBOX_INVALID_INPUT;
> +    }
> +    query_length = ldq_le_p(&in->length) * CXL_CACHE_LINE_SIZE;
> +
> +    if (query_start + query_length > cxl_dstate->static_mem_size) {
> +        return CXL_MBOX_INVALID_PA;
> +    }
> +    if (ct3d->dc.num_regions && query_start + query_length >=
> +            cxl_dstate->static_mem_size + ct3d->dc.total_capacity) {

If in DCD range we will never get to this test as we already failed
on matching against static_mem_size above.

Can we just drop the earlier test?

> +        return CXL_MBOX_INVALID_PA;
> +    }
> +
> +    if (in->flags == 0) { /* TODO */
Good to separate match on first bit vs seeing reserved bits.
Makes no difference today, but will make things potentially easier to extend
in long run.

> +        qemu_log_mask(LOG_UNIMP,
> +                      "Scan Media Event Log is unsupported\n");
> +    }
> +
> +    /* any previous results are discarded upon a new Scan Media */
> +    QLIST_FOREACH_SAFE(ent, &ct3d->scan_media_results, node, next) {
> +        QLIST_REMOVE(ent, node);
> +        g_free(ent);
> +    }
> +
> +    /* kill the poison list - it will be recreated */

Perhaps call out that the spec says a device 'may update it's Poison List...'

> +    if (ct3d->poison_list_overflowed) {
> +        QLIST_FOREACH_SAFE(ent, &ct3d->poison_list, node, next) {
> +            QLIST_REMOVE(ent, node);
> +            g_free(ent);
> +            ct3d->poison_list_cnt--;
> +        }
> +    }
> +
> +    /*
> +     * Scan the backup list and move corresponding entries
> +     * into the results list, updating the poison list
> +     * when possible.
> +     */
> +    QLIST_FOREACH_SAFE(ent, &ct3d->poison_list_bkp, node, next) {
> +        CXLPoison *res;
> +
> +        if (ent->start >= query_start + query_length ||
> +            ent->start + ent->length <= query_start) {
> +            continue;
> +        }
> +
> +        /*
> +         * If a Get Poison List cmd comes in while this
> +         * scan is being done, it will see the new complete
> +         * list, while setting the respective flag.

I'd be more specific
"while setting the Scan Media in Progress flag in the Get Poison
List Output Payload."


> +         */
> +        if (ct3d->poison_list_cnt < CXL_POISON_LIST_LIMIT) {
> +            CXLPoison *p = g_new0(CXLPoison, 1);
> +
> +            p->start = ent->start;
> +            p->length = ent->length;
> +            p->type = ent->type;
> +            QLIST_INSERT_HEAD(&ct3d->poison_list, p, node);
> +            ct3d->poison_list_cnt++;

Maybe worth a utility function for this adding one to the poison
list.  Or duplicate the memory and insert that to the poison list?

> +        }
> +
> +        res = g_new0(CXLPoison, 1);
> +        res->start = ent->start;
> +        res->length = ent->length;
> +        res->type = ent->type;
> +        QLIST_INSERT_HEAD(&ct3d->scan_media_results, res, node);
> +
> +        QLIST_REMOVE(ent, node);

Can't we just remove from the poison_list_bkp first then add it
to the scan_media_results list?  I.e. Don't make a new one.

> +        g_free(ent);
> +    }
> +
> +    cci->bg.runtime = MAX(1, query_length * (0.0005L/64));
Spaces around /

> +    *len_out = 0;
> +
> +    return CXL_MBOX_BG_STARTED;
> +}
> +
>  /*
>   * CXL r3.0 section 8.2.9.8.9.1: Dynamic Capacity Configuration
>   */
> @@ -1655,6 +1801,8 @@ static const struct cxl_cmd cxl_cmd_set[256][256] = {
>          cmd_media_clear_poison, 72, 0 },
>      [MEDIA_AND_POISON][GET_SCAN_MEDIA_CAPABILITIES] = { "MEDIA_AND_POISON_GET_SCAN_MEDIA_CAPABILITIES",
>          cmd_media_get_scan_media_capabilities, 16, 0 },
> +    [MEDIA_AND_POISON][SCAN_MEDIA] = { "MEDIA_AND_POISON_SCAN_MEDIA",
> +        cmd_media_scan_media, 17, BACKGROUND_OPERATION },
>  };
>  
>  static const struct cxl_cmd cxl_cmd_set_dcd[256][256] = {
> @@ -1783,16 +1931,15 @@ static void bg_timercb(void *opaque)
>          cci->bg.complete_pct = 100;
>          cci->bg.ret_code = ret;
>          if (ret == CXL_MBOX_SUCCESS) {
> +            CXLType3Dev *ct3d = CXL_TYPE3(cci->d);
> +
>              switch (cci->bg.opcode) {
>              case 0x4400: /* sanitize */
> -            {
> -                CXLType3Dev *ct3d = CXL_TYPE3(cci->d);
> -
>                  __do_sanitization(ct3d);
>                  cxl_dev_enable_media(&ct3d->cxl_dstate);
> -            }
> -            break;
> -            case 0x4304: /* TODO: scan media */
> +                break;
> +            case 0x4304: /* scan media */
> +                __do_scan_media(ct3d);
>                  break;
>              default:
>                  __builtin_unreachable();
> diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
> index b90a7397d62f..aa9fdde1436e 100644
> --- a/hw/mem/cxl_type3.c
> +++ b/hw/mem/cxl_type3.c
> @@ -1435,6 +1435,12 @@ void cxl_set_poison_list_overflowed(CXLType3Dev *ct3d)
>              cxl_device_get_timestamp(&ct3d->cxl_dstate);
>  }
>  
> +void cxl_clear_poison_list_overflowed(CXLType3Dev *ct3d)
> +{
> +    ct3d->poison_list_overflowed = false;
> +    ct3d->poison_list_overflow_ts = 0;
> +}
> +
>  void qmp_cxl_inject_poison(const char *path, uint64_t start, uint64_t length,
>                             Error **errp)
>  {
> @@ -1470,18 +1476,20 @@ void qmp_cxl_inject_poison(const char *path, uint64_t start, uint64_t length,
>          }
>      }
>  
> -    if (ct3d->poison_list_cnt == CXL_POISON_LIST_LIMIT) {
> -        cxl_set_poison_list_overflowed(ct3d);
> -        return;
> -    }
> -
>      p = g_new0(CXLPoison, 1);
>      p->length = length;
>      p->start = start;
>      p->type = CXL_POISON_TYPE_INTERNAL; /* Different from injected via the mbox */
>  
> -    QLIST_INSERT_HEAD(&ct3d->poison_list, p, node);
> -    ct3d->poison_list_cnt++;
> +    if (ct3d->poison_list_cnt < CXL_POISON_LIST_LIMIT) {
> +        QLIST_INSERT_HEAD(&ct3d->poison_list, p, node);
> +        ct3d->poison_list_cnt++;
> +    } else {
> +        if (!ct3d->poison_list_overflowed) {
> +            cxl_set_poison_list_overflowed(ct3d);
> +        }
> +        QLIST_INSERT_HEAD(&ct3d->poison_list_bkp, p, node);
> +    }
>  }
>  
>  /* For uncorrectable errors include support for multiple header recording */
> diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h
> index e824c5ade8a1..eb5c5284fa9f 100644
> --- a/include/hw/cxl/cxl_device.h
> +++ b/include/hw/cxl/cxl_device.h
> @@ -385,6 +385,11 @@ static inline void __toggle_media(CXLDeviceState *cxl_dstate, int val)
>  #define cxl_dev_enable_media(cxlds)                     \
>          do { __toggle_media((cxlds), 0x1); } while (0)
>  
> +static inline bool scan_media_running(CXLCCI *cci)
> +{
> +    return !!cci->bg.runtime && cci->bg.opcode == 0x4304;

I'd prefer the opcode being built which would mean moving this
implementation down to where we can see the defines.

(MEDIA_AND_POISON << 8) | SCAN_MEDIA 

I considered suggesting a macro, but if we do it should be separate
patch and include the other cases in this file.

Hence one for another day.

> +}
> +
>  static inline bool sanitize_running(CXLCCI *cci)
>  {
>      return !!cci->bg.runtime && cci->bg.opcode == 0x4400;
> @@ -476,6 +481,9 @@ struct CXLType3Dev {
>      unsigned int poison_list_cnt;
>      bool poison_list_overflowed;
>      uint64_t poison_list_overflow_ts;
> +    /* Poison Injection - backup */
> +    CXLPoisonList poison_list_bkp;
> +    CXLPoisonList scan_media_results;
>  
>      struct dynamic_capacity {
>          HostMemoryBackend *host_dc;
> @@ -538,6 +546,7 @@ CXLRetCode cxl_event_clear_records(CXLDeviceState *cxlds,
>  void cxl_event_irq_assert(CXLType3Dev *ct3d);
>  
>  void cxl_set_poison_list_overflowed(CXLType3Dev *ct3d);
> +void cxl_clear_poison_list_overflowed(CXLType3Dev *ct3d);
>  
>  CXLDCDRegion *cxl_find_dc_region(CXLType3Dev *ct3d, uint64_t dpa, uint64_t len);
>
Davidlohr Bueso Sept. 13, 2023, 3:47 a.m. UTC | #2
On Tue, 12 Sep 2023, Jonathan Cameron wrote:

>On Fri,  8 Sep 2023 00:31:51 -0700
>> +    if (scan_media_running(cci)) {
>> +        /*
>> +         * XXX: Spec is ambiguous - is this case considered
>> +         * a successful return despite not removing from the list?
>> +         */
>
>I think we are in impdef territory here, though happy to join the conversation
>if you want to raise it in CXL consortium.  Would need an errata though so
>I suspect it will stay impdef with likely reasoning being.
>
>OS should only use scan_media if it know poison list is overflowed.
>It should know it issued scan media.
>It should not do anything so silly has hit the device with a poison clear
>in parallel to that.  Hence doesn't matter that this corner is impdef.

Right, this is very much along the lines of what I have the kernel currently
doing. That is, directly start the scan from get poison list if the overflow
bit returns set. This does not follow userspace policy once discussed and
hence no possibility of calling scan media under bogus conditions. Inject/clear
are also serialized vs this, so no funny business either.

>
>I'd like to see a comment to say it is though!
>
>> +        goto success;
>> +    }
>> +
>>      QLIST_FOREACH(ent, poison_list, node) {
>>          /*
>>           * Test for contained in entry. Simpler than general case
>> @@ -1133,7 +1160,7 @@ static CXLRetCode cmd_media_clear_poison(const struct cxl_cmd *cmd,
>>          }
>>      }
>>      if (!ent) {
>> -        return CXL_MBOX_SUCCESS;
>> +        goto success;
>
>Is this a bug fix?  I think the explicit setting of len_out came in somewhere but don't
>think it is necessary for commands that have no output payload.  If it is, then we should
>set it at the top of the function as they never have a payload.

Yeah it doesn't matter for this case.

Thanks,
Davidlohr
diff mbox series

Patch

diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
index 3073222060ab..399ac03962db 100644
--- a/hw/cxl/cxl-mailbox-utils.c
+++ b/hw/cxl/cxl-mailbox-utils.c
@@ -81,6 +81,7 @@  enum {
         #define INJECT_POISON          0x1
         #define CLEAR_POISON           0x2
         #define GET_SCAN_MEDIA_CAPABILITIES 0x3
+        #define SCAN_MEDIA             0x4
     DCD_CONFIG  = 0x48,
         #define GET_DC_CONFIG          0x0
         #define GET_DYN_CAP_EXT_LIST   0x1
@@ -1037,6 +1038,10 @@  static CXLRetCode cmd_media_get_poison_list(const struct cxl_cmd *cmd,
         out->flags = (1 << 1);
         stq_le_p(&out->overflow_timestamp, ct3d->poison_list_overflow_ts);
     }
+    if (scan_media_running(cci)) {
+        out->flags |= (1 << 2);
+    }
+
     stw_le_p(&out->count, record_count);
     *len_out = out_pl_len;
     return CXL_MBOX_SUCCESS;
@@ -1065,6 +1070,16 @@  static CXLRetCode cmd_media_inject_poison(const struct cxl_cmd *cmd,
             return CXL_MBOX_SUCCESS;
         }
     }
+    /*
+     * Freeze the list if there is an on-going scan media operation.
+     */
+    if (scan_media_running(cci)) {
+        /*
+         * XXX: Spec is ambiguous - is this case considered
+         * a successful return despite not adding to the list?
+         */
+        goto success;
+    }
 
     if (ct3d->poison_list_cnt == CXL_POISON_LIST_LIMIT) {
         return CXL_MBOX_INJECT_POISON_LIMIT;
@@ -1080,6 +1095,7 @@  static CXLRetCode cmd_media_inject_poison(const struct cxl_cmd *cmd,
      */
     QLIST_INSERT_HEAD(poison_list, p, node);
     ct3d->poison_list_cnt++;
+success:
     *len_out = 0;
 
     return CXL_MBOX_SUCCESS;
@@ -1123,6 +1139,17 @@  static CXLRetCode cmd_media_clear_poison(const struct cxl_cmd *cmd,
         }
     }
 
+    /*
+     * Freeze the list if there is an on-going scan media operation.
+     */
+    if (scan_media_running(cci)) {
+        /*
+         * XXX: Spec is ambiguous - is this case considered
+         * a successful return despite not removing from the list?
+         */
+        goto success;
+    }
+
     QLIST_FOREACH(ent, poison_list, node) {
         /*
          * Test for contained in entry. Simpler than general case
@@ -1133,7 +1160,7 @@  static CXLRetCode cmd_media_clear_poison(const struct cxl_cmd *cmd,
         }
     }
     if (!ent) {
-        return CXL_MBOX_SUCCESS;
+        goto success;
     }
 
     QLIST_REMOVE(ent, node);
@@ -1170,6 +1197,7 @@  static CXLRetCode cmd_media_clear_poison(const struct cxl_cmd *cmd,
     }
     /* Any fragments have been added, free original entry */
     g_free(ent);
+success:
     *len_out = 0;
 
     return CXL_MBOX_SUCCESS;
@@ -1225,6 +1253,124 @@  cmd_media_get_scan_media_capabilities(const struct cxl_cmd *cmd,
     return CXL_MBOX_SUCCESS;
 }
 
+static void __do_scan_media(CXLType3Dev *ct3d)
+{
+    CXLPoison *ent;
+    unsigned int results_cnt = 0;
+
+    QLIST_FOREACH(ent, &ct3d->scan_media_results, node) {
+        results_cnt++;
+    }
+
+    /* only scan media may clear the overflow */
+    if (ct3d->poison_list_overflowed &&
+        ct3d->poison_list_cnt == results_cnt) {
+        cxl_clear_poison_list_overflowed(ct3d);
+    }
+}
+
+/*
+ * CXL r3.0 section 8.2.9.8.4.5: Scan Media
+ */
+static CXLRetCode cmd_media_scan_media(const struct cxl_cmd *cmd,
+                                       uint8_t *payload_in,
+                                       size_t len_in,
+                                       uint8_t *payload_out,
+                                       size_t *len_out,
+                                       CXLCCI *cci)
+{
+    struct scan_media_pl {
+        uint64_t pa;
+        uint64_t length;
+        uint8_t flags;
+    } QEMU_PACKED;
+
+    struct scan_media_pl *in = (void *)payload_in;
+    CXLType3Dev *ct3d = CXL_TYPE3(cci->d);
+    CXLDeviceState *cxl_dstate = &ct3d->cxl_dstate;
+    uint64_t query_start;
+    uint64_t query_length;
+    CXLPoison *ent, *next;
+
+    query_start = ldq_le_p(&in->pa);
+    /* 64 byte alignment required */
+    if (query_start & 0x3f) {
+        return CXL_MBOX_INVALID_INPUT;
+    }
+    query_length = ldq_le_p(&in->length) * CXL_CACHE_LINE_SIZE;
+
+    if (query_start + query_length > cxl_dstate->static_mem_size) {
+        return CXL_MBOX_INVALID_PA;
+    }
+    if (ct3d->dc.num_regions && query_start + query_length >=
+            cxl_dstate->static_mem_size + ct3d->dc.total_capacity) {
+        return CXL_MBOX_INVALID_PA;
+    }
+
+    if (in->flags == 0) { /* TODO */
+        qemu_log_mask(LOG_UNIMP,
+                      "Scan Media Event Log is unsupported\n");
+    }
+
+    /* any previous results are discarded upon a new Scan Media */
+    QLIST_FOREACH_SAFE(ent, &ct3d->scan_media_results, node, next) {
+        QLIST_REMOVE(ent, node);
+        g_free(ent);
+    }
+
+    /* kill the poison list - it will be recreated */
+    if (ct3d->poison_list_overflowed) {
+        QLIST_FOREACH_SAFE(ent, &ct3d->poison_list, node, next) {
+            QLIST_REMOVE(ent, node);
+            g_free(ent);
+            ct3d->poison_list_cnt--;
+        }
+    }
+
+    /*
+     * Scan the backup list and move corresponding entries
+     * into the results list, updating the poison list
+     * when possible.
+     */
+    QLIST_FOREACH_SAFE(ent, &ct3d->poison_list_bkp, node, next) {
+        CXLPoison *res;
+
+        if (ent->start >= query_start + query_length ||
+            ent->start + ent->length <= query_start) {
+            continue;
+        }
+
+        /*
+         * If a Get Poison List cmd comes in while this
+         * scan is being done, it will see the new complete
+         * list, while setting the respective flag.
+         */
+        if (ct3d->poison_list_cnt < CXL_POISON_LIST_LIMIT) {
+            CXLPoison *p = g_new0(CXLPoison, 1);
+
+            p->start = ent->start;
+            p->length = ent->length;
+            p->type = ent->type;
+            QLIST_INSERT_HEAD(&ct3d->poison_list, p, node);
+            ct3d->poison_list_cnt++;
+        }
+
+        res = g_new0(CXLPoison, 1);
+        res->start = ent->start;
+        res->length = ent->length;
+        res->type = ent->type;
+        QLIST_INSERT_HEAD(&ct3d->scan_media_results, res, node);
+
+        QLIST_REMOVE(ent, node);
+        g_free(ent);
+    }
+
+    cci->bg.runtime = MAX(1, query_length * (0.0005L/64));
+    *len_out = 0;
+
+    return CXL_MBOX_BG_STARTED;
+}
+
 /*
  * CXL r3.0 section 8.2.9.8.9.1: Dynamic Capacity Configuration
  */
@@ -1655,6 +1801,8 @@  static const struct cxl_cmd cxl_cmd_set[256][256] = {
         cmd_media_clear_poison, 72, 0 },
     [MEDIA_AND_POISON][GET_SCAN_MEDIA_CAPABILITIES] = { "MEDIA_AND_POISON_GET_SCAN_MEDIA_CAPABILITIES",
         cmd_media_get_scan_media_capabilities, 16, 0 },
+    [MEDIA_AND_POISON][SCAN_MEDIA] = { "MEDIA_AND_POISON_SCAN_MEDIA",
+        cmd_media_scan_media, 17, BACKGROUND_OPERATION },
 };
 
 static const struct cxl_cmd cxl_cmd_set_dcd[256][256] = {
@@ -1783,16 +1931,15 @@  static void bg_timercb(void *opaque)
         cci->bg.complete_pct = 100;
         cci->bg.ret_code = ret;
         if (ret == CXL_MBOX_SUCCESS) {
+            CXLType3Dev *ct3d = CXL_TYPE3(cci->d);
+
             switch (cci->bg.opcode) {
             case 0x4400: /* sanitize */
-            {
-                CXLType3Dev *ct3d = CXL_TYPE3(cci->d);
-
                 __do_sanitization(ct3d);
                 cxl_dev_enable_media(&ct3d->cxl_dstate);
-            }
-            break;
-            case 0x4304: /* TODO: scan media */
+                break;
+            case 0x4304: /* scan media */
+                __do_scan_media(ct3d);
                 break;
             default:
                 __builtin_unreachable();
diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
index b90a7397d62f..aa9fdde1436e 100644
--- a/hw/mem/cxl_type3.c
+++ b/hw/mem/cxl_type3.c
@@ -1435,6 +1435,12 @@  void cxl_set_poison_list_overflowed(CXLType3Dev *ct3d)
             cxl_device_get_timestamp(&ct3d->cxl_dstate);
 }
 
+void cxl_clear_poison_list_overflowed(CXLType3Dev *ct3d)
+{
+    ct3d->poison_list_overflowed = false;
+    ct3d->poison_list_overflow_ts = 0;
+}
+
 void qmp_cxl_inject_poison(const char *path, uint64_t start, uint64_t length,
                            Error **errp)
 {
@@ -1470,18 +1476,20 @@  void qmp_cxl_inject_poison(const char *path, uint64_t start, uint64_t length,
         }
     }
 
-    if (ct3d->poison_list_cnt == CXL_POISON_LIST_LIMIT) {
-        cxl_set_poison_list_overflowed(ct3d);
-        return;
-    }
-
     p = g_new0(CXLPoison, 1);
     p->length = length;
     p->start = start;
     p->type = CXL_POISON_TYPE_INTERNAL; /* Different from injected via the mbox */
 
-    QLIST_INSERT_HEAD(&ct3d->poison_list, p, node);
-    ct3d->poison_list_cnt++;
+    if (ct3d->poison_list_cnt < CXL_POISON_LIST_LIMIT) {
+        QLIST_INSERT_HEAD(&ct3d->poison_list, p, node);
+        ct3d->poison_list_cnt++;
+    } else {
+        if (!ct3d->poison_list_overflowed) {
+            cxl_set_poison_list_overflowed(ct3d);
+        }
+        QLIST_INSERT_HEAD(&ct3d->poison_list_bkp, p, node);
+    }
 }
 
 /* For uncorrectable errors include support for multiple header recording */
diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h
index e824c5ade8a1..eb5c5284fa9f 100644
--- a/include/hw/cxl/cxl_device.h
+++ b/include/hw/cxl/cxl_device.h
@@ -385,6 +385,11 @@  static inline void __toggle_media(CXLDeviceState *cxl_dstate, int val)
 #define cxl_dev_enable_media(cxlds)                     \
         do { __toggle_media((cxlds), 0x1); } while (0)
 
+static inline bool scan_media_running(CXLCCI *cci)
+{
+    return !!cci->bg.runtime && cci->bg.opcode == 0x4304;
+}
+
 static inline bool sanitize_running(CXLCCI *cci)
 {
     return !!cci->bg.runtime && cci->bg.opcode == 0x4400;
@@ -476,6 +481,9 @@  struct CXLType3Dev {
     unsigned int poison_list_cnt;
     bool poison_list_overflowed;
     uint64_t poison_list_overflow_ts;
+    /* Poison Injection - backup */
+    CXLPoisonList poison_list_bkp;
+    CXLPoisonList scan_media_results;
 
     struct dynamic_capacity {
         HostMemoryBackend *host_dc;
@@ -538,6 +546,7 @@  CXLRetCode cxl_event_clear_records(CXLDeviceState *cxlds,
 void cxl_event_irq_assert(CXLType3Dev *ct3d);
 
 void cxl_set_poison_list_overflowed(CXLType3Dev *ct3d);
+void cxl_clear_poison_list_overflowed(CXLType3Dev *ct3d);
 
 CXLDCDRegion *cxl_find_dc_region(CXLType3Dev *ct3d, uint64_t dpa, uint64_t len);