diff mbox series

[19/19] tools/testing/cxl: Emulate a CXL accelerator with local memory

Message ID 168592160379.1948938.12863272903570476312.stgit@dwillia2-xfh.jf.intel.com
State New, archived
Headers show
Series cxl: Device memory setup | expand

Commit Message

Dan Williams June 4, 2023, 11:33 p.m. UTC
Mock-up a device that does not have a standard mailbox, i.e. a device
that does not implement the CXL memory-device class code, but wants to
map "device" memory (aka Type-2, aka HDM-D[B], aka accelerator memory).

For extra code coverage make this device an RCD to test region creation
flows in the presence of an RCH topology (memory device modeled as a
root-complex-integrated-endpoint RCIEP).

Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/cxl/core/memdev.c    |   15 +++++++
 drivers/cxl/cxlmem.h         |    1 
 tools/testing/cxl/test/cxl.c |   16 +++++++-
 tools/testing/cxl/test/mem.c |   85 +++++++++++++++++++++++++++++++++++++++++-
 4 files changed, 112 insertions(+), 5 deletions(-)

Comments

Jonathan Cameron June 6, 2023, 3:34 p.m. UTC | #1
On Sun, 04 Jun 2023 16:33:23 -0700
Dan Williams <dan.j.williams@intel.com> wrote:

> Mock-up a device that does not have a standard mailbox, i.e. a device
> that does not implement the CXL memory-device class code, but wants to
> map "device" memory (aka Type-2, aka HDM-D[B], aka accelerator memory).
> 
> For extra code coverage make this device an RCD to test region creation
> flows in the presence of an RCH topology (memory device modeled as a
> root-complex-integrated-endpoint RCIEP).
> 
> Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Whilst this looks superficially fine, I haven't reviewed it in enough
depth to give a tag. Might get back to it at some point, but don't
wait on me!

Jonathan
Vikram Sethi June 7, 2023, 9:09 p.m. UTC | #2
Thanks for posting this Dan. 
> From: Dan Williams <dan.j.williams@intel.com>
> Sent: Sunday, June 4, 2023 6:33 PM
> To: linux-cxl@vger.kernel.org
> Cc: ira.weiny@intel.com; navneet.singh@intel.com
> Subject: [PATCH 19/19] tools/testing/cxl: Emulate a CXL accelerator with local
> memory 
> 
> Mock-up a device that does not have a standard mailbox, i.e. a device that
> does not implement the CXL memory-device class code, but wants to map
> "device" memory (aka Type-2, aka HDM-D[B], aka accelerator memory).
> 
> For extra code coverage make this device an RCD to test region creation
> flows in the presence of an RCH topology (memory device modeled as a
> root-complex-integrated-endpoint RCIEP).
> 
> Signed-off-by: Dan Williams <dan.j.williams@intel.com>
> ---
>  drivers/cxl/core/memdev.c    |   15 +++++++
>  drivers/cxl/cxlmem.h         |    1
>  tools/testing/cxl/test/cxl.c |   16 +++++++-
>  tools/testing/cxl/test/mem.c |   85
> +++++++++++++++++++++++++++++++++++++++++-
>  4 files changed, 112 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c index
> 859c43c340bb..5d1ba7a72567 100644
> --- a/drivers/cxl/core/memdev.c
> +++ b/drivers/cxl/core/memdev.c
> @@ -467,6 +467,21 @@ static void detach_memdev(struct work_struct
> *work)
>         put_device(&cxlmd->dev);
>  }
> 
> +struct cxl_dev_state *cxl_accel_state_create(struct device *dev) {
> +       struct cxl_dev_state *cxlds;
> +
> +       cxlds = devm_kzalloc(dev, sizeof(*cxlds), GFP_KERNEL);
> +       if (!cxlds)
> +               return ERR_PTR(-ENOMEM);
> +
> +       cxlds->dev = dev;
> +       cxlds->type = CXL_DEVTYPE_DEVMEM;
> +
> +       return cxlds;
> +}
> +EXPORT_SYMBOL_NS_GPL(cxl_accel_state_create, CXL);
> +
>  static struct lock_class_key cxl_memdev_key;
> 
>  static struct cxl_memdev *cxl_memdev_alloc(struct cxl_dev_state *cxlds,
> diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h index
> ad7f806549d3..89e560ea14c0 100644
> --- a/drivers/cxl/cxlmem.h
> +++ b/drivers/cxl/cxlmem.h
> @@ -746,6 +746,7 @@ int cxl_await_media_ready(struct cxl_dev_state
> *cxlds);  int cxl_enumerate_cmds(struct cxl_memdev_state *mds);  int
> cxl_mem_create_range_info(struct cxl_memdev_state *mds);  struct
> cxl_memdev_state *cxl_memdev_state_create(struct device *dev);
> +struct cxl_dev_state *cxl_accel_state_create(struct device *dev);
>  void set_exclusive_cxl_commands(struct cxl_memdev_state *mds,
>                                 unsigned long *cmds);  void
> clear_exclusive_cxl_commands(struct cxl_memdev_state *mds, diff --git
> a/tools/testing/cxl/test/cxl.c b/tools/testing/cxl/test/cxl.c index
> e3f1b2e88e3e..385cdeeab22c 100644
> --- a/tools/testing/cxl/test/cxl.c
> +++ b/tools/testing/cxl/test/cxl.c
> @@ -278,7 +278,7 @@ static struct {
>                         },
>                         .interleave_ways = 0,
>                         .granularity = 4,
> -                       .restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 |
> +                       .restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE2 |
>                                         ACPI_CEDT_CFMWS_RESTRICT_VOLATILE,
>                         .qtg_id = 5,
>                         .window_size = SZ_256M, @@ -713,7 +713,19 @@ static void
> default_mock_decoder(struct cxl_decoder *cxld)
> 
>         cxld->interleave_ways = 1;
>         cxld->interleave_granularity = 256;
> -       cxld->target_type = CXL_DECODER_HOSTMEM;
> +       if (is_endpoint_decoder(&cxld->dev)) {
> +               struct cxl_endpoint_decoder *cxled;
> +               struct cxl_dev_state *cxlds;
> +               struct cxl_memdev *cxlmd;
> +
> +               cxled = to_cxl_endpoint_decoder(&cxld->dev);
> +               cxlmd = cxled_to_memdev(cxled);
> +               cxlds = cxlmd->cxlds;
> +               if (cxlds->type == CXL_DEVTYPE_CLASSMEM)
> +                       cxld->target_type = CXL_DECODER_HOSTMEM;
> +               else
> +                       cxld->target_type = CXL_DECODER_DEVMEM;
> +       }
>         cxld->commit = mock_decoder_commit;
>         cxld->reset = mock_decoder_reset;  } diff --git
> a/tools/testing/cxl/test/mem.c b/tools/testing/cxl/test/mem.c index
> 6fb5718588f3..620bfcf5e5a5 100644
> --- a/tools/testing/cxl/test/mem.c
> +++ b/tools/testing/cxl/test/mem.c
> @@ -1189,11 +1189,21 @@ static void label_area_release(void *lsa)
>         vfree(lsa);
>  }
> 
> +#define CXL_MOCKMEM_RCD BIT(0)
> +#define CXL_MOCKMEM_TYPE2 BIT(1)
> +
>  static bool is_rcd(struct platform_device *pdev)  {
>         const struct platform_device_id *id = platform_get_device_id(pdev);
> 
> -       return !!id->driver_data;
> +       return !!(id->driver_data & CXL_MOCKMEM_RCD); }
> +
> +static bool is_type2(struct platform_device *pdev) {
> +       const struct platform_device_id *id =
> +platform_get_device_id(pdev);
> +
> +       return !!(id->driver_data & CXL_MOCKMEM_TYPE2);
>  }
> 
>  static ssize_t event_trigger_store(struct device *dev, @@ -1205,7 +1215,7
> @@ static ssize_t event_trigger_store(struct device *dev,  }  static
> DEVICE_ATTR_WO(event_trigger);
> 
> -static int cxl_mock_mem_probe(struct platform_device *pdev)
> +static int __cxl_mock_mem_probe(struct platform_device *pdev)
>  {
>         struct device *dev = &pdev->dev;
>         struct cxl_memdev *cxlmd;
> @@ -1274,6 +1284,75 @@ static int cxl_mock_mem_probe(struct
> platform_device *pdev)
>         return 0;
>  }
> 
> +static int cxl_mock_type2_probe(struct platform_device *pdev) {
> +       struct cxl_endpoint_decoder *cxled;
> +       struct device *dev = &pdev->dev;
> +       struct cxl_root_decoder *cxlrd;
> +       struct cxl_dev_state *cxlds;
> +       struct cxl_port *endpoint;
> +       struct cxl_memdev *cxlmd;
> +       resource_size_t max = 0;
> +       int rc;
> +
> +       cxlds = cxl_accel_state_create(dev);
> +       if (IS_ERR(cxlds))
> +               return PTR_ERR(cxlds);
> +
> +       cxlds->serial = pdev->id;
> +       cxlds->component_reg_phys = CXL_RESOURCE_NONE;
> +       cxlds->dpa_res = DEFINE_RES_MEM(0, DEV_SIZE);
> +       cxlds->ram_res = DEFINE_RES_MEM_NAMED(0, DEV_SIZE, "ram");
> +       cxlds->pmem_res = DEFINE_RES_MEM_NAMED(DEV_SIZE, 0, "pmem");
> +       if (is_rcd(pdev))
> +               cxlds->rcd = true;
> +
> +       rc = request_resource(&cxlds->dpa_res, &cxlds->ram_res);
> +       if (rc)
> +               return rc;
> +
> +       cxlmd = devm_cxl_add_memdev(cxlds);
> +       if (IS_ERR(cxlmd))
> +               return PTR_ERR(cxlmd);
> +
> +       endpoint = cxl_acquire_endpoint(cxlmd);
> +       if (IS_ERR(endpoint))
> +               return PTR_ERR(endpoint);
> +
> +       cxlrd = cxl_hpa_freespace(endpoint, &endpoint->host_bridge, 1,
> +                                 CXL_DECODER_F_RAM | CXL_DECODER_F_TYPE2,
> +                                 &max);
> +

IIUC, finding free HPA space is for the case where the platform FW has not already allocated it and initialized the HDM ranges in the device decoders, correct?
If the accelerator driver recognized that FW had initialized its HPA ranges in the device decoders (without committing/locking the decoders), could it skip the cxl_hpa_freespace call?
It would seem reasonable for FW to init the decoder but not commit/lock it. 

> +       if (IS_ERR(cxlrd)) {
> +               rc = PTR_ERR(cxlrd);
> +               goto out;
> +       }
> +
> +       cxled = cxl_request_dpa(endpoint, CXL_DECODER_RAM, 0, max);
> +       if (IS_ERR(cxled)) {
> +               rc = PTR_ERR(cxled);
> +               goto out_cxlrd;
> +       }
> +
> +       /* A real driver would do something with the returned region */
> +       rc = PTR_ERR_OR_ZERO(cxl_create_region(cxlrd, &cxled, 1));

Assuming the accelerator driver wanted to add some, or all of its coherent memory to the kernel MM via add_memory_driver_managed, I think it would get the HPA ranges from the decoder's hpa_range field. 
But that API also needs a node ID. 
If the FW ACPI tables had shown the accelerator Generic Initiator Affinity structure, then I believe the accelerator's device struct should already have its numa node, and the same could be passed to add_memory_driver_managed. Does that sound right, or is there a better way to ensure the accelerator memory gets a distinct NUMA node?
If the ACPI tables had not shown the device as a generic initiator, is there any notion of the cxl memory device structs having a new/distinct NUMA node for the memory device, or would it just be pointing to the NUMA node of the associated CPU socket which has the host bridge or a default 0 NUMA node?

> +
> +       put_device(cxled_dev(cxled));
> +out_cxlrd:
> +       put_device(cxlrd_dev(cxlrd));
> +out:
> +       cxl_release_endpoint(cxlmd, endpoint);
> +
> +       return rc;
> +}
> +
> +static int cxl_mock_mem_probe(struct platform_device *pdev) {
> +       if (is_type2(pdev))
> +               return cxl_mock_type2_probe(pdev);
> +       return __cxl_mock_mem_probe(pdev); }
> +
>  static ssize_t security_lock_show(struct device *dev,
>                                   struct device_attribute *attr, char *buf)  { @@ -1316,7
> +1395,7 @@ ATTRIBUTE_GROUPS(cxl_mock_mem);
> 
>  static const struct platform_device_id cxl_mock_mem_ids[] = {
>         { .name = "cxl_mem", 0 },
> -       { .name = "cxl_rcd", 1 },
> +       { .name = "cxl_rcd", CXL_MOCKMEM_RCD | CXL_MOCKMEM_TYPE2 },
>         { },
>  };
>  MODULE_DEVICE_TABLE(platform, cxl_mock_mem_ids);
Jonathan Cameron June 8, 2023, 10:47 a.m. UTC | #3
On Wed, 7 Jun 2023 21:09:21 +0000
Vikram Sethi <vsethi@nvidia.com> wrote:

> Thanks for posting this Dan. 
> > From: Dan Williams <dan.j.williams@intel.com>
> > Sent: Sunday, June 4, 2023 6:33 PM
> > To: linux-cxl@vger.kernel.org
> > Cc: ira.weiny@intel.com; navneet.singh@intel.com
> > Subject: [PATCH 19/19] tools/testing/cxl: Emulate a CXL accelerator with local
> > memory 
> > 
> > Mock-up a device that does not have a standard mailbox, i.e. a device that
> > does not implement the CXL memory-device class code, but wants to map
> > "device" memory (aka Type-2, aka HDM-D[B], aka accelerator memory).
> > 
> > For extra code coverage make this device an RCD to test region creation
> > flows in the presence of an RCH topology (memory device modeled as a
> > root-complex-integrated-endpoint RCIEP).
> > 
> > Signed-off-by: Dan Williams <dan.j.williams@intel.com>
> > ---
> >  drivers/cxl/core/memdev.c    |   15 +++++++
> >  drivers/cxl/cxlmem.h         |    1
> >  tools/testing/cxl/test/cxl.c |   16 +++++++-
> >  tools/testing/cxl/test/mem.c |   85
> > +++++++++++++++++++++++++++++++++++++++++-
> >  4 files changed, 112 insertions(+), 5 deletions(-)
> > 
> > diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c index
> > 859c43c340bb..5d1ba7a72567 100644
> > --- a/drivers/cxl/core/memdev.c
> > +++ b/drivers/cxl/core/memdev.c
> > @@ -467,6 +467,21 @@ static void detach_memdev(struct work_struct
> > *work)
> >         put_device(&cxlmd->dev);
> >  }
> > 
> > +struct cxl_dev_state *cxl_accel_state_create(struct device *dev) {
> > +       struct cxl_dev_state *cxlds;
> > +
> > +       cxlds = devm_kzalloc(dev, sizeof(*cxlds), GFP_KERNEL);
> > +       if (!cxlds)
> > +               return ERR_PTR(-ENOMEM);
> > +
> > +       cxlds->dev = dev;
> > +       cxlds->type = CXL_DEVTYPE_DEVMEM;
> > +
> > +       return cxlds;
> > +}
> > +EXPORT_SYMBOL_NS_GPL(cxl_accel_state_create, CXL);
> > +
> >  static struct lock_class_key cxl_memdev_key;
> > 
> >  static struct cxl_memdev *cxl_memdev_alloc(struct cxl_dev_state *cxlds,
> > diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h index
> > ad7f806549d3..89e560ea14c0 100644
> > --- a/drivers/cxl/cxlmem.h
> > +++ b/drivers/cxl/cxlmem.h
> > @@ -746,6 +746,7 @@ int cxl_await_media_ready(struct cxl_dev_state
> > *cxlds);  int cxl_enumerate_cmds(struct cxl_memdev_state *mds);  int
> > cxl_mem_create_range_info(struct cxl_memdev_state *mds);  struct
> > cxl_memdev_state *cxl_memdev_state_create(struct device *dev);
> > +struct cxl_dev_state *cxl_accel_state_create(struct device *dev);
> >  void set_exclusive_cxl_commands(struct cxl_memdev_state *mds,
> >                                 unsigned long *cmds);  void
> > clear_exclusive_cxl_commands(struct cxl_memdev_state *mds, diff --git
> > a/tools/testing/cxl/test/cxl.c b/tools/testing/cxl/test/cxl.c index
> > e3f1b2e88e3e..385cdeeab22c 100644
> > --- a/tools/testing/cxl/test/cxl.c
> > +++ b/tools/testing/cxl/test/cxl.c
> > @@ -278,7 +278,7 @@ static struct {
> >                         },
> >                         .interleave_ways = 0,
> >                         .granularity = 4,
> > -                       .restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 |
> > +                       .restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE2 |
> >                                         ACPI_CEDT_CFMWS_RESTRICT_VOLATILE,
> >                         .qtg_id = 5,
> >                         .window_size = SZ_256M, @@ -713,7 +713,19 @@ static void
> > default_mock_decoder(struct cxl_decoder *cxld)
> > 
> >         cxld->interleave_ways = 1;
> >         cxld->interleave_granularity = 256;
> > -       cxld->target_type = CXL_DECODER_HOSTMEM;
> > +       if (is_endpoint_decoder(&cxld->dev)) {
> > +               struct cxl_endpoint_decoder *cxled;
> > +               struct cxl_dev_state *cxlds;
> > +               struct cxl_memdev *cxlmd;
> > +
> > +               cxled = to_cxl_endpoint_decoder(&cxld->dev);
> > +               cxlmd = cxled_to_memdev(cxled);
> > +               cxlds = cxlmd->cxlds;
> > +               if (cxlds->type == CXL_DEVTYPE_CLASSMEM)
> > +                       cxld->target_type = CXL_DECODER_HOSTMEM;
> > +               else
> > +                       cxld->target_type = CXL_DECODER_DEVMEM;
> > +       }
> >         cxld->commit = mock_decoder_commit;
> >         cxld->reset = mock_decoder_reset;  } diff --git
> > a/tools/testing/cxl/test/mem.c b/tools/testing/cxl/test/mem.c index
> > 6fb5718588f3..620bfcf5e5a5 100644
> > --- a/tools/testing/cxl/test/mem.c
> > +++ b/tools/testing/cxl/test/mem.c
> > @@ -1189,11 +1189,21 @@ static void label_area_release(void *lsa)
> >         vfree(lsa);
> >  }
> > 
> > +#define CXL_MOCKMEM_RCD BIT(0)
> > +#define CXL_MOCKMEM_TYPE2 BIT(1)
> > +
> >  static bool is_rcd(struct platform_device *pdev)  {
> >         const struct platform_device_id *id = platform_get_device_id(pdev);
> > 
> > -       return !!id->driver_data;
> > +       return !!(id->driver_data & CXL_MOCKMEM_RCD); }
> > +
> > +static bool is_type2(struct platform_device *pdev) {
> > +       const struct platform_device_id *id =
> > +platform_get_device_id(pdev);
> > +
> > +       return !!(id->driver_data & CXL_MOCKMEM_TYPE2);
> >  }
> > 
> >  static ssize_t event_trigger_store(struct device *dev, @@ -1205,7 +1215,7
> > @@ static ssize_t event_trigger_store(struct device *dev,  }  static
> > DEVICE_ATTR_WO(event_trigger);
> > 
> > -static int cxl_mock_mem_probe(struct platform_device *pdev)
> > +static int __cxl_mock_mem_probe(struct platform_device *pdev)
> >  {
> >         struct device *dev = &pdev->dev;
> >         struct cxl_memdev *cxlmd;
> > @@ -1274,6 +1284,75 @@ static int cxl_mock_mem_probe(struct
> > platform_device *pdev)
> >         return 0;
> >  }
> > 
> > +static int cxl_mock_type2_probe(struct platform_device *pdev) {
> > +       struct cxl_endpoint_decoder *cxled;
> > +       struct device *dev = &pdev->dev;
> > +       struct cxl_root_decoder *cxlrd;
> > +       struct cxl_dev_state *cxlds;
> > +       struct cxl_port *endpoint;
> > +       struct cxl_memdev *cxlmd;
> > +       resource_size_t max = 0;
> > +       int rc;
> > +
> > +       cxlds = cxl_accel_state_create(dev);
> > +       if (IS_ERR(cxlds))
> > +               return PTR_ERR(cxlds);
> > +
> > +       cxlds->serial = pdev->id;
> > +       cxlds->component_reg_phys = CXL_RESOURCE_NONE;
> > +       cxlds->dpa_res = DEFINE_RES_MEM(0, DEV_SIZE);
> > +       cxlds->ram_res = DEFINE_RES_MEM_NAMED(0, DEV_SIZE, "ram");
> > +       cxlds->pmem_res = DEFINE_RES_MEM_NAMED(DEV_SIZE, 0, "pmem");
> > +       if (is_rcd(pdev))
> > +               cxlds->rcd = true;
> > +
> > +       rc = request_resource(&cxlds->dpa_res, &cxlds->ram_res);
> > +       if (rc)
> > +               return rc;
> > +
> > +       cxlmd = devm_cxl_add_memdev(cxlds);
> > +       if (IS_ERR(cxlmd))
> > +               return PTR_ERR(cxlmd);
> > +
> > +       endpoint = cxl_acquire_endpoint(cxlmd);
> > +       if (IS_ERR(endpoint))
> > +               return PTR_ERR(endpoint);
> > +
> > +       cxlrd = cxl_hpa_freespace(endpoint, &endpoint->host_bridge, 1,
> > +                                 CXL_DECODER_F_RAM | CXL_DECODER_F_TYPE2,
> > +                                 &max);
> > +  
> 

> IIUC, finding free HPA space is for the case where the platform FW
> has not already allocated it and initialized the HDM ranges in the
> device decoders, correct? If the accelerator driver recognized that
> FW had initialized its HPA ranges in the device decoders (without
> committing/locking the decoders), could it skip the cxl_hpa_freespace
> call? It would seem reasonable for FW to init the decoder but not
> commit/lock it. 

I'd find it a bit odd if firmware did a partial job...
Why do you think it might?  To pass a hint to the kernel?


> 
> > +       if (IS_ERR(cxlrd)) {
> > +               rc = PTR_ERR(cxlrd);
> > +               goto out;
> > +       }
> > +
> > +       cxled = cxl_request_dpa(endpoint, CXL_DECODER_RAM, 0, max);
> > +       if (IS_ERR(cxled)) {
> > +               rc = PTR_ERR(cxled);
> > +               goto out_cxlrd;
> > +       }
> > +
> > +       /* A real driver would do something with the returned region */
> > +       rc = PTR_ERR_OR_ZERO(cxl_create_region(cxlrd, &cxled, 1));  
> 

> Assuming the accelerator driver wanted to add some, or all of its
> coherent memory to the kernel MM via add_memory_driver_managed, I
> think it would get the HPA ranges from the decoder's hpa_range field.
> But that API also needs a node ID. If the FW ACPI tables had shown
> the accelerator Generic Initiator Affinity structure, then I believe
> the accelerator's device struct should already have its numa node,
> and the same could be passed to add_memory_driver_managed. Does that
> sound right, or is there a better way to ensure the accelerator
> memory gets a distinct NUMA node?

If it has a GI node assigned I agree that probably makes sense.
There might be some fiddly corners where it doesn't but they are probably
the exception (multiple memory types, or different access characteristics,
need to represent some other topology complexities)


> If the ACPI tables had not shown
> the device as a generic initiator, is there any notion of the cxl
> memory device structs having a new/distinct NUMA node for the memory
> device, or would it just be pointing to the NUMA node of the
> associated CPU socket which has the host bridge or a default 0 NUMA
> node?

I'll leave this one for others (mostly :). I personally think the current model is too
simplistic and we need to bite the bullet and work out how to do full
dynamic numa node creation rather than using some preallocated ones.

> 
> > +
> > +       put_device(cxled_dev(cxled));
> > +out_cxlrd:
> > +       put_device(cxlrd_dev(cxlrd));
> > +out:
> > +       cxl_release_endpoint(cxlmd, endpoint);
> > +
> > +       return rc;
> > +}
> > +
> > +static int cxl_mock_mem_probe(struct platform_device *pdev) {
> > +       if (is_type2(pdev))
> > +               return cxl_mock_type2_probe(pdev);
> > +       return __cxl_mock_mem_probe(pdev); }
> > +
> >  static ssize_t security_lock_show(struct device *dev,
> >                                   struct device_attribute *attr, char *buf)  { @@ -1316,7
> > +1395,7 @@ ATTRIBUTE_GROUPS(cxl_mock_mem);
> > 
> >  static const struct platform_device_id cxl_mock_mem_ids[] = {
> >         { .name = "cxl_mem", 0 },
> > -       { .name = "cxl_rcd", 1 },
> > +       { .name = "cxl_rcd", CXL_MOCKMEM_RCD | CXL_MOCKMEM_TYPE2 },
> >         { },
> >  };
> >  MODULE_DEVICE_TABLE(platform, cxl_mock_mem_ids);  
>
Vikram Sethi June 8, 2023, 2:34 p.m. UTC | #4
> From: Jonathan Cameron <Jonathan.Cameron@Huawei.com>
> Sent: Thursday, June 8, 2023 5:47 AM
> To: Vikram Sethi <vsethi@nvidia.com>
> Cc: Dan Williams <dan.j.williams@intel.com>; linux-cxl@vger.kernel.org;
> ira.weiny@intel.com; navneet.singh@intel.com
> Subject: Re: [PATCH 19/19] tools/testing/cxl: Emulate a CXL accelerator with
> local memory
> On Wed, 7 Jun 2023 21:09:21 +0000
> Vikram Sethi <vsethi@nvidia.com> wrote:
> 
> > Thanks for posting this Dan.
> > > From: Dan Williams <dan.j.williams@intel.com>
> > > Sent: Sunday, June 4, 2023 6:33 PM
> > > To: linux-cxl@vger.kernel.org
> > > Cc: ira.weiny@intel.com; navneet.singh@intel.com
> > > Subject: [PATCH 19/19] tools/testing/cxl: Emulate a CXL accelerator
> > > with local memory
> > >
> > > Mock-up a device that does not have a standard mailbox, i.e. a
> > > device that does not implement the CXL memory-device class code, but
> > > wants to map "device" memory (aka Type-2, aka HDM-D[B], aka
> accelerator memory).
> > >
> > > +
> > > +       cxlrd = cxl_hpa_freespace(endpoint, &endpoint->host_bridge, 1,
> > > +                                 CXL_DECODER_F_RAM | CXL_DECODER_F_TYPE2,
> > > +                                 &max);
> > > +
> >
> 
> > IIUC, finding free HPA space is for the case where the platform FW has
> > not already allocated it and initialized the HDM ranges in the device
> > decoders, correct? If the accelerator driver recognized that FW had
> > initialized its HPA ranges in the device decoders (without
> > committing/locking the decoders), could it skip the cxl_hpa_freespace
> > call? It would seem reasonable for FW to init the decoder but not
> > commit/lock it.
> 
> I'd find it a bit odd if firmware did a partial job...
> Why do you think it might?  To pass a hint to the kernel?
> 
Firmware could certainly initialize, commit, and lock the decoder for accelerators that are soldered on the motherboard. 
I just wasn't sure if the CXL core code could deal with a committed and locked decoder. 
I was also thinking about chiplets within a package with new specifications like UCIe where it is possible that chip designers
assigned a fixed HPA range in the chip address map to a CXL device chiplet's HDM. Would it be sufficient for FW to convey this by committing and
locking the decoders, or would we need some new ACPI flags telling the kernel that this device's decoders can really decode a fixed HPA range and not to change the fixed values?
A similar notion exists in PCIe of fixed BARs called enhanced allocation with hardwired addresses.

> 
> >
> > > +       if (IS_ERR(cxlrd)) {
> > > +               rc = PTR_ERR(cxlrd);
> > > +               goto out;
> > > +       }
> > > +
> > > +       cxled = cxl_request_dpa(endpoint, CXL_DECODER_RAM, 0, max);
> > > +       if (IS_ERR(cxled)) {
> > > +               rc = PTR_ERR(cxled);
> > > +               goto out_cxlrd;
> > > +       }
> > > +
> > > +       /* A real driver would do something with the returned region */
> > > +       rc = PTR_ERR_OR_ZERO(cxl_create_region(cxlrd, &cxled, 1));
> >
> 
> > Assuming the accelerator driver wanted to add some, or all of its
> > coherent memory to the kernel MM via add_memory_driver_managed, I
> > think it would get the HPA ranges from the decoder's hpa_range field.
> > But that API also needs a node ID. If the FW ACPI tables had shown the
> > accelerator Generic Initiator Affinity structure, then I believe the
> > accelerator's device struct should already have its numa node, and the
> > same could be passed to add_memory_driver_managed. Does that sound
> > right, or is there a better way to ensure the accelerator memory gets
> > a distinct NUMA node?
> 
> If it has a GI node assigned I agree that probably makes sense.
> There might be some fiddly corners where it doesn't but they are probably
> the exception (multiple memory types, or different access characteristics,
> need to represent some other topology complexities)
> 
> 
> > If the ACPI tables had not shown
> > the device as a generic initiator, is there any notion of the cxl
> > memory device structs having a new/distinct NUMA node for the memory
> > device, or would it just be pointing to the NUMA node of the
> > associated CPU socket which has the host bridge or a default 0 NUMA
> > node?
> 
> I'll leave this one for others (mostly :). I personally think the current model is too
> simplistic and we need to bite the bullet and work out how to do full dynamic
> numa node creation rather than using some preallocated ones.
> 
> >
> > > +
> > > +       put_device(cxled_dev(cxled));
> > > +out_cxlrd:
> > > +       put_device(cxlrd_dev(cxlrd));
> > > +out:
> > > +       cxl_release_endpoint(cxlmd, endpoint);
> > > +
> > > +       return rc;
> > > +}
> > > +
> > > +static int cxl_mock_mem_probe(struct platform_device *pdev) {
> > > +       if (is_type2(pdev))
> > > +               return cxl_mock_type2_probe(pdev);
> > > +       return __cxl_mock_mem_probe(pdev); }
> > > +
> > >  static ssize_t security_lock_show(struct device *dev,
> > >                                   struct device_attribute *attr,
> > > char *buf)  { @@ -1316,7
> > > +1395,7 @@ ATTRIBUTE_GROUPS(cxl_mock_mem);
> > >
> > >  static const struct platform_device_id cxl_mock_mem_ids[] = {
> > >         { .name = "cxl_mem", 0 },
> > > -       { .name = "cxl_rcd", 1 },
> > > +       { .name = "cxl_rcd", CXL_MOCKMEM_RCD | CXL_MOCKMEM_TYPE2
> },
> > >         { },
> > >  };
> > >  MODULE_DEVICE_TABLE(platform, cxl_mock_mem_ids);
> >
Jonathan Cameron June 8, 2023, 3:22 p.m. UTC | #5
On Thu, 8 Jun 2023 14:34:48 +0000
Vikram Sethi <vsethi@nvidia.com> wrote:

> > From: Jonathan Cameron <Jonathan.Cameron@Huawei.com>
> > Sent: Thursday, June 8, 2023 5:47 AM
> > To: Vikram Sethi <vsethi@nvidia.com>
> > Cc: Dan Williams <dan.j.williams@intel.com>; linux-cxl@vger.kernel.org;
> > ira.weiny@intel.com; navneet.singh@intel.com
> > Subject: Re: [PATCH 19/19] tools/testing/cxl: Emulate a CXL accelerator with
> > local memory
> > On Wed, 7 Jun 2023 21:09:21 +0000
> > Vikram Sethi <vsethi@nvidia.com> wrote:
> >   
> > > Thanks for posting this Dan.  
> > > > From: Dan Williams <dan.j.williams@intel.com>
> > > > Sent: Sunday, June 4, 2023 6:33 PM
> > > > To: linux-cxl@vger.kernel.org
> > > > Cc: ira.weiny@intel.com; navneet.singh@intel.com
> > > > Subject: [PATCH 19/19] tools/testing/cxl: Emulate a CXL accelerator
> > > > with local memory
> > > >
> > > > Mock-up a device that does not have a standard mailbox, i.e. a
> > > > device that does not implement the CXL memory-device class code, but
> > > > wants to map "device" memory (aka Type-2, aka HDM-D[B], aka  
> > accelerator memory).  
> > > >
> > > > +
> > > > +       cxlrd = cxl_hpa_freespace(endpoint, &endpoint->host_bridge, 1,
> > > > +                                 CXL_DECODER_F_RAM | CXL_DECODER_F_TYPE2,
> > > > +                                 &max);
> > > > +  
> > >  
> >   
> > > IIUC, finding free HPA space is for the case where the platform FW has
> > > not already allocated it and initialized the HDM ranges in the device
> > > decoders, correct? If the accelerator driver recognized that FW had
> > > initialized its HPA ranges in the device decoders (without
> > > committing/locking the decoders), could it skip the cxl_hpa_freespace
> > > call? It would seem reasonable for FW to init the decoder but not
> > > commit/lock it.  
> > 
> > I'd find it a bit odd if firmware did a partial job...
> > Why do you think it might?  To pass a hint to the kernel?
> >   

> Firmware could certainly initialize, commit, and lock the decoder for
> accelerators that are soldered on the motherboard. I just wasn't sure
> if the CXL core code could deal with a committed and locked decoder. 

It can for type 3 devices, I haven't checked it still applies for
this type 2 code.

> I was also thinking about chiplets within a package with new
> specifications like UCIe where it is possible that chip designers
> assigned a fixed HPA range in the chip address map to a CXL device
> chiplet's HDM. Would it be sufficient for FW to convey this by
> committing and locking the decoders, or would we need some new ACPI
> flags telling the kernel that this device's decoders can really
> decode a fixed HPA range and not to change the fixed values? A
> similar notion exists in PCIe of fixed BARs called enhanced
> allocation with hardwired addresses.

If they are hard wired then FW should just lock them I think.

Jonathan
diff mbox series

Patch

diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c
index 859c43c340bb..5d1ba7a72567 100644
--- a/drivers/cxl/core/memdev.c
+++ b/drivers/cxl/core/memdev.c
@@ -467,6 +467,21 @@  static void detach_memdev(struct work_struct *work)
 	put_device(&cxlmd->dev);
 }
 
+struct cxl_dev_state *cxl_accel_state_create(struct device *dev)
+{
+	struct cxl_dev_state *cxlds;
+
+	cxlds = devm_kzalloc(dev, sizeof(*cxlds), GFP_KERNEL);
+	if (!cxlds)
+		return ERR_PTR(-ENOMEM);
+
+	cxlds->dev = dev;
+	cxlds->type = CXL_DEVTYPE_DEVMEM;
+
+	return cxlds;
+}
+EXPORT_SYMBOL_NS_GPL(cxl_accel_state_create, CXL);
+
 static struct lock_class_key cxl_memdev_key;
 
 static struct cxl_memdev *cxl_memdev_alloc(struct cxl_dev_state *cxlds,
diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h
index ad7f806549d3..89e560ea14c0 100644
--- a/drivers/cxl/cxlmem.h
+++ b/drivers/cxl/cxlmem.h
@@ -746,6 +746,7 @@  int cxl_await_media_ready(struct cxl_dev_state *cxlds);
 int cxl_enumerate_cmds(struct cxl_memdev_state *mds);
 int cxl_mem_create_range_info(struct cxl_memdev_state *mds);
 struct cxl_memdev_state *cxl_memdev_state_create(struct device *dev);
+struct cxl_dev_state *cxl_accel_state_create(struct device *dev);
 void set_exclusive_cxl_commands(struct cxl_memdev_state *mds,
 				unsigned long *cmds);
 void clear_exclusive_cxl_commands(struct cxl_memdev_state *mds,
diff --git a/tools/testing/cxl/test/cxl.c b/tools/testing/cxl/test/cxl.c
index e3f1b2e88e3e..385cdeeab22c 100644
--- a/tools/testing/cxl/test/cxl.c
+++ b/tools/testing/cxl/test/cxl.c
@@ -278,7 +278,7 @@  static struct {
 			},
 			.interleave_ways = 0,
 			.granularity = 4,
-			.restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 |
+			.restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE2 |
 					ACPI_CEDT_CFMWS_RESTRICT_VOLATILE,
 			.qtg_id = 5,
 			.window_size = SZ_256M,
@@ -713,7 +713,19 @@  static void default_mock_decoder(struct cxl_decoder *cxld)
 
 	cxld->interleave_ways = 1;
 	cxld->interleave_granularity = 256;
-	cxld->target_type = CXL_DECODER_HOSTMEM;
+	if (is_endpoint_decoder(&cxld->dev)) {
+		struct cxl_endpoint_decoder *cxled;
+		struct cxl_dev_state *cxlds;
+		struct cxl_memdev *cxlmd;
+
+		cxled = to_cxl_endpoint_decoder(&cxld->dev);
+		cxlmd = cxled_to_memdev(cxled);
+		cxlds = cxlmd->cxlds;
+		if (cxlds->type == CXL_DEVTYPE_CLASSMEM)
+			cxld->target_type = CXL_DECODER_HOSTMEM;
+		else
+			cxld->target_type = CXL_DECODER_DEVMEM;
+	}
 	cxld->commit = mock_decoder_commit;
 	cxld->reset = mock_decoder_reset;
 }
diff --git a/tools/testing/cxl/test/mem.c b/tools/testing/cxl/test/mem.c
index 6fb5718588f3..620bfcf5e5a5 100644
--- a/tools/testing/cxl/test/mem.c
+++ b/tools/testing/cxl/test/mem.c
@@ -1189,11 +1189,21 @@  static void label_area_release(void *lsa)
 	vfree(lsa);
 }
 
+#define CXL_MOCKMEM_RCD BIT(0)
+#define CXL_MOCKMEM_TYPE2 BIT(1)
+
 static bool is_rcd(struct platform_device *pdev)
 {
 	const struct platform_device_id *id = platform_get_device_id(pdev);
 
-	return !!id->driver_data;
+	return !!(id->driver_data & CXL_MOCKMEM_RCD);
+}
+
+static bool is_type2(struct platform_device *pdev)
+{
+	const struct platform_device_id *id = platform_get_device_id(pdev);
+
+	return !!(id->driver_data & CXL_MOCKMEM_TYPE2);
 }
 
 static ssize_t event_trigger_store(struct device *dev,
@@ -1205,7 +1215,7 @@  static ssize_t event_trigger_store(struct device *dev,
 }
 static DEVICE_ATTR_WO(event_trigger);
 
-static int cxl_mock_mem_probe(struct platform_device *pdev)
+static int __cxl_mock_mem_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
 	struct cxl_memdev *cxlmd;
@@ -1274,6 +1284,75 @@  static int cxl_mock_mem_probe(struct platform_device *pdev)
 	return 0;
 }
 
+static int cxl_mock_type2_probe(struct platform_device *pdev)
+{
+	struct cxl_endpoint_decoder *cxled;
+	struct device *dev = &pdev->dev;
+	struct cxl_root_decoder *cxlrd;
+	struct cxl_dev_state *cxlds;
+	struct cxl_port *endpoint;
+	struct cxl_memdev *cxlmd;
+	resource_size_t max = 0;
+	int rc;
+
+	cxlds = cxl_accel_state_create(dev);
+	if (IS_ERR(cxlds))
+		return PTR_ERR(cxlds);
+
+	cxlds->serial = pdev->id;
+	cxlds->component_reg_phys = CXL_RESOURCE_NONE;
+	cxlds->dpa_res = DEFINE_RES_MEM(0, DEV_SIZE);
+	cxlds->ram_res = DEFINE_RES_MEM_NAMED(0, DEV_SIZE, "ram");
+	cxlds->pmem_res = DEFINE_RES_MEM_NAMED(DEV_SIZE, 0, "pmem");
+	if (is_rcd(pdev))
+		cxlds->rcd = true;
+
+	rc = request_resource(&cxlds->dpa_res, &cxlds->ram_res);
+	if (rc)
+		return rc;
+
+	cxlmd = devm_cxl_add_memdev(cxlds);
+	if (IS_ERR(cxlmd))
+		return PTR_ERR(cxlmd);
+
+	endpoint = cxl_acquire_endpoint(cxlmd);
+	if (IS_ERR(endpoint))
+		return PTR_ERR(endpoint);
+
+	cxlrd = cxl_hpa_freespace(endpoint, &endpoint->host_bridge, 1,
+				  CXL_DECODER_F_RAM | CXL_DECODER_F_TYPE2,
+				  &max);
+
+	if (IS_ERR(cxlrd)) {
+		rc = PTR_ERR(cxlrd);
+		goto out;
+	}
+
+	cxled = cxl_request_dpa(endpoint, CXL_DECODER_RAM, 0, max);
+	if (IS_ERR(cxled)) {
+		rc = PTR_ERR(cxled);
+		goto out_cxlrd;
+	}
+
+	/* A real driver would do something with the returned region */
+	rc = PTR_ERR_OR_ZERO(cxl_create_region(cxlrd, &cxled, 1));
+
+	put_device(cxled_dev(cxled));
+out_cxlrd:
+	put_device(cxlrd_dev(cxlrd));
+out:
+	cxl_release_endpoint(cxlmd, endpoint);
+
+	return rc;
+}
+
+static int cxl_mock_mem_probe(struct platform_device *pdev)
+{
+	if (is_type2(pdev))
+		return cxl_mock_type2_probe(pdev);
+	return __cxl_mock_mem_probe(pdev);
+}
+
 static ssize_t security_lock_show(struct device *dev,
 				  struct device_attribute *attr, char *buf)
 {
@@ -1316,7 +1395,7 @@  ATTRIBUTE_GROUPS(cxl_mock_mem);
 
 static const struct platform_device_id cxl_mock_mem_ids[] = {
 	{ .name = "cxl_mem", 0 },
-	{ .name = "cxl_rcd", 1 },
+	{ .name = "cxl_rcd", CXL_MOCKMEM_RCD | CXL_MOCKMEM_TYPE2 },
 	{ },
 };
 MODULE_DEVICE_TABLE(platform, cxl_mock_mem_ids);