diff mbox series

[v3,10/20] cxl: indicate probe deferral

Message ID 20240907081836.5801-11-alejandro.lucero-palau@amd.com
State New
Headers show
Series cxl: add Type2 device support | expand

Commit Message

Lucero Palau, Alejandro Sept. 7, 2024, 8:18 a.m. UTC
From: Alejandro Lucero <alucerop@amd.com>

The first stop for a CXL accelerator driver that wants to establish new
CXL.mem regions is to register a 'struct cxl_memdev'. That kicks off
cxl_mem_probe() to enumerate all 'struct cxl_port' instances in the
topology up to the root.

If the root driver has not attached yet the expectation is that the
driver waits until that link is established. The common cxl_pci_driver
has reason to keep the 'struct cxl_memdev' device attached to the bus
until the root driver attaches. An accelerator may want to instead defer
probing until CXL resources can be acquired.

Use the @endpoint attribute of a 'struct cxl_memdev' to convey when
accelerator driver probing should be deferred vs failed. Provide that
indication via a new cxl_acquire_endpoint() API that can retrieve the
probe status of the memdev.

Based on https://lore.kernel.org/linux-cxl/168592155270.1948938.11536845108449547920.stgit@dwillia2-xfh.jf.intel.com/

Signed-off-by: Alejandro Lucero <alucerop@amd.com>
Co-developed-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/cxl/core/memdev.c | 67 +++++++++++++++++++++++++++++++++++++++
 drivers/cxl/core/port.c   |  2 +-
 drivers/cxl/mem.c         |  4 ++-
 include/linux/cxl/cxl.h   |  2 ++
 4 files changed, 73 insertions(+), 2 deletions(-)

Comments

Li, Ming4 Sept. 10, 2024, 6:37 a.m. UTC | #1
On 9/7/2024 4:18 PM, alejandro.lucero-palau@amd.com wrote:
> From: Alejandro Lucero <alucerop@amd.com>
>
> The first stop for a CXL accelerator driver that wants to establish new
> CXL.mem regions is to register a 'struct cxl_memdev. That kicks off
> cxl_mem_probe() to enumerate all 'struct cxl_port' instances in the
> topology up to the root.
>
> If the root driver has not attached yet the expectation is that the
> driver waits until that link is established. The common cxl_pci_driver
> has reason to keep the 'struct cxl_memdev' device attached to the bus
> until the root driver attaches. An accelerator may want to instead defer
> probing until CXL resources can be acquired.
>
> Use the @endpoint attribute of a 'struct cxl_memdev' to convey when
> accelerator driver probing should be deferred vs failed. Provide that
> indication via a new cxl_acquire_endpoint() API that can retrieve the
> probe status of the memdev.
>
> Based on https://lore.kernel.org/linux-cxl/168592155270.1948938.11536845108449547920.stgit@dwillia2-xfh.jf.intel.com/
>
> Signed-off-by: Alejandro Lucero <alucerop@amd.com>
> Co-developed-by: Dan Williams <dan.j.williams@intel.com>
> ---
>  drivers/cxl/core/memdev.c | 67 +++++++++++++++++++++++++++++++++++++++
>  drivers/cxl/core/port.c   |  2 +-
>  drivers/cxl/mem.c         |  4 ++-
>  include/linux/cxl/cxl.h   |  2 ++
>  4 files changed, 73 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c
> index 5f8418620b70..d4406cf3ed32 100644
> --- a/drivers/cxl/core/memdev.c
> +++ b/drivers/cxl/core/memdev.c
> @@ -5,6 +5,7 @@
>  #include <linux/io-64-nonatomic-lo-hi.h>
>  #include <linux/firmware.h>
>  #include <linux/device.h>
> +#include <linux/delay.h>
>  #include <linux/slab.h>
>  #include <linux/idr.h>
>  #include <linux/pci.h>
> @@ -23,6 +24,8 @@ static DECLARE_RWSEM(cxl_memdev_rwsem);
>  static int cxl_mem_major;
>  static DEFINE_IDA(cxl_memdev_ida);
>  
> +static unsigned short endpoint_ready_timeout = HZ;
> +
>  static void cxl_memdev_release(struct device *dev)
>  {
>  	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
> @@ -1163,6 +1166,70 @@ struct cxl_memdev *devm_cxl_add_memdev(struct device *host,
>  }
>  EXPORT_SYMBOL_NS_GPL(devm_cxl_add_memdev, CXL);
>  
> +/*
> + * Try to get a locked reference on a memdev's CXL port topology
> + * connection. Be careful to observe when cxl_mem_probe() has deposited
> + * a probe deferral awaiting the arrival of the CXL root driver.
> + */
> +struct cxl_port *cxl_acquire_endpoint(struct cxl_memdev *cxlmd)
> +{
> +	struct cxl_port *endpoint;
> +	unsigned long timeout;
> +	int rc = -ENXIO;
> +
> +	/*
> +	 * A memdev creation triggers ports creation through the kernel
> +	 * device object model. An endpoint port could not be created yet
> +	 * but coming. Wait here for a gentle space of time for ensuring
> +	 * and endpoint port not there is due to some error and not because
> +	 * the race described.
> +	 *
> +	 * Note this is a similar case this function is implemented for, but
> +	 * instead of the race with the root port, this is against its own
> +	 * endpoint port.
> +	 */
> +	timeout = jiffies + endpoint_ready_timeout;
> +	do {
> +		device_lock(&cxlmd->dev);
> +		endpoint = cxlmd->endpoint;
> +		if (endpoint)
> +			break;
> +		device_unlock(&cxlmd->dev);
> +		if (msleep_interruptible(100)) {
> +			device_lock(&cxlmd->dev);
> +			break;

This can exit directly; there is no need to take the cxlmd->dev lock and then break.


> +		}
> +	} while (!time_after(jiffies, timeout));
> +
> +	if (!endpoint)
> +		goto err;
> +
> +	if (IS_ERR(endpoint)) {
> +		rc = PTR_ERR(endpoint);
> +		goto err;
> +	}
> +
> +	device_lock(&endpoint->dev);
> +	if (!endpoint->dev.driver)
> +		goto err_endpoint;
> +
> +	return endpoint;
> +
> +err_endpoint:
> +	device_unlock(&endpoint->dev);
> +err:
> +	device_unlock(&cxlmd->dev);
> +	return ERR_PTR(rc);
> +}
> +EXPORT_SYMBOL_NS(cxl_acquire_endpoint, CXL);
> +
> +void cxl_release_endpoint(struct cxl_memdev *cxlmd, struct cxl_port *endpoint)
> +{
> +	device_unlock(&endpoint->dev);
> +	device_unlock(&cxlmd->dev);
> +}
> +EXPORT_SYMBOL_NS(cxl_release_endpoint, CXL);
> +
>  static void sanitize_teardown_notifier(void *data)
>  {
>  	struct cxl_memdev_state *mds = data;
> diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c
> index 39b20ddd0296..ca2c993faa9c 100644
> --- a/drivers/cxl/core/port.c
> +++ b/drivers/cxl/core/port.c
> @@ -1554,7 +1554,7 @@ static int add_port_attach_ep(struct cxl_memdev *cxlmd,
>  		 */
>  		dev_dbg(&cxlmd->dev, "%s is a root dport\n",
>  			dev_name(dport_dev));
> -		return -ENXIO;
> +		return -EPROBE_DEFER;
>  	}
>  
>  	parent_port = find_cxl_port(dparent, &parent_dport);
> diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c
> index 5c7ad230bccb..56fd7a100c2f 100644
> --- a/drivers/cxl/mem.c
> +++ b/drivers/cxl/mem.c
> @@ -145,8 +145,10 @@ static int cxl_mem_probe(struct device *dev)
>  		return rc;
>  
>  	rc = devm_cxl_enumerate_ports(cxlmd);
> -	if (rc)
> +	if (rc) {
> +		cxlmd->endpoint = ERR_PTR(rc);
>  		return rc;
> +	}
>  
>  	parent_port = cxl_mem_find_port(cxlmd, &dport);
>  	if (!parent_port) {
> diff --git a/include/linux/cxl/cxl.h b/include/linux/cxl/cxl.h
> index fc0859f841dc..7e4580fb8659 100644
> --- a/include/linux/cxl/cxl.h
> +++ b/include/linux/cxl/cxl.h
> @@ -57,4 +57,6 @@ int cxl_release_resource(struct cxl_dev_state *cxlds, enum cxl_resource type);
>  void cxl_set_media_ready(struct cxl_dev_state *cxlds);
>  struct cxl_memdev *devm_cxl_add_memdev(struct device *host,
>  				       struct cxl_dev_state *cxlds);
> +struct cxl_port *cxl_acquire_endpoint(struct cxl_memdev *cxlmd);
> +void cxl_release_endpoint(struct cxl_memdev *cxlmd, struct cxl_port *endpoint);
>  #endif
Zhi Wang Sept. 12, 2024, 9:19 a.m. UTC | #2
On Sat, 7 Sep 2024 09:18:26 +0100
<alejandro.lucero-palau@amd.com> wrote:

> From: Alejandro Lucero <alucerop@amd.com>
> 

Hi Alejandro:

When working with V2, I noticed that if CONFIG_CXL_MEM=m and cxl_mem.ko
is not loaded, loading the type-2 driver would fail on
cxl_acquire_endpoint(). Not sure if you met the same problem.

Now we wait for it to be loaded, which does not seem like an ideal way
to handle the problem.

Thanks,
Zhi.

> The first stop for a CXL accelerator driver that wants to establish
> new CXL.mem regions is to register a 'struct cxl_memdev. That kicks
> off cxl_mem_probe() to enumerate all 'struct cxl_port' instances in
> the topology up to the root.
> 
> If the root driver has not attached yet the expectation is that the
> driver waits until that link is established. The common cxl_pci_driver
> has reason to keep the 'struct cxl_memdev' device attached to the bus
> until the root driver attaches. An accelerator may want to instead
> defer probing until CXL resources can be acquired.
> 
> Use the @endpoint attribute of a 'struct cxl_memdev' to convey when
> accelerator driver probing should be deferred vs failed. Provide that
> indication via a new cxl_acquire_endpoint() API that can retrieve the
> probe status of the memdev.
> 
> Based on
> https://lore.kernel.org/linux-cxl/168592155270.1948938.11536845108449547920.stgit@dwillia2-xfh.jf.intel.com/
> 
> Signed-off-by: Alejandro Lucero <alucerop@amd.com>
> Co-developed-by: Dan Williams <dan.j.williams@intel.com>
> ---
>  drivers/cxl/core/memdev.c | 67
> +++++++++++++++++++++++++++++++++++++++ drivers/cxl/core/port.c   |
> 2 +- drivers/cxl/mem.c         |  4 ++-
>  include/linux/cxl/cxl.h   |  2 ++
>  4 files changed, 73 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c
> index 5f8418620b70..d4406cf3ed32 100644
> --- a/drivers/cxl/core/memdev.c
> +++ b/drivers/cxl/core/memdev.c
> @@ -5,6 +5,7 @@
>  #include <linux/io-64-nonatomic-lo-hi.h>
>  #include <linux/firmware.h>
>  #include <linux/device.h>
> +#include <linux/delay.h>
>  #include <linux/slab.h>
>  #include <linux/idr.h>
>  #include <linux/pci.h>
> @@ -23,6 +24,8 @@ static DECLARE_RWSEM(cxl_memdev_rwsem);
>  static int cxl_mem_major;
>  static DEFINE_IDA(cxl_memdev_ida);
>  
> +static unsigned short endpoint_ready_timeout = HZ;
> +
>  static void cxl_memdev_release(struct device *dev)
>  {
>  	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
> @@ -1163,6 +1166,70 @@ struct cxl_memdev *devm_cxl_add_memdev(struct
> device *host, }
>  EXPORT_SYMBOL_NS_GPL(devm_cxl_add_memdev, CXL);
>  
> +/*
> + * Try to get a locked reference on a memdev's CXL port topology
> + * connection. Be careful to observe when cxl_mem_probe() has
> deposited
> + * a probe deferral awaiting the arrival of the CXL root driver.
> + */
> +struct cxl_port *cxl_acquire_endpoint(struct cxl_memdev *cxlmd)
> +{
> +	struct cxl_port *endpoint;
> +	unsigned long timeout;
> +	int rc = -ENXIO;
> +
> +	/*
> +	 * A memdev creation triggers ports creation through the
> kernel
> +	 * device object model. An endpoint port could not be
> created yet
> +	 * but coming. Wait here for a gentle space of time for
> ensuring
> +	 * and endpoint port not there is due to some error and not
> because
> +	 * the race described.
> +	 *
> +	 * Note this is a similar case this function is implemented
> for, but
> +	 * instead of the race with the root port, this is against
> its own
> +	 * endpoint port.
> +	 */
> +	timeout = jiffies + endpoint_ready_timeout;
> +	do {
> +		device_lock(&cxlmd->dev);
> +		endpoint = cxlmd->endpoint;
> +		if (endpoint)
> +			break;
> +		device_unlock(&cxlmd->dev);
> +		if (msleep_interruptible(100)) {
> +			device_lock(&cxlmd->dev);
> +			break;
> +		}
> +	} while (!time_after(jiffies, timeout));
> +
> +	if (!endpoint)
> +		goto err;
> +
> +	if (IS_ERR(endpoint)) {
> +		rc = PTR_ERR(endpoint);
> +		goto err;
> +	}
> +
> +	device_lock(&endpoint->dev);
> +	if (!endpoint->dev.driver)
> +		goto err_endpoint;
> +
> +	return endpoint;
> +
> +err_endpoint:
> +	device_unlock(&endpoint->dev);
> +err:
> +	device_unlock(&cxlmd->dev);
> +	return ERR_PTR(rc);
> +}
> +EXPORT_SYMBOL_NS(cxl_acquire_endpoint, CXL);
> +
> +void cxl_release_endpoint(struct cxl_memdev *cxlmd, struct cxl_port
> *endpoint) +{
> +	device_unlock(&endpoint->dev);
> +	device_unlock(&cxlmd->dev);
> +}
> +EXPORT_SYMBOL_NS(cxl_release_endpoint, CXL);
> +
>  static void sanitize_teardown_notifier(void *data)
>  {
>  	struct cxl_memdev_state *mds = data;
> diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c
> index 39b20ddd0296..ca2c993faa9c 100644
> --- a/drivers/cxl/core/port.c
> +++ b/drivers/cxl/core/port.c
> @@ -1554,7 +1554,7 @@ static int add_port_attach_ep(struct cxl_memdev
> *cxlmd, */
>  		dev_dbg(&cxlmd->dev, "%s is a root dport\n",
>  			dev_name(dport_dev));
> -		return -ENXIO;
> +		return -EPROBE_DEFER;
>  	}
>  
>  	parent_port = find_cxl_port(dparent, &parent_dport);
> diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c
> index 5c7ad230bccb..56fd7a100c2f 100644
> --- a/drivers/cxl/mem.c
> +++ b/drivers/cxl/mem.c
> @@ -145,8 +145,10 @@ static int cxl_mem_probe(struct device *dev)
>  		return rc;
>  
>  	rc = devm_cxl_enumerate_ports(cxlmd);
> -	if (rc)
> +	if (rc) {
> +		cxlmd->endpoint = ERR_PTR(rc);
>  		return rc;
> +	}
>  
>  	parent_port = cxl_mem_find_port(cxlmd, &dport);
>  	if (!parent_port) {
> diff --git a/include/linux/cxl/cxl.h b/include/linux/cxl/cxl.h
> index fc0859f841dc..7e4580fb8659 100644
> --- a/include/linux/cxl/cxl.h
> +++ b/include/linux/cxl/cxl.h
> @@ -57,4 +57,6 @@ int cxl_release_resource(struct cxl_dev_state
> *cxlds, enum cxl_resource type); void cxl_set_media_ready(struct
> cxl_dev_state *cxlds); struct cxl_memdev *devm_cxl_add_memdev(struct
> device *host, struct cxl_dev_state *cxlds);
> +struct cxl_port *cxl_acquire_endpoint(struct cxl_memdev *cxlmd);
> +void cxl_release_endpoint(struct cxl_memdev *cxlmd, struct cxl_port
> *endpoint); #endif
Jonathan Cameron Sept. 13, 2024, 5:43 p.m. UTC | #3
On Sat, 7 Sep 2024 09:18:26 +0100
alejandro.lucero-palau@amd.com wrote:

> From: Alejandro Lucero <alucerop@amd.com>
> 
> The first stop for a CXL accelerator driver that wants to establish new
> CXL.mem regions is to register a 'struct cxl_memdev. That kicks off
> cxl_mem_probe() to enumerate all 'struct cxl_port' instances in the
> topology up to the root.
> 
> If the root driver has not attached yet the expectation is that the
> driver waits until that link is established. The common cxl_pci_driver
> has reason to keep the 'struct cxl_memdev' device attached to the bus
> until the root driver attaches. An accelerator may want to instead defer
> probing until CXL resources can be acquired.
> 
> Use the @endpoint attribute of a 'struct cxl_memdev' to convey when
> accelerator driver probing should be deferred vs failed. Provide that
> indication via a new cxl_acquire_endpoint() API that can retrieve the
> probe status of the memdev.
> 
> Based on https://lore.kernel.org/linux-cxl/168592155270.1948938.11536845108449547920.stgit@dwillia2-xfh.jf.intel.com/
> 
> Signed-off-by: Alejandro Lucero <alucerop@amd.com>
> Co-developed-by: Dan Williams <dan.j.williams@intel.com>
> ---
>  drivers/cxl/core/memdev.c | 67 +++++++++++++++++++++++++++++++++++++++
>  drivers/cxl/core/port.c   |  2 +-
>  drivers/cxl/mem.c         |  4 ++-
>  include/linux/cxl/cxl.h   |  2 ++
>  4 files changed, 73 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c
> index 5f8418620b70..d4406cf3ed32 100644
> --- a/drivers/cxl/core/memdev.c
> +++ b/drivers/cxl/core/memdev.c
> @@ -5,6 +5,7 @@
>  #include <linux/io-64-nonatomic-lo-hi.h>
>  #include <linux/firmware.h>
>  #include <linux/device.h>
> +#include <linux/delay.h>
>  #include <linux/slab.h>
>  #include <linux/idr.h>
>  #include <linux/pci.h>
> @@ -23,6 +24,8 @@ static DECLARE_RWSEM(cxl_memdev_rwsem);
>  static int cxl_mem_major;
>  static DEFINE_IDA(cxl_memdev_ida);
>  
> +static unsigned short endpoint_ready_timeout = HZ;
> +
>  static void cxl_memdev_release(struct device *dev)
>  {
>  	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
> @@ -1163,6 +1166,70 @@ struct cxl_memdev *devm_cxl_add_memdev(struct device *host,
>  }
>  EXPORT_SYMBOL_NS_GPL(devm_cxl_add_memdev, CXL);
>  
> +/*
> + * Try to get a locked reference on a memdev's CXL port topology
> + * connection. Be careful to observe when cxl_mem_probe() has deposited
> + * a probe deferral awaiting the arrival of the CXL root driver.
> + */
> +struct cxl_port *cxl_acquire_endpoint(struct cxl_memdev *cxlmd)
> +{
> +	struct cxl_port *endpoint;
> +	unsigned long timeout;
> +	int rc = -ENXIO;
> +
> +	/*
> +	 * A memdev creation triggers ports creation through the kernel
> +	 * device object model. An endpoint port could not be created yet
> +	 * but coming. Wait here for a gentle space of time for ensuring
> +	 * and endpoint port not there is due to some error and not because
> +	 * the race described.
> +	 *
> +	 * Note this is a similar case this function is implemented for, but
> +	 * instead of the race with the root port, this is against its own
> +	 * endpoint port.

This dance is nasty and there is no real guarantee it will even help.

We need a better solution. I'm not quite sure on what it is though.

Is there any precedent for a similar 'wait a bit and hope' approach
in the kernel?

> +	 */
> +	timeout = jiffies + endpoint_ready_timeout;
> +	do {
> +		device_lock(&cxlmd->dev);
> +		endpoint = cxlmd->endpoint;
> +		if (endpoint)
> +			break;
> +		device_unlock(&cxlmd->dev);
> +		if (msleep_interruptible(100)) {
> +			device_lock(&cxlmd->dev);
> +			break;
> +		}
> +	} while (!time_after(jiffies, timeout));
> +
> +	if (!endpoint)
> +		goto err;
> +
> +	if (IS_ERR(endpoint)) {
> +		rc = PTR_ERR(endpoint);
> +		goto err;
> +	}
> +
> +	device_lock(&endpoint->dev);
> +	if (!endpoint->dev.driver)
> +		goto err_endpoint;
> +
> +	return endpoint;
> +
> +err_endpoint:
> +	device_unlock(&endpoint->dev);
> +err:
> +	device_unlock(&cxlmd->dev);
> +	return ERR_PTR(rc);
> +}
> +EXPORT_SYMBOL_NS(cxl_acquire_endpoint, CXL);
> +
> +void cxl_release_endpoint(struct cxl_memdev *cxlmd, struct cxl_port *endpoint)
> +{
> +	device_unlock(&endpoint->dev);
> +	device_unlock(&cxlmd->dev);
> +}
> +EXPORT_SYMBOL_NS(cxl_release_endpoint, CXL);
> +
>  static void sanitize_teardown_notifier(void *data)
>  {
>  	struct cxl_memdev_state *mds = data;
> diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c
> index 39b20ddd0296..ca2c993faa9c 100644
> --- a/drivers/cxl/core/port.c
> +++ b/drivers/cxl/core/port.c
> @@ -1554,7 +1554,7 @@ static int add_port_attach_ep(struct cxl_memdev *cxlmd,
>  		 */
>  		dev_dbg(&cxlmd->dev, "%s is a root dport\n",
>  			dev_name(dport_dev));
> -		return -ENXIO;
> +		return -EPROBE_DEFER;
>  	}
>  
>  	parent_port = find_cxl_port(dparent, &parent_dport);
> diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c
> index 5c7ad230bccb..56fd7a100c2f 100644
> --- a/drivers/cxl/mem.c
> +++ b/drivers/cxl/mem.c
> @@ -145,8 +145,10 @@ static int cxl_mem_probe(struct device *dev)
>  		return rc;
>  
>  	rc = devm_cxl_enumerate_ports(cxlmd);
> -	if (rc)
> +	if (rc) {
> +		cxlmd->endpoint = ERR_PTR(rc);
>  		return rc;
> +	}
>  
>  	parent_port = cxl_mem_find_port(cxlmd, &dport);
>  	if (!parent_port) {
> diff --git a/include/linux/cxl/cxl.h b/include/linux/cxl/cxl.h
> index fc0859f841dc..7e4580fb8659 100644
> --- a/include/linux/cxl/cxl.h
> +++ b/include/linux/cxl/cxl.h
> @@ -57,4 +57,6 @@ int cxl_release_resource(struct cxl_dev_state *cxlds, enum cxl_resource type);
>  void cxl_set_media_ready(struct cxl_dev_state *cxlds);
>  struct cxl_memdev *devm_cxl_add_memdev(struct device *host,
>  				       struct cxl_dev_state *cxlds);
> +struct cxl_port *cxl_acquire_endpoint(struct cxl_memdev *cxlmd);
> +void cxl_release_endpoint(struct cxl_memdev *cxlmd, struct cxl_port *endpoint);
>  #endif
Alejandro Lucero Palau Sept. 16, 2024, 8:24 a.m. UTC | #4
On 9/10/24 07:37, Li, Ming4 wrote:
> On 9/7/2024 4:18 PM, alejandro.lucero-palau@amd.com wrote:
>> From: Alejandro Lucero <alucerop@amd.com>
>>
>> The first stop for a CXL accelerator driver that wants to establish new
>> CXL.mem regions is to register a 'struct cxl_memdev. That kicks off
>> cxl_mem_probe() to enumerate all 'struct cxl_port' instances in the
>> topology up to the root.
>>
>> If the root driver has not attached yet the expectation is that the
>> driver waits until that link is established. The common cxl_pci_driver
>> has reason to keep the 'struct cxl_memdev' device attached to the bus
>> until the root driver attaches. An accelerator may want to instead defer
>> probing until CXL resources can be acquired.
>>
>> Use the @endpoint attribute of a 'struct cxl_memdev' to convey when
>> accelerator driver probing should be deferred vs failed. Provide that
>> indication via a new cxl_acquire_endpoint() API that can retrieve the
>> probe status of the memdev.
>>
>> Based on https://lore.kernel.org/linux-cxl/168592155270.1948938.11536845108449547920.stgit@dwillia2-xfh.jf.intel.com/
>>
>> Signed-off-by: Alejandro Lucero <alucerop@amd.com>
>> Co-developed-by: Dan Williams <dan.j.williams@intel.com>
>> ---
>>   drivers/cxl/core/memdev.c | 67 +++++++++++++++++++++++++++++++++++++++
>>   drivers/cxl/core/port.c   |  2 +-
>>   drivers/cxl/mem.c         |  4 ++-
>>   include/linux/cxl/cxl.h   |  2 ++
>>   4 files changed, 73 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c
>> index 5f8418620b70..d4406cf3ed32 100644
>> --- a/drivers/cxl/core/memdev.c
>> +++ b/drivers/cxl/core/memdev.c
>> @@ -5,6 +5,7 @@
>>   #include <linux/io-64-nonatomic-lo-hi.h>
>>   #include <linux/firmware.h>
>>   #include <linux/device.h>
>> +#include <linux/delay.h>
>>   #include <linux/slab.h>
>>   #include <linux/idr.h>
>>   #include <linux/pci.h>
>> @@ -23,6 +24,8 @@ static DECLARE_RWSEM(cxl_memdev_rwsem);
>>   static int cxl_mem_major;
>>   static DEFINE_IDA(cxl_memdev_ida);
>>   
>> +static unsigned short endpoint_ready_timeout = HZ;
>> +
>>   static void cxl_memdev_release(struct device *dev)
>>   {
>>   	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
>> @@ -1163,6 +1166,70 @@ struct cxl_memdev *devm_cxl_add_memdev(struct device *host,
>>   }
>>   EXPORT_SYMBOL_NS_GPL(devm_cxl_add_memdev, CXL);
>>   
>> +/*
>> + * Try to get a locked reference on a memdev's CXL port topology
>> + * connection. Be careful to observe when cxl_mem_probe() has deposited
>> + * a probe deferral awaiting the arrival of the CXL root driver.
>> + */
>> +struct cxl_port *cxl_acquire_endpoint(struct cxl_memdev *cxlmd)
>> +{
>> +	struct cxl_port *endpoint;
>> +	unsigned long timeout;
>> +	int rc = -ENXIO;
>> +
>> +	/*
>> +	 * A memdev creation triggers ports creation through the kernel
>> +	 * device object model. An endpoint port could not be created yet
>> +	 * but coming. Wait here for a gentle space of time for ensuring
>> +	 * and endpoint port not there is due to some error and not because
>> +	 * the race described.
>> +	 *
>> +	 * Note this is a similar case this function is implemented for, but
>> +	 * instead of the race with the root port, this is against its own
>> +	 * endpoint port.
>> +	 */
>> +	timeout = jiffies + endpoint_ready_timeout;
>> +	do {
>> +		device_lock(&cxlmd->dev);
>> +		endpoint = cxlmd->endpoint;
>> +		if (endpoint)
>> +			break;
>> +		device_unlock(&cxlmd->dev);
>> +		if (msleep_interruptible(100)) {
>> +			device_lock(&cxlmd->dev);
>> +			break;
> Can exit directly. not need to hold the lock of cxlmd->dev then break.


I'm not sure whether it is safe to call device_unlock() twice, but even if
it is, it looks cleaner to me to either take the lock here or, failing that,
to add a separate error path.



>
>> +		}
>> +	} while (!time_after(jiffies, timeout));
>> +
>> +	if (!endpoint)
>> +		goto err;
>> +
>> +	if (IS_ERR(endpoint)) {
>> +		rc = PTR_ERR(endpoint);
>> +		goto err;
>> +	}
>> +
>> +	device_lock(&endpoint->dev);
>> +	if (!endpoint->dev.driver)
>> +		goto err_endpoint;
>> +
>> +	return endpoint;
>> +
>> +err_endpoint:
>> +	device_unlock(&endpoint->dev);
>> +err:
>> +	device_unlock(&cxlmd->dev);
>> +	return ERR_PTR(rc);
>> +}
>> +EXPORT_SYMBOL_NS(cxl_acquire_endpoint, CXL);
>> +
>> +void cxl_release_endpoint(struct cxl_memdev *cxlmd, struct cxl_port *endpoint)
>> +{
>> +	device_unlock(&endpoint->dev);
>> +	device_unlock(&cxlmd->dev);
>> +}
>> +EXPORT_SYMBOL_NS(cxl_release_endpoint, CXL);
>> +
>>   static void sanitize_teardown_notifier(void *data)
>>   {
>>   	struct cxl_memdev_state *mds = data;
>> diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c
>> index 39b20ddd0296..ca2c993faa9c 100644
>> --- a/drivers/cxl/core/port.c
>> +++ b/drivers/cxl/core/port.c
>> @@ -1554,7 +1554,7 @@ static int add_port_attach_ep(struct cxl_memdev *cxlmd,
>>   		 */
>>   		dev_dbg(&cxlmd->dev, "%s is a root dport\n",
>>   			dev_name(dport_dev));
>> -		return -ENXIO;
>> +		return -EPROBE_DEFER;
>>   	}
>>   
>>   	parent_port = find_cxl_port(dparent, &parent_dport);
>> diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c
>> index 5c7ad230bccb..56fd7a100c2f 100644
>> --- a/drivers/cxl/mem.c
>> +++ b/drivers/cxl/mem.c
>> @@ -145,8 +145,10 @@ static int cxl_mem_probe(struct device *dev)
>>   		return rc;
>>   
>>   	rc = devm_cxl_enumerate_ports(cxlmd);
>> -	if (rc)
>> +	if (rc) {
>> +		cxlmd->endpoint = ERR_PTR(rc);
>>   		return rc;
>> +	}
>>   
>>   	parent_port = cxl_mem_find_port(cxlmd, &dport);
>>   	if (!parent_port) {
>> diff --git a/include/linux/cxl/cxl.h b/include/linux/cxl/cxl.h
>> index fc0859f841dc..7e4580fb8659 100644
>> --- a/include/linux/cxl/cxl.h
>> +++ b/include/linux/cxl/cxl.h
>> @@ -57,4 +57,6 @@ int cxl_release_resource(struct cxl_dev_state *cxlds, enum cxl_resource type);
>>   void cxl_set_media_ready(struct cxl_dev_state *cxlds);
>>   struct cxl_memdev *devm_cxl_add_memdev(struct device *host,
>>   				       struct cxl_dev_state *cxlds);
>> +struct cxl_port *cxl_acquire_endpoint(struct cxl_memdev *cxlmd);
>> +void cxl_release_endpoint(struct cxl_memdev *cxlmd, struct cxl_port *endpoint);
>>   #endif
>
Alejandro Lucero Palau Sept. 16, 2024, 10:08 a.m. UTC | #5
On 9/12/24 10:19, Zhi Wang wrote:
> On Sat, 7 Sep 2024 09:18:26 +0100
> <alejandro.lucero-palau@amd.com> wrote:
>
>> From: Alejandro Lucero <alucerop@amd.com>
>>
> Hi Alejandro:
>
> When working with V2, I noticed that if CONFIG_CXL_MEM=m and cxl_mem.ko
> is not loaded, loading the type-2 driver would fail on
> cxl_acquire_endpoint(). Not sure if you met the same problem.


I think I have some kernel build problems that depend on whether the CXL code
is configured as modules, and also when CXL is not configured at all, which
is what the kernel build robot reported.

I'll work on this for v4.

Thanks!


> Now we are waiting for it to be loaded, it seems not ideal with the
> problem.
>
> Thanks,
> Zhi.
>
>> The first stop for a CXL accelerator driver that wants to establish
>> new CXL.mem regions is to register a 'struct cxl_memdev. That kicks
>> off cxl_mem_probe() to enumerate all 'struct cxl_port' instances in
>> the topology up to the root.
>>
>> If the root driver has not attached yet the expectation is that the
>> driver waits until that link is established. The common cxl_pci_driver
>> has reason to keep the 'struct cxl_memdev' device attached to the bus
>> until the root driver attaches. An accelerator may want to instead
>> defer probing until CXL resources can be acquired.
>>
>> Use the @endpoint attribute of a 'struct cxl_memdev' to convey when
>> accelerator driver probing should be deferred vs failed. Provide that
>> indication via a new cxl_acquire_endpoint() API that can retrieve the
>> probe status of the memdev.
>>
>> Based on
>> https://lore.kernel.org/linux-cxl/168592155270.1948938.11536845108449547920.stgit@dwillia2-xfh.jf.intel.com/
>>
>> Signed-off-by: Alejandro Lucero <alucerop@amd.com>
>> Co-developed-by: Dan Williams <dan.j.williams@intel.com>
>> ---
>>   drivers/cxl/core/memdev.c | 67
>> +++++++++++++++++++++++++++++++++++++++ drivers/cxl/core/port.c   |
>> 2 +- drivers/cxl/mem.c         |  4 ++-
>>   include/linux/cxl/cxl.h   |  2 ++
>>   4 files changed, 73 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c
>> index 5f8418620b70..d4406cf3ed32 100644
>> --- a/drivers/cxl/core/memdev.c
>> +++ b/drivers/cxl/core/memdev.c
>> @@ -5,6 +5,7 @@
>>   #include <linux/io-64-nonatomic-lo-hi.h>
>>   #include <linux/firmware.h>
>>   #include <linux/device.h>
>> +#include <linux/delay.h>
>>   #include <linux/slab.h>
>>   #include <linux/idr.h>
>>   #include <linux/pci.h>
>> @@ -23,6 +24,8 @@ static DECLARE_RWSEM(cxl_memdev_rwsem);
>>   static int cxl_mem_major;
>>   static DEFINE_IDA(cxl_memdev_ida);
>>   
>> +static unsigned short endpoint_ready_timeout = HZ;
>> +
>>   static void cxl_memdev_release(struct device *dev)
>>   {
>>   	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
>> @@ -1163,6 +1166,70 @@ struct cxl_memdev *devm_cxl_add_memdev(struct
>> device *host, }
>>   EXPORT_SYMBOL_NS_GPL(devm_cxl_add_memdev, CXL);
>>   
>> +/*
>> + * Try to get a locked reference on a memdev's CXL port topology
>> + * connection. Be careful to observe when cxl_mem_probe() has
>> deposited
>> + * a probe deferral awaiting the arrival of the CXL root driver.
>> + */
>> +struct cxl_port *cxl_acquire_endpoint(struct cxl_memdev *cxlmd)
>> +{
>> +	struct cxl_port *endpoint;
>> +	unsigned long timeout;
>> +	int rc = -ENXIO;
>> +
>> +	/*
>> +	 * A memdev creation triggers ports creation through the
>> kernel
>> +	 * device object model. An endpoint port could not be
>> created yet
>> +	 * but coming. Wait here for a gentle space of time for
>> ensuring
>> +	 * and endpoint port not there is due to some error and not
>> because
>> +	 * the race described.
>> +	 *
>> +	 * Note this is a similar case this function is implemented
>> for, but
>> +	 * instead of the race with the root port, this is against
>> its own
>> +	 * endpoint port.
>> +	 */
>> +	timeout = jiffies + endpoint_ready_timeout;
>> +	do {
>> +		device_lock(&cxlmd->dev);
>> +		endpoint = cxlmd->endpoint;
>> +		if (endpoint)
>> +			break;
>> +		device_unlock(&cxlmd->dev);
>> +		if (msleep_interruptible(100)) {
>> +			device_lock(&cxlmd->dev);
>> +			break;
>> +		}
>> +	} while (!time_after(jiffies, timeout));
>> +
>> +	if (!endpoint)
>> +		goto err;
>> +
>> +	if (IS_ERR(endpoint)) {
>> +		rc = PTR_ERR(endpoint);
>> +		goto err;
>> +	}
>> +
>> +	device_lock(&endpoint->dev);
>> +	if (!endpoint->dev.driver)
>> +		goto err_endpoint;
>> +
>> +	return endpoint;
>> +
>> +err_endpoint:
>> +	device_unlock(&endpoint->dev);
>> +err:
>> +	device_unlock(&cxlmd->dev);
>> +	return ERR_PTR(rc);
>> +}
>> +EXPORT_SYMBOL_NS(cxl_acquire_endpoint, CXL);
>> +
>> +void cxl_release_endpoint(struct cxl_memdev *cxlmd, struct cxl_port
>> *endpoint) +{
>> +	device_unlock(&endpoint->dev);
>> +	device_unlock(&cxlmd->dev);
>> +}
>> +EXPORT_SYMBOL_NS(cxl_release_endpoint, CXL);
>> +
>>   static void sanitize_teardown_notifier(void *data)
>>   {
>>   	struct cxl_memdev_state *mds = data;
>> diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c
>> index 39b20ddd0296..ca2c993faa9c 100644
>> --- a/drivers/cxl/core/port.c
>> +++ b/drivers/cxl/core/port.c
>> @@ -1554,7 +1554,7 @@ static int add_port_attach_ep(struct cxl_memdev
>> *cxlmd, */
>>   		dev_dbg(&cxlmd->dev, "%s is a root dport\n",
>>   			dev_name(dport_dev));
>> -		return -ENXIO;
>> +		return -EPROBE_DEFER;
>>   	}
>>   
>>   	parent_port = find_cxl_port(dparent, &parent_dport);
>> diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c
>> index 5c7ad230bccb..56fd7a100c2f 100644
>> --- a/drivers/cxl/mem.c
>> +++ b/drivers/cxl/mem.c
>> @@ -145,8 +145,10 @@ static int cxl_mem_probe(struct device *dev)
>>   		return rc;
>>   
>>   	rc = devm_cxl_enumerate_ports(cxlmd);
>> -	if (rc)
>> +	if (rc) {
>> +		cxlmd->endpoint = ERR_PTR(rc);
>>   		return rc;
>> +	}
>>   
>>   	parent_port = cxl_mem_find_port(cxlmd, &dport);
>>   	if (!parent_port) {
>> diff --git a/include/linux/cxl/cxl.h b/include/linux/cxl/cxl.h
>> index fc0859f841dc..7e4580fb8659 100644
>> --- a/include/linux/cxl/cxl.h
>> +++ b/include/linux/cxl/cxl.h
>> @@ -57,4 +57,6 @@ int cxl_release_resource(struct cxl_dev_state
>> *cxlds, enum cxl_resource type); void cxl_set_media_ready(struct
>> cxl_dev_state *cxlds); struct cxl_memdev *devm_cxl_add_memdev(struct
>> device *host, struct cxl_dev_state *cxlds);
>> +struct cxl_port *cxl_acquire_endpoint(struct cxl_memdev *cxlmd);
>> +void cxl_release_endpoint(struct cxl_memdev *cxlmd, struct cxl_port
>> *endpoint); #endif
Alejandro Lucero Palau Sept. 16, 2024, 1:24 p.m. UTC | #6
On 9/13/24 18:43, Jonathan Cameron wrote:
> On Sat, 7 Sep 2024 09:18:26 +0100
> alejandro.lucero-palau@amd.com wrote:
>
>> From: Alejandro Lucero <alucerop@amd.com>
>>
>> The first stop for a CXL accelerator driver that wants to establish new
>> CXL.mem regions is to register a 'struct cxl_memdev. That kicks off
>> cxl_mem_probe() to enumerate all 'struct cxl_port' instances in the
>> topology up to the root.
>>
>> If the root driver has not attached yet the expectation is that the
>> driver waits until that link is established. The common cxl_pci_driver
>> has reason to keep the 'struct cxl_memdev' device attached to the bus
>> until the root driver attaches. An accelerator may want to instead defer
>> probing until CXL resources can be acquired.
>>
>> Use the @endpoint attribute of a 'struct cxl_memdev' to convey when
>> accelerator driver probing should be deferred vs failed. Provide that
>> indication via a new cxl_acquire_endpoint() API that can retrieve the
>> probe status of the memdev.
>>
>> Based on https://lore.kernel.org/linux-cxl/168592155270.1948938.11536845108449547920.stgit@dwillia2-xfh.jf.intel.com/
>>
>> Signed-off-by: Alejandro Lucero <alucerop@amd.com>
>> Co-developed-by: Dan Williams <dan.j.williams@intel.com>
>> ---
>>   drivers/cxl/core/memdev.c | 67 +++++++++++++++++++++++++++++++++++++++
>>   drivers/cxl/core/port.c   |  2 +-
>>   drivers/cxl/mem.c         |  4 ++-
>>   include/linux/cxl/cxl.h   |  2 ++
>>   4 files changed, 73 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c
>> index 5f8418620b70..d4406cf3ed32 100644
>> --- a/drivers/cxl/core/memdev.c
>> +++ b/drivers/cxl/core/memdev.c
>> @@ -5,6 +5,7 @@
>>   #include <linux/io-64-nonatomic-lo-hi.h>
>>   #include <linux/firmware.h>
>>   #include <linux/device.h>
>> +#include <linux/delay.h>
>>   #include <linux/slab.h>
>>   #include <linux/idr.h>
>>   #include <linux/pci.h>
>> @@ -23,6 +24,8 @@ static DECLARE_RWSEM(cxl_memdev_rwsem);
>>   static int cxl_mem_major;
>>   static DEFINE_IDA(cxl_memdev_ida);
>>   
>> +static unsigned short endpoint_ready_timeout = HZ;
>> +
>>   static void cxl_memdev_release(struct device *dev)
>>   {
>>   	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
>> @@ -1163,6 +1166,70 @@ struct cxl_memdev *devm_cxl_add_memdev(struct device *host,
>>   }
>>   EXPORT_SYMBOL_NS_GPL(devm_cxl_add_memdev, CXL);
>>   
>> +/*
>> + * Try to get a locked reference on a memdev's CXL port topology
>> + * connection. Be careful to observe when cxl_mem_probe() has deposited
>> + * a probe deferral awaiting the arrival of the CXL root driver.
>> + */
>> +struct cxl_port *cxl_acquire_endpoint(struct cxl_memdev *cxlmd)
>> +{
>> +	struct cxl_port *endpoint;
>> +	unsigned long timeout;
>> +	int rc = -ENXIO;
>> +
>> +	/*
>> +	 * A memdev creation triggers ports creation through the kernel
>> +	 * device object model. An endpoint port could not be created yet
>> +	 * but coming. Wait here for a gentle space of time for ensuring
>> +	 * and endpoint port not there is due to some error and not because
>> +	 * the race described.
>> +	 *
>> +	 * Note this is a similar case this function is implemented for, but
>> +	 * instead of the race with the root port, this is against its own
>> +	 * endpoint port.
> This dance is nasty and there is no real guarantee it will even help.


With all due respect, we know Dan's dancing credentials. Which are 
yours?  ... :-)


So, I found this when testing. The driver calls devm_cxl_add_memdev and 
then cxl_acquire_endpoint, and the endpoint is not there yet.

Interestingly, I did not hit this initially, so I do not know what
triggers it, but I have just tested it again and if I remove the
retry loop with the timeout, the call fails (seemingly
deterministically). This is with a test/development VM with no other
significant load at the time of driver binding, and with 8 cores
available. Of course, if the driver makes the second call without
interruption, it is unlikely another core can handle the bus
notification and create the endpoint faster. But I wonder why this is
(was) not always happening.

I agree the timeout might not be enough in some situations. I think it
all depends on when bus_notify is invoked, but I have not dug deeper.


>
> We need a better solution. I'm not quite sure on what it is though.
>
> Is there any precedent for similar 'wait a bit and hope'
> in the kernel?


You put it in a way that makes me miserable ... but maybe you are right, 
since there is no certainty it will be done after the timeout, but it is 
a full second ...

I will dig a bit ...

Thanks


>
>> +	 */
>> +	timeout = jiffies + endpoint_ready_timeout;
>> +	do {
>> +		device_lock(&cxlmd->dev);
>> +		endpoint = cxlmd->endpoint;
>> +		if (endpoint)
>> +			break;
>> +		device_unlock(&cxlmd->dev);
>> +		if (msleep_interruptible(100)) {
>> +			device_lock(&cxlmd->dev);
>> +			break;
>> +		}
>> +	} while (!time_after(jiffies, timeout));
>> +
>> +	if (!endpoint)
>> +		goto err;
>> +
>> +	if (IS_ERR(endpoint)) {
>> +		rc = PTR_ERR(endpoint);
>> +		goto err;
>> +	}
>> +
>> +	device_lock(&endpoint->dev);
>> +	if (!endpoint->dev.driver)
>> +		goto err_endpoint;
>> +
>> +	return endpoint;
>> +
>> +err_endpoint:
>> +	device_unlock(&endpoint->dev);
>> +err:
>> +	device_unlock(&cxlmd->dev);
>> +	return ERR_PTR(rc);
>> +}
>> +EXPORT_SYMBOL_NS(cxl_acquire_endpoint, CXL);
>> +
>> +void cxl_release_endpoint(struct cxl_memdev *cxlmd, struct cxl_port *endpoint)
>> +{
>> +	device_unlock(&endpoint->dev);
>> +	device_unlock(&cxlmd->dev);
>> +}
>> +EXPORT_SYMBOL_NS(cxl_release_endpoint, CXL);
>> +
>>   static void sanitize_teardown_notifier(void *data)
>>   {
>>   	struct cxl_memdev_state *mds = data;
>> diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c
>> index 39b20ddd0296..ca2c993faa9c 100644
>> --- a/drivers/cxl/core/port.c
>> +++ b/drivers/cxl/core/port.c
>> @@ -1554,7 +1554,7 @@ static int add_port_attach_ep(struct cxl_memdev *cxlmd,
>>   		 */
>>   		dev_dbg(&cxlmd->dev, "%s is a root dport\n",
>>   			dev_name(dport_dev));
>> -		return -ENXIO;
>> +		return -EPROBE_DEFER;
>>   	}
>>   
>>   	parent_port = find_cxl_port(dparent, &parent_dport);
>> diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c
>> index 5c7ad230bccb..56fd7a100c2f 100644
>> --- a/drivers/cxl/mem.c
>> +++ b/drivers/cxl/mem.c
>> @@ -145,8 +145,10 @@ static int cxl_mem_probe(struct device *dev)
>>   		return rc;
>>   
>>   	rc = devm_cxl_enumerate_ports(cxlmd);
>> -	if (rc)
>> +	if (rc) {
>> +		cxlmd->endpoint = ERR_PTR(rc);
>>   		return rc;
>> +	}
>>   
>>   	parent_port = cxl_mem_find_port(cxlmd, &dport);
>>   	if (!parent_port) {
>> diff --git a/include/linux/cxl/cxl.h b/include/linux/cxl/cxl.h
>> index fc0859f841dc..7e4580fb8659 100644
>> --- a/include/linux/cxl/cxl.h
>> +++ b/include/linux/cxl/cxl.h
>> @@ -57,4 +57,6 @@ int cxl_release_resource(struct cxl_dev_state *cxlds, enum cxl_resource type);
>>   void cxl_set_media_ready(struct cxl_dev_state *cxlds);
>>   struct cxl_memdev *devm_cxl_add_memdev(struct device *host,
>>   				       struct cxl_dev_state *cxlds);
>> +struct cxl_port *cxl_acquire_endpoint(struct cxl_memdev *cxlmd);
>> +void cxl_release_endpoint(struct cxl_memdev *cxlmd, struct cxl_port *endpoint);
>>   #endif
Li, Ming4 Sept. 17, 2024, 3:31 a.m. UTC | #7
On 9/16/2024 4:24 PM, Alejandro Lucero Palau wrote:
>
> On 9/10/24 07:37, Li, Ming4 wrote:
>> On 9/7/2024 4:18 PM, alejandro.lucero-palau@amd.com wrote:
>>> From: Alejandro Lucero <alucerop@amd.com>
>>>
>>> The first stop for a CXL accelerator driver that wants to establish new
>>> CXL.mem regions is to register a 'struct cxl_memdev. That kicks off
>>> cxl_mem_probe() to enumerate all 'struct cxl_port' instances in the
>>> topology up to the root.
>>>
>>> If the root driver has not attached yet the expectation is that the
>>> driver waits until that link is established. The common cxl_pci_driver
>>> has reason to keep the 'struct cxl_memdev' device attached to the bus
>>> until the root driver attaches. An accelerator may want to instead defer
>>> probing until CXL resources can be acquired.
>>>
>>> Use the @endpoint attribute of a 'struct cxl_memdev' to convey when
>>> accelerator driver probing should be deferred vs failed. Provide that
>>> indication via a new cxl_acquire_endpoint() API that can retrieve the
>>> probe status of the memdev.
>>>
>>> Based on https://lore.kernel.org/linux-cxl/168592155270.1948938.11536845108449547920.stgit@dwillia2-xfh.jf.intel.com/
>>>
>>> Signed-off-by: Alejandro Lucero <alucerop@amd.com>
>>> Co-developed-by: Dan Williams <dan.j.williams@intel.com>
>>> ---
>>>   drivers/cxl/core/memdev.c | 67 +++++++++++++++++++++++++++++++++++++++
>>>   drivers/cxl/core/port.c   |  2 +-
>>>   drivers/cxl/mem.c         |  4 ++-
>>>   include/linux/cxl/cxl.h   |  2 ++
>>>   4 files changed, 73 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c
>>> index 5f8418620b70..d4406cf3ed32 100644
>>> --- a/drivers/cxl/core/memdev.c
>>> +++ b/drivers/cxl/core/memdev.c
>>> @@ -5,6 +5,7 @@
>>>   #include <linux/io-64-nonatomic-lo-hi.h>
>>>   #include <linux/firmware.h>
>>>   #include <linux/device.h>
>>> +#include <linux/delay.h>
>>>   #include <linux/slab.h>
>>>   #include <linux/idr.h>
>>>   #include <linux/pci.h>
>>> @@ -23,6 +24,8 @@ static DECLARE_RWSEM(cxl_memdev_rwsem);
>>>   static int cxl_mem_major;
>>>   static DEFINE_IDA(cxl_memdev_ida);
>>>   +static unsigned short endpoint_ready_timeout = HZ;
>>> +
>>>   static void cxl_memdev_release(struct device *dev)
>>>   {
>>>       struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
>>> @@ -1163,6 +1166,70 @@ struct cxl_memdev *devm_cxl_add_memdev(struct device *host,
>>>   }
>>>   EXPORT_SYMBOL_NS_GPL(devm_cxl_add_memdev, CXL);
>>>   +/*
>>> + * Try to get a locked reference on a memdev's CXL port topology
>>> + * connection. Be careful to observe when cxl_mem_probe() has deposited
>>> + * a probe deferral awaiting the arrival of the CXL root driver.
>>> + */
>>> +struct cxl_port *cxl_acquire_endpoint(struct cxl_memdev *cxlmd)
>>> +{
>>> +    struct cxl_port *endpoint;
>>> +    unsigned long timeout;
>>> +    int rc = -ENXIO;
>>> +
>>> +    /*
>>> +     * A memdev creation triggers ports creation through the kernel
>>> +     * device object model. An endpoint port could not be created yet
>>> +     * but coming. Wait here for a gentle space of time for ensuring
>>> +     * and endpoint port not there is due to some error and not because
>>> +     * the race described.
>>> +     *
>>> +     * Note this is a similar case this function is implemented for, but
>>> +     * instead of the race with the root port, this is against its own
>>> +     * endpoint port.
>>> +     */
>>> +    timeout = jiffies + endpoint_ready_timeout;
>>> +    do {
>>> +        device_lock(&cxlmd->dev);
>>> +        endpoint = cxlmd->endpoint;
>>> +        if (endpoint)
>>> +            break;
>>> +        device_unlock(&cxlmd->dev);
>>> +        if (msleep_interruptible(100)) {
>>> +            device_lock(&cxlmd->dev);
>>> +            break;
>> Can exit directly. not need to hold the lock of cxlmd->dev then break.
>
>
> Not sure if it is safe to do device_unlock twice, but even if so, it looks better to my eyes to get the lock or if not to add another error path.
>
Why would device_unlock be called twice? Directly return the value of rc, as shown below, if the sleep is interrupted.

    if (msleep_interruptible(100))

            return ERR_PTR(rc);


>
>
>>
>>> +        }
>>> +    } while (!time_after(jiffies, timeout));

Another issue I noticed is that the above loop will not hold the device lock if the timeout expires (without msleep being interrupted), but the "goto err" below will still call device_unlock() for the device.

I think the 'if (!endpoint)' check below can also directly return the value of rc. Combining the above changes, I think the code should be:

    do {

        ......

        if (msleep_interruptible(100))

                break;

    } while (!time_after(jiffies, timeout));

    if (!endpoint)

                return ERR_PTR(rc);


Does it make more sense?


>>> +
>>> +    if (!endpoint)
>>> +        goto err;
>>> +
>>> +    if (IS_ERR(endpoint)) {
>>> +        rc = PTR_ERR(endpoint);
>>> +        goto err;
>>> +    }
>>> +
>>> +    device_lock(&endpoint->dev);
>>> +    if (!endpoint->dev.driver)
>>> +        goto err_endpoint;
>>> +
>>> +    return endpoint;
>>> +
>>> +err_endpoint:
>>> +    device_unlock(&endpoint->dev);
>>> +err:
>>> +    device_unlock(&cxlmd->dev);
>>> +    return ERR_PTR(rc);
>>> +}
>>> +EXPORT_SYMBOL_NS(cxl_acquire_endpoint, CXL);
>>> +
>>> +void cxl_release_endpoint(struct cxl_memdev *cxlmd, struct cxl_port *endpoint)
>>> +{
>>> +    device_unlock(&endpoint->dev);
>>> +    device_unlock(&cxlmd->dev);
>>> +}
>>> +EXPORT_SYMBOL_NS(cxl_release_endpoint, CXL);
>>> +
>>>   static void sanitize_teardown_notifier(void *data)
>>>   {
>>>       struct cxl_memdev_state *mds = data;
>>> diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c
>>> index 39b20ddd0296..ca2c993faa9c 100644
>>> --- a/drivers/cxl/core/port.c
>>> +++ b/drivers/cxl/core/port.c
>>> @@ -1554,7 +1554,7 @@ static int add_port_attach_ep(struct cxl_memdev *cxlmd,
>>>            */
>>>           dev_dbg(&cxlmd->dev, "%s is a root dport\n",
>>>               dev_name(dport_dev));
>>> -        return -ENXIO;
>>> +        return -EPROBE_DEFER;
>>>       }
>>>         parent_port = find_cxl_port(dparent, &parent_dport);
>>> diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c
>>> index 5c7ad230bccb..56fd7a100c2f 100644
>>> --- a/drivers/cxl/mem.c
>>> +++ b/drivers/cxl/mem.c
>>> @@ -145,8 +145,10 @@ static int cxl_mem_probe(struct device *dev)
>>>           return rc;
>>>         rc = devm_cxl_enumerate_ports(cxlmd);
>>> -    if (rc)
>>> +    if (rc) {
>>> +        cxlmd->endpoint = ERR_PTR(rc);
>>>           return rc;
>>> +    }
>>>         parent_port = cxl_mem_find_port(cxlmd, &dport);
>>>       if (!parent_port) {
>>> diff --git a/include/linux/cxl/cxl.h b/include/linux/cxl/cxl.h
>>> index fc0859f841dc..7e4580fb8659 100644
>>> --- a/include/linux/cxl/cxl.h
>>> +++ b/include/linux/cxl/cxl.h
>>> @@ -57,4 +57,6 @@ int cxl_release_resource(struct cxl_dev_state *cxlds, enum cxl_resource type);
>>>   void cxl_set_media_ready(struct cxl_dev_state *cxlds);
>>>   struct cxl_memdev *devm_cxl_add_memdev(struct device *host,
>>>                          struct cxl_dev_state *cxlds);
>>> +struct cxl_port *cxl_acquire_endpoint(struct cxl_memdev *cxlmd);
>>> +void cxl_release_endpoint(struct cxl_memdev *cxlmd, struct cxl_port *endpoint);
>>>   #endif
>>
Alejandro Lucero Palau Sept. 17, 2024, 9:16 a.m. UTC | #8
On 9/17/24 04:31, Li, Ming4 wrote:
> On 9/16/2024 4:24 PM, Alejandro Lucero Palau wrote:
>> On 9/10/24 07:37, Li, Ming4 wrote:
>>> On 9/7/2024 4:18 PM, alejandro.lucero-palau@amd.com wrote:
>>>> From: Alejandro Lucero <alucerop@amd.com>
>>>>
>>>> The first stop for a CXL accelerator driver that wants to establish new
>>>> CXL.mem regions is to register a 'struct cxl_memdev. That kicks off
>>>> cxl_mem_probe() to enumerate all 'struct cxl_port' instances in the
>>>> topology up to the root.
>>>>
>>>> If the root driver has not attached yet the expectation is that the
>>>> driver waits until that link is established. The common cxl_pci_driver
>>>> has reason to keep the 'struct cxl_memdev' device attached to the bus
>>>> until the root driver attaches. An accelerator may want to instead defer
>>>> probing until CXL resources can be acquired.
>>>>
>>>> Use the @endpoint attribute of a 'struct cxl_memdev' to convey when
>>>> accelerator driver probing should be deferred vs failed. Provide that
>>>> indication via a new cxl_acquire_endpoint() API that can retrieve the
>>>> probe status of the memdev.
>>>>
>>>> Based on https://lore.kernel.org/linux-cxl/168592155270.1948938.11536845108449547920.stgit@dwillia2-xfh.jf.intel.com/
>>>>
>>>> Signed-off-by: Alejandro Lucero <alucerop@amd.com>
>>>> Co-developed-by: Dan Williams <dan.j.williams@intel.com>
>>>> ---
>>>>    drivers/cxl/core/memdev.c | 67 +++++++++++++++++++++++++++++++++++++++
>>>>    drivers/cxl/core/port.c   |  2 +-
>>>>    drivers/cxl/mem.c         |  4 ++-
>>>>    include/linux/cxl/cxl.h   |  2 ++
>>>>    4 files changed, 73 insertions(+), 2 deletions(-)
>>>>
>>>> diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c
>>>> index 5f8418620b70..d4406cf3ed32 100644
>>>> --- a/drivers/cxl/core/memdev.c
>>>> +++ b/drivers/cxl/core/memdev.c
>>>> @@ -5,6 +5,7 @@
>>>>    #include <linux/io-64-nonatomic-lo-hi.h>
>>>>    #include <linux/firmware.h>
>>>>    #include <linux/device.h>
>>>> +#include <linux/delay.h>
>>>>    #include <linux/slab.h>
>>>>    #include <linux/idr.h>
>>>>    #include <linux/pci.h>
>>>> @@ -23,6 +24,8 @@ static DECLARE_RWSEM(cxl_memdev_rwsem);
>>>>    static int cxl_mem_major;
>>>>    static DEFINE_IDA(cxl_memdev_ida);
>>>>    +static unsigned short endpoint_ready_timeout = HZ;
>>>> +
>>>>    static void cxl_memdev_release(struct device *dev)
>>>>    {
>>>>        struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
>>>> @@ -1163,6 +1166,70 @@ struct cxl_memdev *devm_cxl_add_memdev(struct device *host,
>>>>    }
>>>>    EXPORT_SYMBOL_NS_GPL(devm_cxl_add_memdev, CXL);
>>>>    +/*
>>>> + * Try to get a locked reference on a memdev's CXL port topology
>>>> + * connection. Be careful to observe when cxl_mem_probe() has deposited
>>>> + * a probe deferral awaiting the arrival of the CXL root driver.
>>>> + */
>>>> +struct cxl_port *cxl_acquire_endpoint(struct cxl_memdev *cxlmd)
>>>> +{
>>>> +    struct cxl_port *endpoint;
>>>> +    unsigned long timeout;
>>>> +    int rc = -ENXIO;
>>>> +
>>>> +    /*
>>>> +     * A memdev creation triggers ports creation through the kernel
>>>> +     * device object model. An endpoint port could not be created yet
>>>> +     * but coming. Wait here for a gentle space of time for ensuring
>>>> +     * and endpoint port not there is due to some error and not because
>>>> +     * the race described.
>>>> +     *
>>>> +     * Note this is a similar case this function is implemented for, but
>>>> +     * instead of the race with the root port, this is against its own
>>>> +     * endpoint port.
>>>> +     */
>>>> +    timeout = jiffies + endpoint_ready_timeout;
>>>> +    do {
>>>> +        device_lock(&cxlmd->dev);
>>>> +        endpoint = cxlmd->endpoint;
>>>> +        if (endpoint)
>>>> +            break;
>>>> +        device_unlock(&cxlmd->dev);
>>>> +        if (msleep_interruptible(100)) {
>>>> +            device_lock(&cxlmd->dev);
>>>> +            break;
>>> Can exit directly. not need to hold the lock of cxlmd->dev then break.
>>
>> Not sure if it is safe to do device_unlock twice, but even if so, it looks better to my eyes to get the lock or if not to add another error path.
>>
> why device_unlock will be called twice? directly return the value of rc like below if the sleep is interrupted.
>
>      if (msleep_interruptible(100))
>
>              return ERR_PTR(rc);
>
>

You are right.


>>
>>>> +        }
>>>> +    } while (!time_after(jiffies, timeout));
> Another issue I noticed is that above loop will not hold the device lock if timeout happened(without msleep interrupted), but below "goto err" will call device_unlock() for the device.
>
> I think below 'if (!endpoint)' can also return the value of rc. Combine above changes, I think the code should be:
>
>      do {
>
>          ......
>
>          if (msleep_interruptible(100))
>
>                  break;
>
>      } while (!time_after(jiffies, timeout));
>
>      if (!endpoint)
>
>                  return ERR_PTR(rc);
>
>
> Does it make more sense?


Right again.

I can see it now.

Thank you!


>
>>>> +
>>>> +    if (!endpoint)
>>>> +        goto err;
>>>> +
>>>> +    if (IS_ERR(endpoint)) {
>>>> +        rc = PTR_ERR(endpoint);
>>>> +        goto err;
>>>> +    }
>>>> +
>>>> +    device_lock(&endpoint->dev);
>>>> +    if (!endpoint->dev.driver)
>>>> +        goto err_endpoint;
>>>> +
>>>> +    return endpoint;
>>>> +
>>>> +err_endpoint:
>>>> +    device_unlock(&endpoint->dev);
>>>> +err:
>>>> +    device_unlock(&cxlmd->dev);
>>>> +    return ERR_PTR(rc);
>>>> +}
>>>> +EXPORT_SYMBOL_NS(cxl_acquire_endpoint, CXL);
>>>> +
>>>> +void cxl_release_endpoint(struct cxl_memdev *cxlmd, struct cxl_port *endpoint)
>>>> +{
>>>> +    device_unlock(&endpoint->dev);
>>>> +    device_unlock(&cxlmd->dev);
>>>> +}
>>>> +EXPORT_SYMBOL_NS(cxl_release_endpoint, CXL);
>>>> +
>>>>    static void sanitize_teardown_notifier(void *data)
>>>>    {
>>>>        struct cxl_memdev_state *mds = data;
>>>> diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c
>>>> index 39b20ddd0296..ca2c993faa9c 100644
>>>> --- a/drivers/cxl/core/port.c
>>>> +++ b/drivers/cxl/core/port.c
>>>> @@ -1554,7 +1554,7 @@ static int add_port_attach_ep(struct cxl_memdev *cxlmd,
>>>>             */
>>>>            dev_dbg(&cxlmd->dev, "%s is a root dport\n",
>>>>                dev_name(dport_dev));
>>>> -        return -ENXIO;
>>>> +        return -EPROBE_DEFER;
>>>>        }
>>>>          parent_port = find_cxl_port(dparent, &parent_dport);
>>>> diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c
>>>> index 5c7ad230bccb..56fd7a100c2f 100644
>>>> --- a/drivers/cxl/mem.c
>>>> +++ b/drivers/cxl/mem.c
>>>> @@ -145,8 +145,10 @@ static int cxl_mem_probe(struct device *dev)
>>>>            return rc;
>>>>          rc = devm_cxl_enumerate_ports(cxlmd);
>>>> -    if (rc)
>>>> +    if (rc) {
>>>> +        cxlmd->endpoint = ERR_PTR(rc);
>>>>            return rc;
>>>> +    }
>>>>          parent_port = cxl_mem_find_port(cxlmd, &dport);
>>>>        if (!parent_port) {
>>>> diff --git a/include/linux/cxl/cxl.h b/include/linux/cxl/cxl.h
>>>> index fc0859f841dc..7e4580fb8659 100644
>>>> --- a/include/linux/cxl/cxl.h
>>>> +++ b/include/linux/cxl/cxl.h
>>>> @@ -57,4 +57,6 @@ int cxl_release_resource(struct cxl_dev_state *cxlds, enum cxl_resource type);
>>>>    void cxl_set_media_ready(struct cxl_dev_state *cxlds);
>>>>    struct cxl_memdev *devm_cxl_add_memdev(struct device *host,
>>>>                           struct cxl_dev_state *cxlds);
>>>> +struct cxl_port *cxl_acquire_endpoint(struct cxl_memdev *cxlmd);
>>>> +void cxl_release_endpoint(struct cxl_memdev *cxlmd, struct cxl_port *endpoint);
>>>>    #endif
diff mbox series

Patch

diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c
index 5f8418620b70..d4406cf3ed32 100644
--- a/drivers/cxl/core/memdev.c
+++ b/drivers/cxl/core/memdev.c
@@ -5,6 +5,7 @@ 
 #include <linux/io-64-nonatomic-lo-hi.h>
 #include <linux/firmware.h>
 #include <linux/device.h>
+#include <linux/delay.h>
 #include <linux/slab.h>
 #include <linux/idr.h>
 #include <linux/pci.h>
@@ -23,6 +24,8 @@  static DECLARE_RWSEM(cxl_memdev_rwsem);
 static int cxl_mem_major;
 static DEFINE_IDA(cxl_memdev_ida);
 
+static unsigned short endpoint_ready_timeout = HZ;
+
 static void cxl_memdev_release(struct device *dev)
 {
 	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
@@ -1163,6 +1166,70 @@  struct cxl_memdev *devm_cxl_add_memdev(struct device *host,
 }
 EXPORT_SYMBOL_NS_GPL(devm_cxl_add_memdev, CXL);
 
+/*
+ * Try to get a locked reference on a memdev's CXL port topology
+ * connection. Be careful to observe when cxl_mem_probe() has deposited
+ * a probe deferral awaiting the arrival of the CXL root driver.
+ */
+struct cxl_port *cxl_acquire_endpoint(struct cxl_memdev *cxlmd)
+{
+	struct cxl_port *endpoint;
+	unsigned long timeout;
+	int rc = -ENXIO;
+
+	/*
+	 * A memdev creation triggers ports creation through the kernel
+	 * device object model. An endpoint port could not be created yet
+	 * but coming. Wait here for a gentle space of time for ensuring
+	 * and endpoint port not there is due to some error and not because
+	 * the race described.
+	 *
+	 * Note this is a similar case this function is implemented for, but
+	 * instead of the race with the root port, this is against its own
+	 * endpoint port.
+	 */
+	timeout = jiffies + endpoint_ready_timeout;
+	do {
+		device_lock(&cxlmd->dev);
+		endpoint = cxlmd->endpoint;
+		if (endpoint)
+			break;
+		device_unlock(&cxlmd->dev);
+		if (msleep_interruptible(100)) {
+			device_lock(&cxlmd->dev);
+			break;
+		}
+	} while (!time_after(jiffies, timeout));
+
+	if (!endpoint)
+		goto err;
+
+	if (IS_ERR(endpoint)) {
+		rc = PTR_ERR(endpoint);
+		goto err;
+	}
+
+	device_lock(&endpoint->dev);
+	if (!endpoint->dev.driver)
+		goto err_endpoint;
+
+	return endpoint;
+
+err_endpoint:
+	device_unlock(&endpoint->dev);
+err:
+	device_unlock(&cxlmd->dev);
+	return ERR_PTR(rc);
+}
+EXPORT_SYMBOL_NS(cxl_acquire_endpoint, CXL);
+
+void cxl_release_endpoint(struct cxl_memdev *cxlmd, struct cxl_port *endpoint)
+{
+	device_unlock(&endpoint->dev);
+	device_unlock(&cxlmd->dev);
+}
+EXPORT_SYMBOL_NS(cxl_release_endpoint, CXL);
+
 static void sanitize_teardown_notifier(void *data)
 {
 	struct cxl_memdev_state *mds = data;
diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c
index 39b20ddd0296..ca2c993faa9c 100644
--- a/drivers/cxl/core/port.c
+++ b/drivers/cxl/core/port.c
@@ -1554,7 +1554,7 @@  static int add_port_attach_ep(struct cxl_memdev *cxlmd,
 		 */
 		dev_dbg(&cxlmd->dev, "%s is a root dport\n",
 			dev_name(dport_dev));
-		return -ENXIO;
+		return -EPROBE_DEFER;
 	}
 
 	parent_port = find_cxl_port(dparent, &parent_dport);
diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c
index 5c7ad230bccb..56fd7a100c2f 100644
--- a/drivers/cxl/mem.c
+++ b/drivers/cxl/mem.c
@@ -145,8 +145,10 @@  static int cxl_mem_probe(struct device *dev)
 		return rc;
 
 	rc = devm_cxl_enumerate_ports(cxlmd);
-	if (rc)
+	if (rc) {
+		cxlmd->endpoint = ERR_PTR(rc);
 		return rc;
+	}
 
 	parent_port = cxl_mem_find_port(cxlmd, &dport);
 	if (!parent_port) {
diff --git a/include/linux/cxl/cxl.h b/include/linux/cxl/cxl.h
index fc0859f841dc..7e4580fb8659 100644
--- a/include/linux/cxl/cxl.h
+++ b/include/linux/cxl/cxl.h
@@ -57,4 +57,6 @@  int cxl_release_resource(struct cxl_dev_state *cxlds, enum cxl_resource type);
 void cxl_set_media_ready(struct cxl_dev_state *cxlds);
 struct cxl_memdev *devm_cxl_add_memdev(struct device *host,
 				       struct cxl_dev_state *cxlds);
+struct cxl_port *cxl_acquire_endpoint(struct cxl_memdev *cxlmd);
+void cxl_release_endpoint(struct cxl_memdev *cxlmd, struct cxl_port *endpoint);
 #endif