diff mbox series

[07/13] cxl/memdev: Determine CXL.mem capability

Message ID 20210902195017.2516472-8-ben.widawsky@intel.com
State New, archived
Headers show
Series Enumerate midlevel and endpoint decoders | expand

Commit Message

Ben Widawsky Sept. 2, 2021, 7:50 p.m. UTC
If the "upstream" port of the endpoint is an enumerated downstream CXL
port, and the device itself is CXL capable and enabled, the memdev
driver can bind. This binding useful for region configuration/creation
because it provides a clean way for the region code to determine if the
memdev is actually CXL capable.

A memdev/hostbridge probe race is solved with a full CXL bus rescan at
the end of ACPI probing (see comment in code for details). Switch
enumeration will be done as a follow-on patch. As a result, if a switch
is in the topology the memdev driver will not bind to any devices.

CXL.mem capability is checked lazily at the time a region is bound.
This is in line with the other configuration parameters.

Below is an example (mem0, and mem1) of CXL memdev devices that now
exist on the bus.

/sys/bus/cxl/devices/
├── decoder0.0 -> ../../../devices/platform/ACPI0017:00/root0/decoder0.0
├── mem0 -> ../../../devices/pci0000:34/0000:34:01.0/0000:36:00.0/mem0
├── mem1 -> ../../../devices/pci0000:34/0000:34:00.0/0000:35:00.0/mem1
├── pmem0 -> ../../../devices/pci0000:34/0000:34:01.0/0000:36:00.0/mem0/pmem0
├── pmem1 -> ../../../devices/pci0000:34/0000:34:00.0/0000:35:00.0/mem1/pmem1
├── port1 -> ../../../devices/platform/ACPI0017:00/root0/port1
└── root0 -> ../../../devices/platform/ACPI0017:00/root0

Signed-off-by: Ben Widawsky <ben.widawsky@intel.com>
---
 drivers/cxl/acpi.c        | 27 +++++++-----------
 drivers/cxl/core/bus.c    | 60 +++++++++++++++++++++++++++++++++++++++
 drivers/cxl/core/memdev.c |  6 ++++
 drivers/cxl/cxl.h         |  2 ++
 drivers/cxl/cxlmem.h      |  2 ++
 drivers/cxl/mem.c         | 55 ++++++++++++++++++++++++++++++++++-
 drivers/cxl/pci.c         | 23 ---------------
 drivers/cxl/pci.h         |  7 ++++-
 8 files changed, 141 insertions(+), 41 deletions(-)

Comments

Jonathan Cameron Sept. 3, 2021, 3:21 p.m. UTC | #1
On Thu, 2 Sep 2021 12:50:11 -0700
Ben Widawsky <ben.widawsky@intel.com> wrote:

> If the "upstream" port of the endpoint is an enumerated downstream CXL
> port, and the device itself is CXL capable and enabled, the memdev
> driver can bind. This binding useful for region configuration/creation
> because it provides a clean way for the region code to determine if the
> memdev is actually CXL capable.
> 
> A memdev/hostbridge probe race is solved with a full CXL bus rescan at
> the end of ACPI probing (see comment in code for details). Switch
> enumeration will be done as a follow-on patch. As a result, if a switch
> is in the topology the memdev driver will not bind to any devices.
> 
> CXL.mem capability is checked lazily at the time a region is bound.
> This is in line with the other configuration parameters.
> 
> Below is an example (mem0, and mem1) of CXL memdev devices that now
> exist on the bus.
> 
> /sys/bus/cxl/devices/
> ├── decoder0.0 -> ../../../devices/platform/ACPI0017:00/root0/decoder0.0
> ├── mem0 -> ../../../devices/pci0000:34/0000:34:01.0/0000:36:00.0/mem0
> ├── mem1 -> ../../../devices/pci0000:34/0000:34:00.0/0000:35:00.0/mem1
> ├── pmem0 -> ../../../devices/pci0000:34/0000:34:01.0/0000:36:00.0/mem0/pmem0
> ├── pmem1 -> ../../../devices/pci0000:34/0000:34:00.0/0000:35:00.0/mem1/pmem1
> ├── port1 -> ../../../devices/platform/ACPI0017:00/root0/port1
> └── root0 -> ../../../devices/platform/ACPI0017:00/root0
> 
> Signed-off-by: Ben Widawsky <ben.widawsky@intel.com>

+CC Bjorn.  Given we are moving the (nearly) generic DVSEC lookup routine, perhaps now
is time to move it into PCI core code?

> ---
>  drivers/cxl/acpi.c        | 27 +++++++-----------
>  drivers/cxl/core/bus.c    | 60 +++++++++++++++++++++++++++++++++++++++
>  drivers/cxl/core/memdev.c |  6 ++++
>  drivers/cxl/cxl.h         |  2 ++
>  drivers/cxl/cxlmem.h      |  2 ++
>  drivers/cxl/mem.c         | 55 ++++++++++++++++++++++++++++++++++-
>  drivers/cxl/pci.c         | 23 ---------------
>  drivers/cxl/pci.h         |  7 ++++-
>  8 files changed, 141 insertions(+), 41 deletions(-)
> 

...

> diff --git a/drivers/cxl/core/bus.c b/drivers/cxl/core/bus.c
> index 256e55dc2a3b..56f57302d27b 100644
> --- a/drivers/cxl/core/bus.c
> +++ b/drivers/cxl/core/bus.c

...

> @@ -596,6 +625,37 @@ int cxl_decoder_autoremove(struct device *host, struct cxl_decoder *cxld)
>  }
>  EXPORT_SYMBOL_GPL(cxl_decoder_autoremove);
>  
> +/**
> + * cxl_pci_dvsec - Gets offset for the given DVSEC id
> + * @pdev: PCI device to search for the DVSEC
> + * @dvsec: DVSEC id to look for
> + *
> + * Return: offset within the PCI header for the given DVSEC id. 0 if not found
> + */
> +int cxl_pci_dvsec(struct pci_dev *pdev, int dvsec)
> +{
> +	int pos;
> +
> +	pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_DVSEC);
> +	if (!pos)
> +		return 0;
> +
> +	while (pos) {
> +		u16 vendor, id;
> +
> +		pci_read_config_word(pdev, pos + PCI_DVSEC_HEADER1, &vendor);
> +		pci_read_config_word(pdev, pos + PCI_DVSEC_HEADER2, &id);
> +		if (vendor == PCI_DVSEC_VENDOR_ID_CXL && dvsec == id)
> +			return pos;
> +
> +		pos = pci_find_next_ext_capability(pdev, pos,
> +						   PCI_EXT_CAP_ID_DVSEC);
> +	}
> +
> +	return 0;
> +}
> +EXPORT_SYMBOL_GPL(cxl_mem_dvsec);

That's not going to work. (pci/mem)  + Can we move this to the PCI core now?
Wasn't done originally because there were several copies in various different
trees that needed to all come together. Oddly only this one seems to have
made it in though.  


> +
>  /**
>   * __cxl_driver_register - register a driver for the cxl bus
>   * @cxl_drv: cxl driver structure to attach
> diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c
> index c9dd054bd813..0068b5ff5f3e 100644
> --- a/drivers/cxl/core/memdev.c
> +++ b/drivers/cxl/core/memdev.c
> @@ -337,3 +337,9 @@ void cxl_memdev_exit(void)
>  {
>  	unregister_chrdev_region(MKDEV(cxl_mem_major, 0), CXL_MEM_MAX_DEVS);
>  }
> +
> +bool is_cxl_mem_capable(struct cxl_memdev *cxlmd)
> +{

This feels like it needs a comment to say why that's a valid check to use
to find out if it is mem capable.

> +	return !!cxlmd->dev.driver;
> +}
> +EXPORT_SYMBOL_GPL(is_cxl_mem_capable);
> diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
> index b48bdbefd949..a168520d741b 100644
> --- a/drivers/cxl/cxl.h
> +++ b/drivers/cxl/cxl.h
> @@ -283,8 +283,10 @@ struct cxl_port *devm_cxl_add_port(struct device *host, struct device *uport,
>  				   resource_size_t component_reg_phys,
>  				   struct cxl_port *parent_port);
>  
> +bool is_cxl_port(struct device *dev);
>  int cxl_add_dport(struct cxl_port *port, struct device *dport, int port_id,
>  		  resource_size_t component_reg_phys);
> +struct cxl_dport *find_dport_by_dev(struct cxl_port *port, const struct device *dev);
>  
>  struct cxl_decoder *to_cxl_decoder(struct device *dev);
>  bool is_root_decoder(struct device *dev);
> diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h
> index 811b24451604..88264204c4b9 100644
> --- a/drivers/cxl/cxlmem.h
> +++ b/drivers/cxl/cxlmem.h
> @@ -51,6 +51,8 @@ static inline struct cxl_memdev *to_cxl_memdev(struct device *dev)
>  struct cxl_memdev *devm_cxl_add_memdev(struct device *host,
>  				       struct cxl_mem *cxlm);
>  
> +bool is_cxl_mem_capable(struct cxl_memdev *cxlmd);
> +
>  /**
>   * struct cxl_mbox_cmd - A command to be submitted to hardware.
>   * @opcode: (input) The command set and command submitted to hardware.
> diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c
> index 978a54b0a51a..b6dc34d18a86 100644
> --- a/drivers/cxl/mem.c
> +++ b/drivers/cxl/mem.c
> @@ -2,8 +2,10 @@
>  /* Copyright(c) 2021 Intel Corporation. All rights reserved. */
>  #include <linux/device.h>
>  #include <linux/module.h>
> +#include <linux/pci.h>
>  
>  #include "cxlmem.h"
> +#include "pci.h"
>  
>  /**
>   * DOC: cxl mem
> @@ -17,9 +19,60 @@
>   * components.
>   */
>  
> +static int port_match(struct device *dev, const void *data)
> +{
> +	struct cxl_port *port;
> +
> +	if (!is_cxl_port(dev))
> +		return 0;
> +
> +	port = to_cxl_port(dev);
> +
> +	if (find_dport_by_dev(port, (struct device *)data))
Why the cast?

If this isn't modified in later patches

	return find_dport_by_dev(port, data);


> +		return 1;
> +
> +	return 0;
> +}
> +
> +static bool is_cxl_mem_enabled(struct pci_dev *pdev)
> +{
> +	int pcie_dvsec;
> +	u16 dvsec_ctrl;
> +
> +	pcie_dvsec = cxl_pci_dvsec(pdev, PCI_DVSEC_ID_PCIE_DVSEC_CXL_DVSEC_ID);
> +	if (!pcie_dvsec) {
> +		dev_info(&pdev->dev, "Unable to determine CXL protocol support");
> +		return false;
> +	}
> +
> +	pci_read_config_word(pdev,
> +			     pcie_dvsec + PCI_DVSEC_ID_CXL_PCIE_CTRL_OFFSET,
> +			     &dvsec_ctrl);
> +	if (!(dvsec_ctrl & CXL_PCIE_MEM_ENABLE)) {
> +		dev_info(&pdev->dev, "CXL.mem protocol not supported on device");

In the ctrl field that indicates it's not on, rather than not supported.
Hence I think the dev_info message is wrong.

> +		return false;
> +	}
> +
> +	return true;
> +}
> +
>  static int cxl_mem_probe(struct device *dev)
>  {
> -	return -EOPNOTSUPP;
> +	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
> +	struct cxl_mem *cxlm = cxlmd->cxlm;
> +	struct device *pdev_parent = cxlm->dev->parent;
> +	struct pci_dev *pdev = to_pci_dev(cxlm->dev);
> +	struct device *port_dev;
> +
> +	if (!is_cxl_mem_enabled(pdev))
> +		return -ENODEV;
> +
> +	/* TODO: if parent is a switch, this will fail. */
> +	port_dev = bus_find_device(&cxl_bus_type, NULL, pdev_parent, port_match);
> +	if (!port_dev)
> +		return -ENODEV;
> +
> +	return 0;
>  }
>  
>  static void cxl_mem_remove(struct device *dev)

...

> diff --git a/drivers/cxl/pci.h b/drivers/cxl/pci.h
> index 8c1a58813816..d6b9978d05b0 100644
> --- a/drivers/cxl/pci.h
> +++ b/drivers/cxl/pci.h
> @@ -11,7 +11,10 @@
>   */
>  #define PCI_DVSEC_HEADER1_LENGTH_MASK	GENMASK(31, 20)
>  #define PCI_DVSEC_VENDOR_ID_CXL		0x1E98
> -#define PCI_DVSEC_ID_CXL		0x0
> +
> +#define PCI_DVSEC_ID_PCIE_DVSEC_CXL_DVSEC_ID	0x0

Why the rename to this?  DVSEC x3???  Can we get away with something like...

> +#define PCI_DVSEC_ID_CXL_PCIE_CTRL_OFFSET	0xC
> +#define   CXL_PCIE_MEM_ENABLE			BIT(2)
>  
>  #define PCI_DVSEC_ID_CXL_REGLOC_DVSEC_ID	0x8

Mind you I'm not clear why this one has DVSEC twice either...

>  #define PCI_DVSEC_ID_CXL_REGLOC_BLOCK1_OFFSET	0xC

This one has nothing to do with DVSEC ID... 

> @@ -29,4 +32,6 @@
>  
>  #define CXL_REGLOC_ADDR_MASK GENMASK(31, 16)
>  
> +int cxl_pci_dvsec(struct pci_dev *pdev, int dvsec);
> +
>  #endif /* __CXL_PCI_H__ */
Dan Williams Sept. 10, 2021, 9:59 p.m. UTC | #2
On Thu, Sep 2, 2021 at 12:50 PM Ben Widawsky <ben.widawsky@intel.com> wrote:
>
> If the "upstream" port of the endpoint is an enumerated downstream CXL
> port, and the device itself is CXL capable and enabled, the memdev
> driver can bind. This binding useful for region configuration/creation
> because it provides a clean way for the region code to determine if the
> memdev is actually CXL capable.
>
> A memdev/hostbridge probe race is solved with a full CXL bus rescan at
> the end of ACPI probing (see comment in code for details). Switch
> enumeration will be done as a follow-on patch. As a result, if a switch
> is in the topology the memdev driver will not bind to any devices.
>
> CXL.mem capability is checked lazily at the time a region is bound.
> This is in line with the other configuration parameters.
>
> Below is an example (mem0, and mem1) of CXL memdev devices that now
> exist on the bus.
>
> /sys/bus/cxl/devices/
> ├── decoder0.0 -> ../../../devices/platform/ACPI0017:00/root0/decoder0.0
> ├── mem0 -> ../../../devices/pci0000:34/0000:34:01.0/0000:36:00.0/mem0
> ├── mem1 -> ../../../devices/pci0000:34/0000:34:00.0/0000:35:00.0/mem1

I'm confused, this isn't showing anything new that did not already
exist before this patch? What I think would be a useful shortcut is
for memX devices to have an attribute that links back to their cxl
root port after validation completes. Like an attribute group that
arrives and disappears when the driver successfully binds and unbinds
respectively.

> ├── pmem0 -> ../../../devices/pci0000:34/0000:34:01.0/0000:36:00.0/mem0/pmem0
> ├── pmem1 -> ../../../devices/pci0000:34/0000:34:00.0/0000:35:00.0/mem1/pmem1
> ├── port1 -> ../../../devices/platform/ACPI0017:00/root0/port1
> └── root0 -> ../../../devices/platform/ACPI0017:00/root0
>
> Signed-off-by: Ben Widawsky <ben.widawsky@intel.com>
> ---
>  drivers/cxl/acpi.c        | 27 +++++++-----------
>  drivers/cxl/core/bus.c    | 60 +++++++++++++++++++++++++++++++++++++++
>  drivers/cxl/core/memdev.c |  6 ++++
>  drivers/cxl/cxl.h         |  2 ++
>  drivers/cxl/cxlmem.h      |  2 ++
>  drivers/cxl/mem.c         | 55 ++++++++++++++++++++++++++++++++++-
>  drivers/cxl/pci.c         | 23 ---------------
>  drivers/cxl/pci.h         |  7 ++++-
>  8 files changed, 141 insertions(+), 41 deletions(-)
>
> diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c
> index 7130beffc929..fd14094bdb3f 100644
> --- a/drivers/cxl/acpi.c
> +++ b/drivers/cxl/acpi.c
> @@ -240,21 +240,6 @@ __mock int match_add_root_ports(struct pci_dev *pdev, void *data)
>         return 0;
>  }
>
> -static struct cxl_dport *find_dport_by_dev(struct cxl_port *port, struct device *dev)
> -{
> -       struct cxl_dport *dport;
> -
> -       device_lock(&port->dev);
> -       list_for_each_entry(dport, &port->dports, list)
> -               if (dport->dport == dev) {
> -                       device_unlock(&port->dev);
> -                       return dport;
> -               }
> -
> -       device_unlock(&port->dev);
> -       return NULL;
> -}
> -
>  __mock struct acpi_device *to_cxl_host_bridge(struct device *host,
>                                               struct device *dev)
>  {
> @@ -459,9 +444,19 @@ static int cxl_acpi_probe(struct platform_device *pdev)
>         if (rc)
>                 goto out;
>
> -       if (IS_ENABLED(CONFIG_CXL_PMEM))
> +       if (IS_ENABLED(CONFIG_CXL_PMEM)) {
>                 rc = device_for_each_child(&root_port->dev, root_port,
>                                            add_root_nvdimm_bridge);
> +               if (rc)
> +                       goto out;
> +       }
> +
> +       /*
> +        * While ACPI is scanning hostbridge ports, switches and memory devices
> +        * may have been probed. Those devices will need to know whether the
> +        * hostbridge is CXL capable.
> +        */
> +       rc = bus_rescan_devices(&cxl_bus_type);

I don't think it's a good idea to call bus_rescan_devices() from
probe() context. This now sets up a lockdep dependency between the
ACPI0017 device-lock and all the device-locks for every device on the
cxl-bus. This is why the nvdimm code punts the rescan outside the lock
to a workqueue.

Lockdep unfortunately won't complain about device-lock entanglements.
One item for the backlog is to add device-lock validation to the cxl
subsystem ala commit 87a30e1f05d7 ("driver-core, libnvdimm: Let device
subsystems add local lockdep coverage")


>
>  out:
>         acpi_put_table(acpi_cedt);
> diff --git a/drivers/cxl/core/bus.c b/drivers/cxl/core/bus.c
> index 256e55dc2a3b..56f57302d27b 100644
> --- a/drivers/cxl/core/bus.c
> +++ b/drivers/cxl/core/bus.c
> @@ -8,6 +8,7 @@
>  #include <linux/idr.h>
>  #include <cxlmem.h>
>  #include <cxl.h>
> +#include <pci.h>
>  #include "core.h"
>
>  /**
> @@ -259,6 +260,12 @@ static const struct device_type cxl_port_type = {
>         .groups = cxl_port_attribute_groups,
>  };
>
> +bool is_cxl_port(struct device *dev)
> +{
> +       return dev->type == &cxl_port_type;
> +}
> +EXPORT_SYMBOL_GPL(is_cxl_port);
> +
>  struct cxl_port *to_cxl_port(struct device *dev)
>  {
>         if (dev_WARN_ONCE(dev, dev->type != &cxl_port_type,
> @@ -266,6 +273,7 @@ struct cxl_port *to_cxl_port(struct device *dev)
>                 return NULL;
>         return container_of(dev, struct cxl_port, dev);
>  }
> +EXPORT_SYMBOL_GPL(to_cxl_port);
>
>  static void unregister_port(void *_port)
>  {
> @@ -424,6 +432,27 @@ static int add_dport(struct cxl_port *port, struct cxl_dport *new)
>         return dup ? -EEXIST : 0;
>  }
>
> +/**
> + * find_dport_by_dev - gets downstream CXL port from a struct device
> + * @port: cxl [upstream] port that "owns" the downstream port is being queried
> + * @dev: The device that is backing the downstream port
> + */
> +struct cxl_dport *find_dport_by_dev(struct cxl_port *port, const struct device *dev)
> +{
> +       struct cxl_dport *dport;
> +
> +       device_lock(&port->dev);
> +       list_for_each_entry(dport, &port->dports, list)
> +               if (dport->dport == dev) {
> +                       device_unlock(&port->dev);
> +                       return dport;
> +               }
> +
> +       device_unlock(&port->dev);
> +       return NULL;
> +}
> +EXPORT_SYMBOL_GPL(find_dport_by_dev);

This wants to move into the "cxl_" prefix symbol namespace if it's now
going to be a public function.

> +
>  /**
>   * cxl_add_dport - append downstream port data to a cxl_port
>   * @port: the cxl_port that references this dport
> @@ -596,6 +625,37 @@ int cxl_decoder_autoremove(struct device *host, struct cxl_decoder *cxld)
>  }
>  EXPORT_SYMBOL_GPL(cxl_decoder_autoremove);
>
> +/**
> + * cxl_pci_dvsec - Gets offset for the given DVSEC id
> + * @pdev: PCI device to search for the DVSEC
> + * @dvsec: DVSEC id to look for
> + *
> + * Return: offset within the PCI header for the given DVSEC id. 0 if not found
> + */
> +int cxl_pci_dvsec(struct pci_dev *pdev, int dvsec)
> +{
> +       int pos;
> +
> +       pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_DVSEC);
> +       if (!pos)
> +               return 0;
> +
> +       while (pos) {
> +               u16 vendor, id;
> +
> +               pci_read_config_word(pdev, pos + PCI_DVSEC_HEADER1, &vendor);
> +               pci_read_config_word(pdev, pos + PCI_DVSEC_HEADER2, &id);
> +               if (vendor == PCI_DVSEC_VENDOR_ID_CXL && dvsec == id)
> +                       return pos;
> +
> +               pos = pci_find_next_ext_capability(pdev, pos,
> +                                                  PCI_EXT_CAP_ID_DVSEC);
> +       }
> +
> +       return 0;
> +}
> +EXPORT_SYMBOL_GPL(cxl_mem_dvsec);

Why not keep this enumeration in cxl_pci and have it record the
component register block base address at cxl_memdev creation time?
This would make it similar to cxl_port creation that takes a
component_register base address argument.

> +
>  /**
>   * __cxl_driver_register - register a driver for the cxl bus
>   * @cxl_drv: cxl driver structure to attach
> diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c
> index c9dd054bd813..0068b5ff5f3e 100644
> --- a/drivers/cxl/core/memdev.c
> +++ b/drivers/cxl/core/memdev.c
> @@ -337,3 +337,9 @@ void cxl_memdev_exit(void)
>  {
>         unregister_chrdev_region(MKDEV(cxl_mem_major, 0), CXL_MEM_MAX_DEVS);
>  }
> +
> +bool is_cxl_mem_capable(struct cxl_memdev *cxlmd)
> +{
> +       return !!cxlmd->dev.driver;
> +}
> +EXPORT_SYMBOL_GPL(is_cxl_mem_capable);

Perhaps:

s/capable/{enabled,routed}/

The device is always capable, it's the hierarchy that will let it down.

> diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
> index b48bdbefd949..a168520d741b 100644
> --- a/drivers/cxl/cxl.h
> +++ b/drivers/cxl/cxl.h
> @@ -283,8 +283,10 @@ struct cxl_port *devm_cxl_add_port(struct device *host, struct device *uport,
>                                    resource_size_t component_reg_phys,
>                                    struct cxl_port *parent_port);
>
> +bool is_cxl_port(struct device *dev);
>  int cxl_add_dport(struct cxl_port *port, struct device *dport, int port_id,
>                   resource_size_t component_reg_phys);
> +struct cxl_dport *find_dport_by_dev(struct cxl_port *port, const struct device *dev);
>
>  struct cxl_decoder *to_cxl_decoder(struct device *dev);
>  bool is_root_decoder(struct device *dev);
> diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h
> index 811b24451604..88264204c4b9 100644
> --- a/drivers/cxl/cxlmem.h
> +++ b/drivers/cxl/cxlmem.h
> @@ -51,6 +51,8 @@ static inline struct cxl_memdev *to_cxl_memdev(struct device *dev)
>  struct cxl_memdev *devm_cxl_add_memdev(struct device *host,
>                                        struct cxl_mem *cxlm);
>
> +bool is_cxl_mem_capable(struct cxl_memdev *cxlmd);
> +
>  /**
>   * struct cxl_mbox_cmd - A command to be submitted to hardware.
>   * @opcode: (input) The command set and command submitted to hardware.
> diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c
> index 978a54b0a51a..b6dc34d18a86 100644
> --- a/drivers/cxl/mem.c
> +++ b/drivers/cxl/mem.c
> @@ -2,8 +2,10 @@
>  /* Copyright(c) 2021 Intel Corporation. All rights reserved. */
>  #include <linux/device.h>
>  #include <linux/module.h>
> +#include <linux/pci.h>
>
>  #include "cxlmem.h"
> +#include "pci.h"
>
>  /**
>   * DOC: cxl mem
> @@ -17,9 +19,60 @@
>   * components.
>   */
>
> +static int port_match(struct device *dev, const void *data)
> +{
> +       struct cxl_port *port;
> +
> +       if (!is_cxl_port(dev))
> +               return 0;
> +
> +       port = to_cxl_port(dev);
> +
> +       if (find_dport_by_dev(port, (struct device *)data))
> +               return 1;
> +
> +       return 0;
> +}
> +
> +static bool is_cxl_mem_enabled(struct pci_dev *pdev)
> +{
> +       int pcie_dvsec;
> +       u16 dvsec_ctrl;
> +
> +       pcie_dvsec = cxl_pci_dvsec(pdev, PCI_DVSEC_ID_PCIE_DVSEC_CXL_DVSEC_ID);
> +       if (!pcie_dvsec) {
> +               dev_info(&pdev->dev, "Unable to determine CXL protocol support");
> +               return false;
> +       }
> +
> +       pci_read_config_word(pdev,
> +                            pcie_dvsec + PCI_DVSEC_ID_CXL_PCIE_CTRL_OFFSET,
> +                            &dvsec_ctrl);
> +       if (!(dvsec_ctrl & CXL_PCIE_MEM_ENABLE)) {
> +               dev_info(&pdev->dev, "CXL.mem protocol not supported on device");
> +               return false;
> +       }
> +
> +       return true;
> +}
> +
>  static int cxl_mem_probe(struct device *dev)
>  {
> -       return -EOPNOTSUPP;
> +       struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
> +       struct cxl_mem *cxlm = cxlmd->cxlm;
> +       struct device *pdev_parent = cxlm->dev->parent;
> +       struct pci_dev *pdev = to_pci_dev(cxlm->dev);

It's not safe to assume that the parent of a cxlmd is a pci device.

> +       struct device *port_dev;
> +
> +       if (!is_cxl_mem_enabled(pdev))
> +               return -ENODEV;

This isn't sufficient, this needs to walk the entire hierarchy, right?

> +
> +       /* TODO: if parent is a switch, this will fail. */

Won't the parent be a switch in all cases? For example, even in QEMU
today the parent of the CXL device is the switch in the host bridge.

# cat /sys/bus/cxl/devices/port1/decoder1.0/devtype
cxl_decoder_switch

> +       port_dev = bus_find_device(&cxl_bus_type, NULL, pdev_parent, port_match);
> +       if (!port_dev)
> +               return -ENODEV;
> +
> +       return 0;
>  }
>
>  static void cxl_mem_remove(struct device *dev)
> diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c
> index 6931885c83ce..244b99948c40 100644
> --- a/drivers/cxl/pci.c
> +++ b/drivers/cxl/pci.c
> @@ -335,29 +335,6 @@ static void cxl_pci_unmap_regblock(struct cxl_mem *cxlm, void __iomem *base)
>         pci_iounmap(to_pci_dev(cxlm->dev), base);
>  }
>
> -static int cxl_pci_dvsec(struct pci_dev *pdev, int dvsec)
> -{
> -       int pos;
> -
> -       pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_DVSEC);
> -       if (!pos)
> -               return 0;
> -
> -       while (pos) {
> -               u16 vendor, id;
> -
> -               pci_read_config_word(pdev, pos + PCI_DVSEC_HEADER1, &vendor);
> -               pci_read_config_word(pdev, pos + PCI_DVSEC_HEADER2, &id);
> -               if (vendor == PCI_DVSEC_VENDOR_ID_CXL && dvsec == id)
> -                       return pos;
> -
> -               pos = pci_find_next_ext_capability(pdev, pos,
> -                                                  PCI_EXT_CAP_ID_DVSEC);
> -       }
> -
> -       return 0;
> -}
> -
>  static int cxl_probe_regs(struct cxl_mem *cxlm, void __iomem *base,
>                           struct cxl_register_map *map)
>  {
> diff --git a/drivers/cxl/pci.h b/drivers/cxl/pci.h
> index 8c1a58813816..d6b9978d05b0 100644
> --- a/drivers/cxl/pci.h
> +++ b/drivers/cxl/pci.h
> @@ -11,7 +11,10 @@
>   */
>  #define PCI_DVSEC_HEADER1_LENGTH_MASK  GENMASK(31, 20)
>  #define PCI_DVSEC_VENDOR_ID_CXL                0x1E98
> -#define PCI_DVSEC_ID_CXL               0x0
> +
> +#define PCI_DVSEC_ID_PCIE_DVSEC_CXL_DVSEC_ID   0x0
> +#define PCI_DVSEC_ID_CXL_PCIE_CTRL_OFFSET      0xC
> +#define   CXL_PCIE_MEM_ENABLE                  BIT(2)
>
>  #define PCI_DVSEC_ID_CXL_REGLOC_DVSEC_ID       0x8
>  #define PCI_DVSEC_ID_CXL_REGLOC_BLOCK1_OFFSET  0xC
> @@ -29,4 +32,6 @@
>
>  #define CXL_REGLOC_ADDR_MASK GENMASK(31, 16)
>
> +int cxl_pci_dvsec(struct pci_dev *pdev, int dvsec);
> +
>  #endif /* __CXL_PCI_H__ */
> --
> 2.33.0
>
Ben Widawsky Sept. 13, 2021, 7:01 p.m. UTC | #3
On 21-09-03 16:21:57, Jonathan Cameron wrote:
> On Thu, 2 Sep 2021 12:50:11 -0700
> Ben Widawsky <ben.widawsky@intel.com> wrote:
> 
> > If the "upstream" port of the endpoint is an enumerated downstream CXL
> > port, and the device itself is CXL capable and enabled, the memdev
> > driver can bind. This binding useful for region configuration/creation
> > because it provides a clean way for the region code to determine if the
> > memdev is actually CXL capable.
> > 
> > A memdev/hostbridge probe race is solved with a full CXL bus rescan at
> > the end of ACPI probing (see comment in code for details). Switch
> > enumeration will be done as a follow-on patch. As a result, if a switch
> > is in the topology the memdev driver will not bind to any devices.
> > 
> > CXL.mem capability is checked lazily at the time a region is bound.
> > This is in line with the other configuration parameters.
> > 
> > Below is an example (mem0, and mem1) of CXL memdev devices that now
> > exist on the bus.
> > 
> > /sys/bus/cxl/devices/
> > ├── decoder0.0 -> ../../../devices/platform/ACPI0017:00/root0/decoder0.0
> > ├── mem0 -> ../../../devices/pci0000:34/0000:34:01.0/0000:36:00.0/mem0
> > ├── mem1 -> ../../../devices/pci0000:34/0000:34:00.0/0000:35:00.0/mem1
> > ├── pmem0 -> ../../../devices/pci0000:34/0000:34:01.0/0000:36:00.0/mem0/pmem0
> > ├── pmem1 -> ../../../devices/pci0000:34/0000:34:00.0/0000:35:00.0/mem1/pmem1
> > ├── port1 -> ../../../devices/platform/ACPI0017:00/root0/port1
> > └── root0 -> ../../../devices/platform/ACPI0017:00/root0
> > 
> > Signed-off-by: Ben Widawsky <ben.widawsky@intel.com>
> 
> +CC Bjorn.  Given we are moving the (nearly) generic DVSEC lookup routine, perhaps now
> is time to move it into PCI core code?
> 

Fine with me. Adding linux-pci as well...

> > ---
> >  drivers/cxl/acpi.c        | 27 +++++++-----------
> >  drivers/cxl/core/bus.c    | 60 +++++++++++++++++++++++++++++++++++++++
> >  drivers/cxl/core/memdev.c |  6 ++++
> >  drivers/cxl/cxl.h         |  2 ++
> >  drivers/cxl/cxlmem.h      |  2 ++
> >  drivers/cxl/mem.c         | 55 ++++++++++++++++++++++++++++++++++-
> >  drivers/cxl/pci.c         | 23 ---------------
> >  drivers/cxl/pci.h         |  7 ++++-
> >  8 files changed, 141 insertions(+), 41 deletions(-)
> > 
> 
> ...
> 
> > diff --git a/drivers/cxl/core/bus.c b/drivers/cxl/core/bus.c
> > index 256e55dc2a3b..56f57302d27b 100644
> > --- a/drivers/cxl/core/bus.c
> > +++ b/drivers/cxl/core/bus.c
> 
> ...
> 
> > @@ -596,6 +625,37 @@ int cxl_decoder_autoremove(struct device *host, struct cxl_decoder *cxld)
> >  }
> >  EXPORT_SYMBOL_GPL(cxl_decoder_autoremove);
> >  
> > +/**
> > + * cxl_pci_dvsec - Gets offset for the given DVSEC id
> > + * @pdev: PCI device to search for the DVSEC
> > + * @dvsec: DVSEC id to look for
> > + *
> > + * Return: offset within the PCI header for the given DVSEC id. 0 if not found
> > + */
> > +int cxl_pci_dvsec(struct pci_dev *pdev, int dvsec)
> > +{
> > +	int pos;
> > +
> > +	pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_DVSEC);
> > +	if (!pos)
> > +		return 0;
> > +
> > +	while (pos) {
> > +		u16 vendor, id;
> > +
> > +		pci_read_config_word(pdev, pos + PCI_DVSEC_HEADER1, &vendor);
> > +		pci_read_config_word(pdev, pos + PCI_DVSEC_HEADER2, &id);
> > +		if (vendor == PCI_DVSEC_VENDOR_ID_CXL && dvsec == id)
> > +			return pos;
> > +
> > +		pos = pci_find_next_ext_capability(pdev, pos,
> > +						   PCI_EXT_CAP_ID_DVSEC);
> > +	}
> > +
> > +	return 0;
> > +}
> > +EXPORT_SYMBOL_GPL(cxl_mem_dvsec);
> 
> That's not going to work. (pci/mem)  + Can we move this to the PCI core now?
> Wasn't done originally because there were several copies in various different
> trees that needed to all come together. Oddly only this one seems to have
> made it in though.

I'm confused about why it won't work - nevertheless, PCI core is probably the
right place.

Something like?

/**
 * pci_find_dvsec_capability - Find DVSEC for vendor
 * @dev: PCI device to query
 * @vendor: Vendor ID to match for the DVSEC
 * @dvsec: Designated Vendor-specific capability ID
 *
 * If DVSEC has Vendor ID @vendor and DVSEC ID @dvsec return the capability
 * offset in config space; otherwise return 0.
 */
u16 pci_find_dvsec_capability(struct pci_dev *dev, u16 vendor, u16 dvsec)
{
	int pos;

	pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_DVSEC);
	if (!pos)
		return 0;

	while (pos) {
		u16 vendor, id;

		pci_read_config_word(pdev, pos + PCI_DVSEC_HEADER1, &vendor);
		pci_read_config_word(pdev, pos + PCI_DVSEC_HEADER2, &id);
		if (vendor == vendor && dvsec == id)
			return pos;

		pos = pci_find_next_ext_capability(dev, pos, PCI_EXT_CAP_ID_DVSEC);
	}

	return 0;
}
EXPORT_SYMBOL_GPL(pci_find_dvsec_capability);

> 
> 
> > +
> >  /**
> >   * __cxl_driver_register - register a driver for the cxl bus
> >   * @cxl_drv: cxl driver structure to attach
> > diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c
> > index c9dd054bd813..0068b5ff5f3e 100644
> > --- a/drivers/cxl/core/memdev.c
> > +++ b/drivers/cxl/core/memdev.c
> > @@ -337,3 +337,9 @@ void cxl_memdev_exit(void)
> >  {
> >  	unregister_chrdev_region(MKDEV(cxl_mem_major, 0), CXL_MEM_MAX_DEVS);
> >  }
> > +
> > +bool is_cxl_mem_capable(struct cxl_memdev *cxlmd)
> > +{
> 
> This feels like it needs a comment to say why that's a valid check to use
> to find out if it is mem capable.
> 
> > +	return !!cxlmd->dev.driver;
> > +}
> > +EXPORT_SYMBOL_GPL(is_cxl_mem_capable);
> > diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
> > index b48bdbefd949..a168520d741b 100644
> > --- a/drivers/cxl/cxl.h
> > +++ b/drivers/cxl/cxl.h
> > @@ -283,8 +283,10 @@ struct cxl_port *devm_cxl_add_port(struct device *host, struct device *uport,
> >  				   resource_size_t component_reg_phys,
> >  				   struct cxl_port *parent_port);
> >  
> > +bool is_cxl_port(struct device *dev);
> >  int cxl_add_dport(struct cxl_port *port, struct device *dport, int port_id,
> >  		  resource_size_t component_reg_phys);
> > +struct cxl_dport *find_dport_by_dev(struct cxl_port *port, const struct device *dev);
> >  
> >  struct cxl_decoder *to_cxl_decoder(struct device *dev);
> >  bool is_root_decoder(struct device *dev);
> > diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h
> > index 811b24451604..88264204c4b9 100644
> > --- a/drivers/cxl/cxlmem.h
> > +++ b/drivers/cxl/cxlmem.h
> > @@ -51,6 +51,8 @@ static inline struct cxl_memdev *to_cxl_memdev(struct device *dev)
> >  struct cxl_memdev *devm_cxl_add_memdev(struct device *host,
> >  				       struct cxl_mem *cxlm);
> >  
> > +bool is_cxl_mem_capable(struct cxl_memdev *cxlmd);
> > +
> >  /**
> >   * struct cxl_mbox_cmd - A command to be submitted to hardware.
> >   * @opcode: (input) The command set and command submitted to hardware.
> > diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c
> > index 978a54b0a51a..b6dc34d18a86 100644
> > --- a/drivers/cxl/mem.c
> > +++ b/drivers/cxl/mem.c
> > @@ -2,8 +2,10 @@
> >  /* Copyright(c) 2021 Intel Corporation. All rights reserved. */
> >  #include <linux/device.h>
> >  #include <linux/module.h>
> > +#include <linux/pci.h>
> >  
> >  #include "cxlmem.h"
> > +#include "pci.h"
> >  
> >  /**
> >   * DOC: cxl mem
> > @@ -17,9 +19,60 @@
> >   * components.
> >   */
> >  
> > +static int port_match(struct device *dev, const void *data)
> > +{
> > +	struct cxl_port *port;
> > +
> > +	if (!is_cxl_port(dev))
> > +		return 0;
> > +
> > +	port = to_cxl_port(dev);
> > +
> > +	if (find_dport_by_dev(port, (struct device *)data))
> Why the cast?
> 
> If this isn't modified in later patches
> 
> 	return find_dport_by_dev(port, data);
> 
> 
> > +		return 1;
> > +
> > +	return 0;
> > +}
> > +
> > +static bool is_cxl_mem_enabled(struct pci_dev *pdev)
> > +{
> > +	int pcie_dvsec;
> > +	u16 dvsec_ctrl;
> > +
> > +	pcie_dvsec = cxl_pci_dvsec(pdev, PCI_DVSEC_ID_PCIE_DVSEC_CXL_DVSEC_ID);
> > +	if (!pcie_dvsec) {
> > +		dev_info(&pdev->dev, "Unable to determine CXL protocol support");
> > +		return false;
> > +	}
> > +
> > +	pci_read_config_word(pdev,
> > +			     pcie_dvsec + PCI_DVSEC_ID_CXL_PCIE_CTRL_OFFSET,
> > +			     &dvsec_ctrl);
> > +	if (!(dvsec_ctrl & CXL_PCIE_MEM_ENABLE)) {
> > +		dev_info(&pdev->dev, "CXL.mem protocol not supported on device");
> 
> In the ctrl field that indicates it's not on, rather than not supported.
> Hence I think the dev_info message is wrong.
> 
> > +		return false;
> > +	}
> > +
> > +	return true;
> > +}
> > +
> >  static int cxl_mem_probe(struct device *dev)
> >  {
> > -	return -EOPNOTSUPP;
> > +	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
> > +	struct cxl_mem *cxlm = cxlmd->cxlm;
> > +	struct device *pdev_parent = cxlm->dev->parent;
> > +	struct pci_dev *pdev = to_pci_dev(cxlm->dev);
> > +	struct device *port_dev;
> > +
> > +	if (!is_cxl_mem_enabled(pdev))
> > +		return -ENODEV;
> > +
> > +	/* TODO: if parent is a switch, this will fail. */
> > +	port_dev = bus_find_device(&cxl_bus_type, NULL, pdev_parent, port_match);
> > +	if (!port_dev)
> > +		return -ENODEV;
> > +
> > +	return 0;
> >  }
> >  
> >  static void cxl_mem_remove(struct device *dev)
> 
> ...
> 

I took all the rest of the feedback up to here...

> > diff --git a/drivers/cxl/pci.h b/drivers/cxl/pci.h
> > index 8c1a58813816..d6b9978d05b0 100644
> > --- a/drivers/cxl/pci.h
> > +++ b/drivers/cxl/pci.h
> > @@ -11,7 +11,10 @@
> >   */
> >  #define PCI_DVSEC_HEADER1_LENGTH_MASK	GENMASK(31, 20)
> >  #define PCI_DVSEC_VENDOR_ID_CXL		0x1E98
> > -#define PCI_DVSEC_ID_CXL		0x0
> > +
> > +#define PCI_DVSEC_ID_PCIE_DVSEC_CXL_DVSEC_ID	0x0
> 
> Why the rename to this?  DVSEC x3???  Can we get away with something like...
> 
> > +#define PCI_DVSEC_ID_CXL_PCIE_CTRL_OFFSET	0xC
> > +#define   CXL_PCIE_MEM_ENABLE			BIT(2)
> >  
> >  #define PCI_DVSEC_ID_CXL_REGLOC_DVSEC_ID	0x8
> 
> Mind you I'm not clear why this one has DVSEC twice either...
> 
> >  #define PCI_DVSEC_ID_CXL_REGLOC_BLOCK1_OFFSET	0xC
> 
> This one has nothing to do with DVSEC ID... 
> 

I've already forgotten the motivation for the existing names and I don't care to
go back in the history. I was trying to create a schema here.

As you point out, both the description name and the field name introduce
redundant words. So how about all redundant of CXL and DVSEC are removed and the
schema becomes:

CXL DVSEC IDs : CXL_DVSEC_<description>
Offsets: DVSEC_<description>_<field>_OFFSET
Bits/mask DVSEC_<description>_<field>_<attribute>

#define CXL_DVSEC_PCIE_DEVICE		0
#define CXL_DVSEC_FUNCTION_MAP		2
#define CXL_DVSEC_PORT_EXTENSIONS	3
#define CXL_DVSEC_PORT_GPF		4
#define CXL_DVSEC_DEVICE_GPF		5
#define CXL_DVSEC_PCIE_FLEXBUS_PORT	7
#define CXL_DVSEC_REGISTER_LOCATOR	8
#define CXL_DVSEC_MLD			9
#define CXL_DVSEC_PCIE_TEST_CAPABILITY	10

Then an offset within one of the DVSECs...
#define CXL_DVSEC_REGISTER_LOCATOR		8
#define   DVSEC_REGISTER_LOCATOR_BLOCK1_OFFSET	0xC

Then a bit/mask...
#define CXL_DVSEC_PCIE_DEVICE			0
#define  DVSEC_PCIE_DEVICE_CAP_OFFSET		0xA
#define    DVSEC_PCIE_DEVICE_CAP_MEM_CAPABLE	BIT(2)

I'm open to other suggestions. I feel like translating registers from spec to
header files is one thing I've done over and over on many projects and I think
I'm unhappy with every solution we've managed to implement.

> > @@ -29,4 +32,6 @@
> >  
> >  #define CXL_REGLOC_ADDR_MASK GENMASK(31, 16)
> >  
> > +int cxl_pci_dvsec(struct pci_dev *pdev, int dvsec);
> > +
> >  #endif /* __CXL_PCI_H__ */
>
Ben Widawsky Sept. 13, 2021, 10:10 p.m. UTC | #4
On 21-09-10 14:59:29, Dan Williams wrote:
> On Thu, Sep 2, 2021 at 12:50 PM Ben Widawsky <ben.widawsky@intel.com> wrote:
> >
> > If the "upstream" port of the endpoint is an enumerated downstream CXL
> > port, and the device itself is CXL capable and enabled, the memdev
> > driver can bind. This binding useful for region configuration/creation
> > because it provides a clean way for the region code to determine if the
> > memdev is actually CXL capable.
> >
> > A memdev/hostbridge probe race is solved with a full CXL bus rescan at
> > the end of ACPI probing (see comment in code for details). Switch
> > enumeration will be done as a follow-on patch. As a result, if a switch
> > is in the topology the memdev driver will not bind to any devices.
> >
> > CXL.mem capability is checked lazily at the time a region is bound.
> > This is in line with the other configuration parameters.
> >
> > Below is an example (mem0, and mem1) of CXL memdev devices that now
> > exist on the bus.
> >
> > /sys/bus/cxl/devices/
> > ├── decoder0.0 -> ../../../devices/platform/ACPI0017:00/root0/decoder0.0
> > ├── mem0 -> ../../../devices/pci0000:34/0000:34:01.0/0000:36:00.0/mem0
> > ├── mem1 -> ../../../devices/pci0000:34/0000:34:00.0/0000:35:00.0/mem1
> 
> I'm confused, this isn't showing anything new that did not already
> exist before this patch? What I think would be a useful shortcut is
> for memX devices to have an attribute that links back to their cxl
> root port after validation completes. Like an attribute group that
> arrives and disappears when the driver successfully binds and unbinds
> respectively.
> 

This was a copy-paste mistake. I meant to show the memX devices under cxl_mem
drivers, like this:
# tree /sys/bus/cxl/drivers/
/sys/bus/cxl/drivers/
├── cxl_mem
│   ├── bind
│   ├── mem0 -> ../../../../devices/pci0000:34/0000:34:00.0/0000:35:00.0/mem0
│   ├── mem1 -> ../../../../devices/pci0000:34/0000:34:01.0/0000:36:00.0/mem1
│   ├── uevent
│   └── unbind
├── cxl_nvdimm
│   ├── bind
│   ├── uevent
│   └── unbind
└── cxl_nvdimm_bridge
    ├── bind
    ├── uevent
    └── unbind


While I'm not opposed to add a link as you mention, I don't yet see the utility.
Are you thinking as primarily a convenience for userspace tooling? Is this
useful if you have a switch in the path?

> > ├── pmem0 -> ../../../devices/pci0000:34/0000:34:01.0/0000:36:00.0/mem0/pmem0
> > ├── pmem1 -> ../../../devices/pci0000:34/0000:34:00.0/0000:35:00.0/mem1/pmem1
> > ├── port1 -> ../../../devices/platform/ACPI0017:00/root0/port1
> > └── root0 -> ../../../devices/platform/ACPI0017:00/root0
> >
> > Signed-off-by: Ben Widawsky <ben.widawsky@intel.com>
> > ---
> >  drivers/cxl/acpi.c        | 27 +++++++-----------
> >  drivers/cxl/core/bus.c    | 60 +++++++++++++++++++++++++++++++++++++++
> >  drivers/cxl/core/memdev.c |  6 ++++
> >  drivers/cxl/cxl.h         |  2 ++
> >  drivers/cxl/cxlmem.h      |  2 ++
> >  drivers/cxl/mem.c         | 55 ++++++++++++++++++++++++++++++++++-
> >  drivers/cxl/pci.c         | 23 ---------------
> >  drivers/cxl/pci.h         |  7 ++++-
> >  8 files changed, 141 insertions(+), 41 deletions(-)
> >
> > diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c
> > index 7130beffc929..fd14094bdb3f 100644
> > --- a/drivers/cxl/acpi.c
> > +++ b/drivers/cxl/acpi.c
> > @@ -240,21 +240,6 @@ __mock int match_add_root_ports(struct pci_dev *pdev, void *data)
> >         return 0;
> >  }
> >
> > -static struct cxl_dport *find_dport_by_dev(struct cxl_port *port, struct device *dev)
> > -{
> > -       struct cxl_dport *dport;
> > -
> > -       device_lock(&port->dev);
> > -       list_for_each_entry(dport, &port->dports, list)
> > -               if (dport->dport == dev) {
> > -                       device_unlock(&port->dev);
> > -                       return dport;
> > -               }
> > -
> > -       device_unlock(&port->dev);
> > -       return NULL;
> > -}
> > -
> >  __mock struct acpi_device *to_cxl_host_bridge(struct device *host,
> >                                               struct device *dev)
> >  {
> > @@ -459,9 +444,19 @@ static int cxl_acpi_probe(struct platform_device *pdev)
> >         if (rc)
> >                 goto out;
> >
> > -       if (IS_ENABLED(CONFIG_CXL_PMEM))
> > +       if (IS_ENABLED(CONFIG_CXL_PMEM)) {
> >                 rc = device_for_each_child(&root_port->dev, root_port,
> >                                            add_root_nvdimm_bridge);
> > +               if (rc)
> > +                       goto out;
> > +       }
> > +
> > +       /*
> > +        * While ACPI is scanning hostbridge ports, switches and memory devices
> > +        * may have been probed. Those devices will need to know whether the
> > +        * hostbridge is CXL capable.
> > +        */
> > +       rc = bus_rescan_devices(&cxl_bus_type);
> 
> I don't think it's a good idea to call bus_rescan_devices() from
> probe() context. This now sets up a lockdep dependency between the
> ACPI0017 device-lock and all the device-locks for every device on the
> cxl-bus. This is why the nvdimm code punts the rescan outside the lock
> to a workqueue.
> 
> Lockdep unfortunately won't complain about device-lock entanglements.
> One item for the backlog is to add device-lock validation to the cxl
> subsystem ala commit 87a30e1f05d7 ("driver-core, libnvdimm: Let device
> subsystems add local lockdep coverage")

Good catch. I will fix. This is now the second time I tripped over that backlog
item :-)

> 
> 
> >
> >  out:
> >         acpi_put_table(acpi_cedt);
> > diff --git a/drivers/cxl/core/bus.c b/drivers/cxl/core/bus.c
> > index 256e55dc2a3b..56f57302d27b 100644
> > --- a/drivers/cxl/core/bus.c
> > +++ b/drivers/cxl/core/bus.c
> > @@ -8,6 +8,7 @@
> >  #include <linux/idr.h>
> >  #include <cxlmem.h>
> >  #include <cxl.h>
> > +#include <pci.h>
> >  #include "core.h"
> >
> >  /**
> > @@ -259,6 +260,12 @@ static const struct device_type cxl_port_type = {
> >         .groups = cxl_port_attribute_groups,
> >  };
> >
> > +bool is_cxl_port(struct device *dev)
> > +{
> > +       return dev->type == &cxl_port_type;
> > +}
> > +EXPORT_SYMBOL_GPL(is_cxl_port);
> > +
> >  struct cxl_port *to_cxl_port(struct device *dev)
> >  {
> >         if (dev_WARN_ONCE(dev, dev->type != &cxl_port_type,
> > @@ -266,6 +273,7 @@ struct cxl_port *to_cxl_port(struct device *dev)
> >                 return NULL;
> >         return container_of(dev, struct cxl_port, dev);
> >  }
> > +EXPORT_SYMBOL_GPL(to_cxl_port);
> >
> >  static void unregister_port(void *_port)
> >  {
> > @@ -424,6 +432,27 @@ static int add_dport(struct cxl_port *port, struct cxl_dport *new)
> >         return dup ? -EEXIST : 0;
> >  }
> >
> > +/**
> > + * find_dport_by_dev - gets downstream CXL port from a struct device
> > + * @port: cxl [upstream] port that "owns" the downstream port is being queried
> > + * @dev: The device that is backing the downstream port
> > + */
> > +struct cxl_dport *find_dport_by_dev(struct cxl_port *port, const struct device *dev)
> > +{
> > +       struct cxl_dport *dport;
> > +
> > +       device_lock(&port->dev);
> > +       list_for_each_entry(dport, &port->dports, list)
> > +               if (dport->dport == dev) {
> > +                       device_unlock(&port->dev);
> > +                       return dport;
> > +               }
> > +
> > +       device_unlock(&port->dev);
> > +       return NULL;
> > +}
> > +EXPORT_SYMBOL_GPL(find_dport_by_dev);
> 
> This wants to move into the "cxl_" prefix symbol namespace if it's now
> going to be a public function.
> 
> > +
> >  /**
> >   * cxl_add_dport - append downstream port data to a cxl_port
> >   * @port: the cxl_port that references this dport
> > @@ -596,6 +625,37 @@ int cxl_decoder_autoremove(struct device *host, struct cxl_decoder *cxld)
> >  }
> >  EXPORT_SYMBOL_GPL(cxl_decoder_autoremove);
> >
> > +/**
> > + * cxl_pci_dvsec - Gets offset for the given DVSEC id
> > + * @pdev: PCI device to search for the DVSEC
> > + * @dvsec: DVSEC id to look for
> > + *
> > + * Return: offset within the PCI header for the given DVSEC id. 0 if not found
> > + */
> > +int cxl_pci_dvsec(struct pci_dev *pdev, int dvsec)
> > +{
> > +       int pos;
> > +
> > +       pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_DVSEC);
> > +       if (!pos)
> > +               return 0;
> > +
> > +       while (pos) {
> > +               u16 vendor, id;
> > +
> > +               pci_read_config_word(pdev, pos + PCI_DVSEC_HEADER1, &vendor);
> > +               pci_read_config_word(pdev, pos + PCI_DVSEC_HEADER2, &id);
> > +               if (vendor == PCI_DVSEC_VENDOR_ID_CXL && dvsec == id)
> > +                       return pos;
> > +
> > +               pos = pci_find_next_ext_capability(pdev, pos,
> > +                                                  PCI_EXT_CAP_ID_DVSEC);
> > +       }
> > +
> > +       return 0;
> > +}
> > +EXPORT_SYMBOL_GPL(cxl_mem_dvsec);
> 
> Why not keep this enumeration in cxl_pci and have it record the
> component register block base address at cxl_memdev creation time?
> This would make it similar to cxl_port creation that takes a
> component_register base address argument.
> 

It does that, this is needed in addition to that to find certain DVSEC registers
that are used to determine CXL properties.

> > +
> >  /**
> >   * __cxl_driver_register - register a driver for the cxl bus
> >   * @cxl_drv: cxl driver structure to attach
> > diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c
> > index c9dd054bd813..0068b5ff5f3e 100644
> > --- a/drivers/cxl/core/memdev.c
> > +++ b/drivers/cxl/core/memdev.c
> > @@ -337,3 +337,9 @@ void cxl_memdev_exit(void)
> >  {
> >         unregister_chrdev_region(MKDEV(cxl_mem_major, 0), CXL_MEM_MAX_DEVS);
> >  }
> > +
> > +bool is_cxl_mem_capable(struct cxl_memdev *cxlmd)
> > +{
> > +       return !!cxlmd->dev.driver;
> > +}
> > +EXPORT_SYMBOL_GPL(is_cxl_mem_capable);
> 
> Perhaps:
> 
> s/capable/{enabled,routed}/
> 
> The device is always capable, it's the hierarchy that will let it down.

I can rename to routed. From my perspective, capable and enabled aren't
synonymous. A capable device may not be enabled.

> 
> > diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
> > index b48bdbefd949..a168520d741b 100644
> > --- a/drivers/cxl/cxl.h
> > +++ b/drivers/cxl/cxl.h
> > @@ -283,8 +283,10 @@ struct cxl_port *devm_cxl_add_port(struct device *host, struct device *uport,
> >                                    resource_size_t component_reg_phys,
> >                                    struct cxl_port *parent_port);
> >
> > +bool is_cxl_port(struct device *dev);
> >  int cxl_add_dport(struct cxl_port *port, struct device *dport, int port_id,
> >                   resource_size_t component_reg_phys);
> > +struct cxl_dport *find_dport_by_dev(struct cxl_port *port, const struct device *dev);
> >
> >  struct cxl_decoder *to_cxl_decoder(struct device *dev);
> >  bool is_root_decoder(struct device *dev);
> > diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h
> > index 811b24451604..88264204c4b9 100644
> > --- a/drivers/cxl/cxlmem.h
> > +++ b/drivers/cxl/cxlmem.h
> > @@ -51,6 +51,8 @@ static inline struct cxl_memdev *to_cxl_memdev(struct device *dev)
> >  struct cxl_memdev *devm_cxl_add_memdev(struct device *host,
> >                                        struct cxl_mem *cxlm);
> >
> > +bool is_cxl_mem_capable(struct cxl_memdev *cxlmd);
> > +
> >  /**
> >   * struct cxl_mbox_cmd - A command to be submitted to hardware.
> >   * @opcode: (input) The command set and command submitted to hardware.
> > diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c
> > index 978a54b0a51a..b6dc34d18a86 100644
> > --- a/drivers/cxl/mem.c
> > +++ b/drivers/cxl/mem.c
> > @@ -2,8 +2,10 @@
> >  /* Copyright(c) 2021 Intel Corporation. All rights reserved. */
> >  #include <linux/device.h>
> >  #include <linux/module.h>
> > +#include <linux/pci.h>
> >
> >  #include "cxlmem.h"
> > +#include "pci.h"
> >
> >  /**
> >   * DOC: cxl mem
> > @@ -17,9 +19,60 @@
> >   * components.
> >   */
> >
> > +static int port_match(struct device *dev, const void *data)
> > +{
> > +       struct cxl_port *port;
> > +
> > +       if (!is_cxl_port(dev))
> > +               return 0;
> > +
> > +       port = to_cxl_port(dev);
> > +
> > +       if (find_dport_by_dev(port, (struct device *)data))
> > +               return 1;
> > +
> > +       return 0;
> > +}
> > +
> > +static bool is_cxl_mem_enabled(struct pci_dev *pdev)
> > +{
> > +       int pcie_dvsec;
> > +       u16 dvsec_ctrl;
> > +
> > +       pcie_dvsec = cxl_pci_dvsec(pdev, PCI_DVSEC_ID_PCIE_DVSEC_CXL_DVSEC_ID);
> > +       if (!pcie_dvsec) {
> > +               dev_info(&pdev->dev, "Unable to determine CXL protocol support");
> > +               return false;
> > +       }
> > +
> > +       pci_read_config_word(pdev,
> > +                            pcie_dvsec + PCI_DVSEC_ID_CXL_PCIE_CTRL_OFFSET,
> > +                            &dvsec_ctrl);
> > +       if (!(dvsec_ctrl & CXL_PCIE_MEM_ENABLE)) {
> > +               dev_info(&pdev->dev, "CXL.mem protocol not supported on device");
> > +               return false;
> > +       }
> > +
> > +       return true;
> > +}
> > +
> >  static int cxl_mem_probe(struct device *dev)
> >  {
> > -       return -EOPNOTSUPP;
> > +       struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
> > +       struct cxl_mem *cxlm = cxlmd->cxlm;
> > +       struct device *pdev_parent = cxlm->dev->parent;
> > +       struct pci_dev *pdev = to_pci_dev(cxlm->dev);
> 
> It's not safe to assume that the parent of a cxlmd is a pci device.
> 

What can it be then? Isn't the parent always going to be a downstream port,
root port, or emulated port from cxl_test?

> > +       struct device *port_dev;
> > +
> > +       if (!is_cxl_mem_enabled(pdev))
> > +               return -ENODEV;
> 
> This isn't sufficient, this needs to walk the entire hierarchy, right?
> 

It was saved to a later patch because I have no way to actually test deeper
hierarchies at the moment. After I had already sent this, you and I discussed
not doing that. I can merge it together if there's no perceived value in keeping
them separate, which it sounds like there isn't.

> > +
> > +       /* TODO: if parent is a switch, this will fail. */
> 
> Won't the parent be a switch in all cases? For example, even in QEMU
> today the parent of the CXL device is the switch in the host bridge.
> 
> # cat /sys/bus/cxl/devices/port1/decoder1.0/devtype
> cxl_decoder_switch

In QEMU (and I assume hardware) they aren't the same, root ports have a separate
implementation from switches. That aside, the primary difference in the driver
is that cxl_acpi enumerates root ports so the cxl_mem driver can determine
connectedness as long as cxl_acpi has run.

> 
> > +       port_dev = bus_find_device(&cxl_bus_type, NULL, pdev_parent, port_match);
> > +       if (!port_dev)
> > +               return -ENODEV;
> > +
> > +       return 0;
> >  }
> >
> >  static void cxl_mem_remove(struct device *dev)
> > diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c
> > index 6931885c83ce..244b99948c40 100644
> > --- a/drivers/cxl/pci.c
> > +++ b/drivers/cxl/pci.c
> > @@ -335,29 +335,6 @@ static void cxl_pci_unmap_regblock(struct cxl_mem *cxlm, void __iomem *base)
> >         pci_iounmap(to_pci_dev(cxlm->dev), base);
> >  }
> >
> > -static int cxl_pci_dvsec(struct pci_dev *pdev, int dvsec)
> > -{
> > -       int pos;
> > -
> > -       pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_DVSEC);
> > -       if (!pos)
> > -               return 0;
> > -
> > -       while (pos) {
> > -               u16 vendor, id;
> > -
> > -               pci_read_config_word(pdev, pos + PCI_DVSEC_HEADER1, &vendor);
> > -               pci_read_config_word(pdev, pos + PCI_DVSEC_HEADER2, &id);
> > -               if (vendor == PCI_DVSEC_VENDOR_ID_CXL && dvsec == id)
> > -                       return pos;
> > -
> > -               pos = pci_find_next_ext_capability(pdev, pos,
> > -                                                  PCI_EXT_CAP_ID_DVSEC);
> > -       }
> > -
> > -       return 0;
> > -}
> > -
> >  static int cxl_probe_regs(struct cxl_mem *cxlm, void __iomem *base,
> >                           struct cxl_register_map *map)
> >  {
> > diff --git a/drivers/cxl/pci.h b/drivers/cxl/pci.h
> > index 8c1a58813816..d6b9978d05b0 100644
> > --- a/drivers/cxl/pci.h
> > +++ b/drivers/cxl/pci.h
> > @@ -11,7 +11,10 @@
> >   */
> >  #define PCI_DVSEC_HEADER1_LENGTH_MASK  GENMASK(31, 20)
> >  #define PCI_DVSEC_VENDOR_ID_CXL                0x1E98
> > -#define PCI_DVSEC_ID_CXL               0x0
> > +
> > +#define PCI_DVSEC_ID_PCIE_DVSEC_CXL_DVSEC_ID   0x0
> > +#define PCI_DVSEC_ID_CXL_PCIE_CTRL_OFFSET      0xC
> > +#define   CXL_PCIE_MEM_ENABLE                  BIT(2)
> >
> >  #define PCI_DVSEC_ID_CXL_REGLOC_DVSEC_ID       0x8
> >  #define PCI_DVSEC_ID_CXL_REGLOC_BLOCK1_OFFSET  0xC
> > @@ -29,4 +32,6 @@
> >
> >  #define CXL_REGLOC_ADDR_MASK GENMASK(31, 16)
> >
> > +int cxl_pci_dvsec(struct pci_dev *pdev, int dvsec);
> > +
> >  #endif /* __CXL_PCI_H__ */
> > --
> > 2.33.0
> >
Dan Williams Sept. 14, 2021, 10:42 p.m. UTC | #5
On Mon, Sep 13, 2021 at 3:10 PM Ben Widawsky <ben.widawsky@intel.com> wrote:
>
> On 21-09-10 14:59:29, Dan Williams wrote:
> > On Thu, Sep 2, 2021 at 12:50 PM Ben Widawsky <ben.widawsky@intel.com> wrote:
> > >
> > > If the "upstream" port of the endpoint is an enumerated downstream CXL
> > > port, and the device itself is CXL capable and enabled, the memdev
> > > driver can bind. This binding useful for region configuration/creation
> > > because it provides a clean way for the region code to determine if the
> > > memdev is actually CXL capable.
> > >
> > > A memdev/hostbridge probe race is solved with a full CXL bus rescan at
> > > the end of ACPI probing (see comment in code for details). Switch
> > > enumeration will be done as a follow-on patch. As a result, if a switch
> > > is in the topology the memdev driver will not bind to any devices.
> > >
> > > CXL.mem capability is checked lazily at the time a region is bound.
> > > This is in line with the other configuration parameters.
> > >
> > > Below is an example (mem0, and mem1) of CXL memdev devices that now
> > > exist on the bus.
> > >
> > > /sys/bus/cxl/devices/
> > > ├── decoder0.0 -> ../../../devices/platform/ACPI0017:00/root0/decoder0.0
> > > ├── mem0 -> ../../../devices/pci0000:34/0000:34:01.0/0000:36:00.0/mem0
> > > ├── mem1 -> ../../../devices/pci0000:34/0000:34:00.0/0000:35:00.0/mem1
> >
> > I'm confused, this isn't showing anything new that did not already
> > exist before this patch? What I think would be a useful shortcut is
> > for memX devices to have an attribute that links back to their cxl
> > root port after validation completes. Like an attribute group that
> > arrives and disappears when the driver successfully binds and unbinds
> > respectively.
> >
>
> This was a copy-paste mistake. I meant to show the memX devices under cxl_mem
> drivers, like this:
> # tree /sys/bus/cxl/drivers/
> /sys/bus/cxl/drivers/
> ├── cxl_mem
> │   ├── bind
> │   ├── mem0 -> ../../../../devices/pci0000:34/0000:34:00.0/0000:35:00.0/mem0
> │   ├── mem1 -> ../../../../devices/pci0000:34/0000:34:01.0/0000:36:00.0/mem1

Ok, but that's still just typical sysfs, the new ground to highlight
in my mind would be the path of the port added by the memX driver
appearing in the hierarchy of CXL ports up to the host bridge from the
device.

> │   ├── uevent
> │   └── unbind
> ├── cxl_nvdimm
> │   ├── bind
> │   ├── uevent
> │   └── unbind
> └── cxl_nvdimm_bridge
>     ├── bind
>     ├── uevent
>     └── unbind
>
>
> While I'm not opposed to add a link as you mention, I don't yet see the utility.
> Are you thinking as primarily a convenience for userspace tooling? Is this
> useful if you have a switch in the path?

I was thinking it is useful for something like:

cxl list --memdevs --root-decoder=decoder0.0

...where it filters all the memdev by the ones that can possibly
participate in a given CFMWS range. Without a cache-copy of the root
port hanging off of the memdev, cxl-cli will need to redo the work the
kernel has already done to walk the CXL connectivity topology

This capability to list devices by their relationship to other objects
proved powerful for "ndctl list".

>
> > > ├── pmem0 -> ../../../devices/pci0000:34/0000:34:01.0/0000:36:00.0/mem0/pmem0
> > > ├── pmem1 -> ../../../devices/pci0000:34/0000:34:00.0/0000:35:00.0/mem1/pmem1
> > > ├── port1 -> ../../../devices/platform/ACPI0017:00/root0/port1
> > > └── root0 -> ../../../devices/platform/ACPI0017:00/root0
> > >
> > > Signed-off-by: Ben Widawsky <ben.widawsky@intel.com>
> > > ---
> > >  drivers/cxl/acpi.c        | 27 +++++++-----------
> > >  drivers/cxl/core/bus.c    | 60 +++++++++++++++++++++++++++++++++++++++
> > >  drivers/cxl/core/memdev.c |  6 ++++
> > >  drivers/cxl/cxl.h         |  2 ++
> > >  drivers/cxl/cxlmem.h      |  2 ++
> > >  drivers/cxl/mem.c         | 55 ++++++++++++++++++++++++++++++++++-
> > >  drivers/cxl/pci.c         | 23 ---------------
> > >  drivers/cxl/pci.h         |  7 ++++-
> > >  8 files changed, 141 insertions(+), 41 deletions(-)
> > >
> > > diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c
> > > index 7130beffc929..fd14094bdb3f 100644
> > > --- a/drivers/cxl/acpi.c
> > > +++ b/drivers/cxl/acpi.c
> > > @@ -240,21 +240,6 @@ __mock int match_add_root_ports(struct pci_dev *pdev, void *data)
> > >         return 0;
> > >  }
> > >
> > > -static struct cxl_dport *find_dport_by_dev(struct cxl_port *port, struct device *dev)
> > > -{
> > > -       struct cxl_dport *dport;
> > > -
> > > -       device_lock(&port->dev);
> > > -       list_for_each_entry(dport, &port->dports, list)
> > > -               if (dport->dport == dev) {
> > > -                       device_unlock(&port->dev);
> > > -                       return dport;
> > > -               }
> > > -
> > > -       device_unlock(&port->dev);
> > > -       return NULL;
> > > -}
> > > -
> > >  __mock struct acpi_device *to_cxl_host_bridge(struct device *host,
> > >                                               struct device *dev)
> > >  {
> > > @@ -459,9 +444,19 @@ static int cxl_acpi_probe(struct platform_device *pdev)
> > >         if (rc)
> > >                 goto out;
> > >
> > > -       if (IS_ENABLED(CONFIG_CXL_PMEM))
> > > +       if (IS_ENABLED(CONFIG_CXL_PMEM)) {
> > >                 rc = device_for_each_child(&root_port->dev, root_port,
> > >                                            add_root_nvdimm_bridge);
> > > +               if (rc)
> > > +                       goto out;
> > > +       }
> > > +
> > > +       /*
> > > +        * While ACPI is scanning hostbridge ports, switches and memory devices
> > > +        * may have been probed. Those devices will need to know whether the
> > > +        * hostbridge is CXL capable.
> > > +        */
> > > +       rc = bus_rescan_devices(&cxl_bus_type);
> >
> > I don't think it's a good idea to call bus_rescan_devices() from
> > probe() context. This now sets up a lockdep dependency between the
> > ACPI0017 device-lock and all the device-locks for every device on the
> > cxl-bus. This is why the nvdimm code punts the rescan outside the lock
> > to a workqueue.
> >
> > Lockdep unfortunately won't complain about device-lock entanglements.
> > One item for the backlog is to add device-lock validation to the cxl
> > subsystem ala commit 87a30e1f05d7 ("driver-core, libnvdimm: Let device
> > subsystems add local lockdep coverage")
>
> Good catch. I will fix. This is now the second time I tripped over that backlog
> item :-)
>
> >
> >
> > >
> > >  out:
> > >         acpi_put_table(acpi_cedt);
> > > diff --git a/drivers/cxl/core/bus.c b/drivers/cxl/core/bus.c
> > > index 256e55dc2a3b..56f57302d27b 100644
> > > --- a/drivers/cxl/core/bus.c
> > > +++ b/drivers/cxl/core/bus.c
> > > @@ -8,6 +8,7 @@
> > >  #include <linux/idr.h>
> > >  #include <cxlmem.h>
> > >  #include <cxl.h>
> > > +#include <pci.h>
> > >  #include "core.h"
> > >
> > >  /**
> > > @@ -259,6 +260,12 @@ static const struct device_type cxl_port_type = {
> > >         .groups = cxl_port_attribute_groups,
> > >  };
> > >
> > > +bool is_cxl_port(struct device *dev)
> > > +{
> > > +       return dev->type == &cxl_port_type;
> > > +}
> > > +EXPORT_SYMBOL_GPL(is_cxl_port);
> > > +
> > >  struct cxl_port *to_cxl_port(struct device *dev)
> > >  {
> > >         if (dev_WARN_ONCE(dev, dev->type != &cxl_port_type,
> > > @@ -266,6 +273,7 @@ struct cxl_port *to_cxl_port(struct device *dev)
> > >                 return NULL;
> > >         return container_of(dev, struct cxl_port, dev);
> > >  }
> > > +EXPORT_SYMBOL_GPL(to_cxl_port);
> > >
> > >  static void unregister_port(void *_port)
> > >  {
> > > @@ -424,6 +432,27 @@ static int add_dport(struct cxl_port *port, struct cxl_dport *new)
> > >         return dup ? -EEXIST : 0;
> > >  }
> > >
> > > +/**
> > > + * find_dport_by_dev - gets downstream CXL port from a struct device
> > > + * @port: cxl [upstream] port that "owns" the downstream port is being queried
> > > + * @dev: The device that is backing the downstream port
> > > + */
> > > +struct cxl_dport *find_dport_by_dev(struct cxl_port *port, const struct device *dev)
> > > +{
> > > +       struct cxl_dport *dport;
> > > +
> > > +       device_lock(&port->dev);
> > > +       list_for_each_entry(dport, &port->dports, list)
> > > +               if (dport->dport == dev) {
> > > +                       device_unlock(&port->dev);
> > > +                       return dport;
> > > +               }
> > > +
> > > +       device_unlock(&port->dev);
> > > +       return NULL;
> > > +}
> > > +EXPORT_SYMBOL_GPL(find_dport_by_dev);
> >
> > This wants to move into the "cxl_" prefix symbol namespace if it's now
> > going to be a public function.
> >
> > > +
> > >  /**
> > >   * cxl_add_dport - append downstream port data to a cxl_port
> > >   * @port: the cxl_port that references this dport
> > > @@ -596,6 +625,37 @@ int cxl_decoder_autoremove(struct device *host, struct cxl_decoder *cxld)
> > >  }
> > >  EXPORT_SYMBOL_GPL(cxl_decoder_autoremove);
> > >
> > > +/**
> > > + * cxl_pci_dvsec - Gets offset for the given DVSEC id
> > > + * @pdev: PCI device to search for the DVSEC
> > > + * @dvsec: DVSEC id to look for
> > > + *
> > > + * Return: offset within the PCI header for the given DVSEC id. 0 if not found
> > > + */
> > > +int cxl_pci_dvsec(struct pci_dev *pdev, int dvsec)
> > > +{
> > > +       int pos;
> > > +
> > > +       pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_DVSEC);
> > > +       if (!pos)
> > > +               return 0;
> > > +
> > > +       while (pos) {
> > > +               u16 vendor, id;
> > > +
> > > +               pci_read_config_word(pdev, pos + PCI_DVSEC_HEADER1, &vendor);
> > > +               pci_read_config_word(pdev, pos + PCI_DVSEC_HEADER2, &id);
> > > +               if (vendor == PCI_DVSEC_VENDOR_ID_CXL && dvsec == id)
> > > +                       return pos;
> > > +
> > > +               pos = pci_find_next_ext_capability(pdev, pos,
> > > +                                                  PCI_EXT_CAP_ID_DVSEC);
> > > +       }
> > > +
> > > +       return 0;
> > > +}
> > > +EXPORT_SYMBOL_GPL(cxl_mem_dvsec);
> >
> > Why not keep this enumeration in cxl_pci and have it record the
> > component register block base address at cxl_memdev creation time?
> > This would make it similar to cxl_port creation that takes a
> > component_register base address argument.
> >
>
> It does that, this is needed in addition to that to find certain DVSEC registers
> that are used to determine CXL properties.

Ok, like HDM Base registers?

Should this information be passed in to the port creation rather than
retrieved after port creation?

Side tangent... is drivers/cxl/core/bus.c becoming a dumping ground?
I.e. is it time for drivers/cxl/core/{pci,port}.c?

>
> > > +
> > >  /**
> > >   * __cxl_driver_register - register a driver for the cxl bus
> > >   * @cxl_drv: cxl driver structure to attach
> > > diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c
> > > index c9dd054bd813..0068b5ff5f3e 100644
> > > --- a/drivers/cxl/core/memdev.c
> > > +++ b/drivers/cxl/core/memdev.c
> > > @@ -337,3 +337,9 @@ void cxl_memdev_exit(void)
> > >  {
> > >         unregister_chrdev_region(MKDEV(cxl_mem_major, 0), CXL_MEM_MAX_DEVS);
> > >  }
> > > +
> > > +bool is_cxl_mem_capable(struct cxl_memdev *cxlmd)
> > > +{
> > > +       return !!cxlmd->dev.driver;
> > > +}
> > > +EXPORT_SYMBOL_GPL(is_cxl_mem_capable);
> >
> > Perhaps:
> >
> > s/capable/{enabled,routed}/
> >
> > The device is always capable, it's the hierarchy that will let it down.
>
> I can rename to routed. From my perspective, capable and enabled aren't
> synonymous. A capable device may not be enabled.
>
> >
> > > diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
> > > index b48bdbefd949..a168520d741b 100644
> > > --- a/drivers/cxl/cxl.h
> > > +++ b/drivers/cxl/cxl.h
> > > @@ -283,8 +283,10 @@ struct cxl_port *devm_cxl_add_port(struct device *host, struct device *uport,
> > >                                    resource_size_t component_reg_phys,
> > >                                    struct cxl_port *parent_port);
> > >
> > > +bool is_cxl_port(struct device *dev);
> > >  int cxl_add_dport(struct cxl_port *port, struct device *dport, int port_id,
> > >                   resource_size_t component_reg_phys);
> > > +struct cxl_dport *find_dport_by_dev(struct cxl_port *port, const struct device *dev);
> > >
> > >  struct cxl_decoder *to_cxl_decoder(struct device *dev);
> > >  bool is_root_decoder(struct device *dev);
> > > diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h
> > > index 811b24451604..88264204c4b9 100644
> > > --- a/drivers/cxl/cxlmem.h
> > > +++ b/drivers/cxl/cxlmem.h
> > > @@ -51,6 +51,8 @@ static inline struct cxl_memdev *to_cxl_memdev(struct device *dev)
> > >  struct cxl_memdev *devm_cxl_add_memdev(struct device *host,
> > >                                        struct cxl_mem *cxlm);
> > >
> > > +bool is_cxl_mem_capable(struct cxl_memdev *cxlmd);
> > > +
> > >  /**
> > >   * struct cxl_mbox_cmd - A command to be submitted to hardware.
> > >   * @opcode: (input) The command set and command submitted to hardware.
> > > diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c
> > > index 978a54b0a51a..b6dc34d18a86 100644
> > > --- a/drivers/cxl/mem.c
> > > +++ b/drivers/cxl/mem.c
> > > @@ -2,8 +2,10 @@
> > >  /* Copyright(c) 2021 Intel Corporation. All rights reserved. */
> > >  #include <linux/device.h>
> > >  #include <linux/module.h>
> > > +#include <linux/pci.h>
> > >
> > >  #include "cxlmem.h"
> > > +#include "pci.h"
> > >
> > >  /**
> > >   * DOC: cxl mem
> > > @@ -17,9 +19,60 @@
> > >   * components.
> > >   */
> > >
> > > +static int port_match(struct device *dev, const void *data)
> > > +{
> > > +       struct cxl_port *port;
> > > +
> > > +       if (!is_cxl_port(dev))
> > > +               return 0;
> > > +
> > > +       port = to_cxl_port(dev);
> > > +
> > > +       if (find_dport_by_dev(port, (struct device *)data))
> > > +               return 1;
> > > +
> > > +       return 0;
> > > +}
> > > +
> > > +static bool is_cxl_mem_enabled(struct pci_dev *pdev)
> > > +{
> > > +       int pcie_dvsec;
> > > +       u16 dvsec_ctrl;
> > > +
> > > +       pcie_dvsec = cxl_pci_dvsec(pdev, PCI_DVSEC_ID_PCIE_DVSEC_CXL_DVSEC_ID);
> > > +       if (!pcie_dvsec) {
> > > +               dev_info(&pdev->dev, "Unable to determine CXL protocol support");
> > > +               return false;
> > > +       }
> > > +
> > > +       pci_read_config_word(pdev,
> > > +                            pcie_dvsec + PCI_DVSEC_ID_CXL_PCIE_CTRL_OFFSET,
> > > +                            &dvsec_ctrl);
> > > +       if (!(dvsec_ctrl & CXL_PCIE_MEM_ENABLE)) {
> > > +               dev_info(&pdev->dev, "CXL.mem protocol not supported on device");
> > > +               return false;
> > > +       }
> > > +
> > > +       return true;
> > > +}
> > > +
> > >  static int cxl_mem_probe(struct device *dev)
> > >  {
> > > -       return -EOPNOTSUPP;
> > > +       struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
> > > +       struct cxl_mem *cxlm = cxlmd->cxlm;
> > > +       struct device *pdev_parent = cxlm->dev->parent;
> > > +       struct pci_dev *pdev = to_pci_dev(cxlm->dev);
> >
> > It's not safe to assume that the parent of a cxlmd is a pci device.
> >
>
> What can it be then? Isn't the parent always going to be a downstream port,
> root port, or emulated port from cxl_test?

It was a nice property of the recent reworks that
drivers/cxl/core/memdev.c was able to drop its include of
<linux/pci.h> because drivers/cxl/pci.c handled all of those details.
So I'd prefer that any extra PCI details that memdev needs be probed
by the PCI driver and passed to the creation of the memdev device,
like the component register location.

This preserves the ability of cxl_test to implement the ioctl and
sysfs ABI without needing to also emulate PCI, but also just keeps PCI
concerns in files named "pci".

>
> > > +       struct device *port_dev;
> > > +
> > > +       if (!is_cxl_mem_enabled(pdev))
> > > +               return -ENODEV;
> >
> > This isn't sufficient, this needs to walk the entire hierarchy, right?
> >
>
> It was saved to a later patch because I have no way to actually test deeper
> hierarchies at the moment. After I had already sent this, you and I discussed
> not doing that. I can merge it together if there's no perceived value in keeping
> them separate, which it sounds like there isn't.

Yeah, let's keep heading that direction...

>
> > > +
> > > +       /* TODO: if parent is a switch, this will fail. */
> >
> > Won't the parent be a switch in all cases? For example, even in QEMU
> > today the parent of the CXL device is the switch in the host bridge.
> >
> > # cat /sys/bus/cxl/devices/port1/decoder1.0/devtype
> > cxl_decoder_switch
>
> In QEMU (and I assume hardware) they aren't the same, root ports have a separate
> implementation from switches. That aside, the primary difference in the driver
> is that cxl_acpi enumerates root ports so the cxl_mem driver can determine
> connectedness as long as cxl_acpi has run.

I know that PCI root ports are distinct from PCI switches, but the
Linux CXL topology is a Linux specific interpretation of the CXL spec
where cxl_decoder_switch applies equally to the 2nd-level+ decoders.
There is no cxl_decoder_root_port. So the comment on a minimum either
needs to be deleted or fixed up to be specific about
Linux-cxl_decoder_switch vs PCI/CXL-spec defined switch.
Ben Widawsky Sept. 14, 2021, 10:55 p.m. UTC | #6
On 21-09-14 15:42:01, Dan Williams wrote:
> On Mon, Sep 13, 2021 at 3:10 PM Ben Widawsky <ben.widawsky@intel.com> wrote:
> >
> > On 21-09-10 14:59:29, Dan Williams wrote:
> > > On Thu, Sep 2, 2021 at 12:50 PM Ben Widawsky <ben.widawsky@intel.com> wrote:
> > > >
> > > > If the "upstream" port of the endpoint is an enumerated downstream CXL
> > > > port, and the device itself is CXL capable and enabled, the memdev
> > > > driver can bind. This binding useful for region configuration/creation
> > > > because it provides a clean way for the region code to determine if the
> > > > memdev is actually CXL capable.
> > > >
> > > > A memdev/hostbridge probe race is solved with a full CXL bus rescan at
> > > > the end of ACPI probing (see comment in code for details). Switch
> > > > enumeration will be done as a follow-on patch. As a result, if a switch
> > > > is in the topology the memdev driver will not bind to any devices.
> > > >
> > > > CXL.mem capability is checked lazily at the time a region is bound.
> > > > This is in line with the other configuration parameters.
> > > >
> > > > Below is an example (mem0, and mem1) of CXL memdev devices that now
> > > > exist on the bus.
> > > >
> > > > /sys/bus/cxl/devices/
> > > > ├── decoder0.0 -> ../../../devices/platform/ACPI0017:00/root0/decoder0.0
> > > > ├── mem0 -> ../../../devices/pci0000:34/0000:34:01.0/0000:36:00.0/mem0
> > > > ├── mem1 -> ../../../devices/pci0000:34/0000:34:00.0/0000:35:00.0/mem1
> > >
> > > I'm confused, this isn't showing anything new that did not already
> > > exist before this patch? What I think would be a useful shortcut is
> > > for memX devices to have an attribute that links back to their cxl
> > > root port after validation completes. Like an attribute group that
> > > arrives and disappears when the driver successfully binds and unbinds
> > > respectively.
> > >
> >
> > This was a copy-paste mistake. I meant to show the memX devices under cxl_mem
> > drivers, like this:
> > # tree /sys/bus/cxl/drivers/
> > /sys/bus/cxl/drivers/
> > ├── cxl_mem
> > │   ├── bind
> > │   ├── mem0 -> ../../../../devices/pci0000:34/0000:34:00.0/0000:35:00.0/mem0
> > │   ├── mem1 -> ../../../../devices/pci0000:34/0000:34:01.0/0000:36:00.0/mem1
> 
> Ok, but that's still just typical sysfs, the new ground to highlight
> in my mind would be the path of the port added by the memX driver
> appearing in the hierarchy of CXL ports up to the host bridge from the
> device.

Okay. I assume core is going to need to grow an API to do that. Something like:
cxl_memdev_sysfs_rescan()?

> 
> > │   ├── uevent
> > │   └── unbind
> > ├── cxl_nvdimm
> > │   ├── bind
> > │   ├── uevent
> > │   └── unbind
> > └── cxl_nvdimm_bridge
> >     ├── bind
> >     ├── uevent
> >     └── unbind
> >
> >
> > While I'm not opposed to add a link as you mention, I don't yet see the utility.
> > Are you thinking as primarily a convenience for userspace tooling? Is this
> > useful if you have a switch in the path?
> 
> I was thinking it is useful for something like:
> 
> cxl list --memdevs --root-decoder=decoder0.0
> 
> ...where it filters all the memdev by the ones that can possibly
> participate in a given CFMWS range. Without a cache-copy of the root
> port hanging off of the memdev, cxl-cli will need to redo the work the
> kernel has already done to walk the CXL connectivity topology
> 
> This capability to list devices by their relationship to other objects
> proved powerful for "ndctl list".
> 

I find it hard to believe at this point that the tool won't need to walk the
topology anyway. It's a good goal however, so I'm in.

> >
> > > > ├── pmem0 -> ../../../devices/pci0000:34/0000:34:01.0/0000:36:00.0/mem0/pmem0
> > > > ├── pmem1 -> ../../../devices/pci0000:34/0000:34:00.0/0000:35:00.0/mem1/pmem1
> > > > ├── port1 -> ../../../devices/platform/ACPI0017:00/root0/port1
> > > > └── root0 -> ../../../devices/platform/ACPI0017:00/root0
> > > >
> > > > Signed-off-by: Ben Widawsky <ben.widawsky@intel.com>
> > > > ---
> > > >  drivers/cxl/acpi.c        | 27 +++++++-----------
> > > >  drivers/cxl/core/bus.c    | 60 +++++++++++++++++++++++++++++++++++++++
> > > >  drivers/cxl/core/memdev.c |  6 ++++
> > > >  drivers/cxl/cxl.h         |  2 ++
> > > >  drivers/cxl/cxlmem.h      |  2 ++
> > > >  drivers/cxl/mem.c         | 55 ++++++++++++++++++++++++++++++++++-
> > > >  drivers/cxl/pci.c         | 23 ---------------
> > > >  drivers/cxl/pci.h         |  7 ++++-
> > > >  8 files changed, 141 insertions(+), 41 deletions(-)
> > > >
> > > > diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c
> > > > index 7130beffc929..fd14094bdb3f 100644
> > > > --- a/drivers/cxl/acpi.c
> > > > +++ b/drivers/cxl/acpi.c
> > > > @@ -240,21 +240,6 @@ __mock int match_add_root_ports(struct pci_dev *pdev, void *data)
> > > >         return 0;
> > > >  }
> > > >
> > > > -static struct cxl_dport *find_dport_by_dev(struct cxl_port *port, struct device *dev)
> > > > -{
> > > > -       struct cxl_dport *dport;
> > > > -
> > > > -       device_lock(&port->dev);
> > > > -       list_for_each_entry(dport, &port->dports, list)
> > > > -               if (dport->dport == dev) {
> > > > -                       device_unlock(&port->dev);
> > > > -                       return dport;
> > > > -               }
> > > > -
> > > > -       device_unlock(&port->dev);
> > > > -       return NULL;
> > > > -}
> > > > -
> > > >  __mock struct acpi_device *to_cxl_host_bridge(struct device *host,
> > > >                                               struct device *dev)
> > > >  {
> > > > @@ -459,9 +444,19 @@ static int cxl_acpi_probe(struct platform_device *pdev)
> > > >         if (rc)
> > > >                 goto out;
> > > >
> > > > -       if (IS_ENABLED(CONFIG_CXL_PMEM))
> > > > +       if (IS_ENABLED(CONFIG_CXL_PMEM)) {
> > > >                 rc = device_for_each_child(&root_port->dev, root_port,
> > > >                                            add_root_nvdimm_bridge);
> > > > +               if (rc)
> > > > +                       goto out;
> > > > +       }
> > > > +
> > > > +       /*
> > > > +        * While ACPI is scanning hostbridge ports, switches and memory devices
> > > > +        * may have been probed. Those devices will need to know whether the
> > > > +        * hostbridge is CXL capable.
> > > > +        */
> > > > +       rc = bus_rescan_devices(&cxl_bus_type);
> > >
> > > I don't think it's a good idea to call bus_rescan_devices() from
> > > probe() context. This now sets up a lockdep dependency between the
> > > ACPI0017 device-lock and all the device-locks for every device on the
> > > cxl-bus. This is why the nvdimm code punts the rescan outside the lock
> > > to a workqueue.
> > >
> > > Lockdep unfortunately won't complain about device-lock entanglements.
> > > One item for the backlog is to add device-lock validation to the cxl
> > > subsystem ala commit 87a30e1f05d7 ("driver-core, libnvdimm: Let device
> > > subsystems add local lockdep coverage")
> >
> > Good catch. I will fix. This is now the second time I tripped over that backlog
> > item :-)
> >
> > >
> > >
> > > >
> > > >  out:
> > > >         acpi_put_table(acpi_cedt);
> > > > diff --git a/drivers/cxl/core/bus.c b/drivers/cxl/core/bus.c
> > > > index 256e55dc2a3b..56f57302d27b 100644
> > > > --- a/drivers/cxl/core/bus.c
> > > > +++ b/drivers/cxl/core/bus.c
> > > > @@ -8,6 +8,7 @@
> > > >  #include <linux/idr.h>
> > > >  #include <cxlmem.h>
> > > >  #include <cxl.h>
> > > > +#include <pci.h>
> > > >  #include "core.h"
> > > >
> > > >  /**
> > > > @@ -259,6 +260,12 @@ static const struct device_type cxl_port_type = {
> > > >         .groups = cxl_port_attribute_groups,
> > > >  };
> > > >
> > > > +bool is_cxl_port(struct device *dev)
> > > > +{
> > > > +       return dev->type == &cxl_port_type;
> > > > +}
> > > > +EXPORT_SYMBOL_GPL(is_cxl_port);
> > > > +
> > > >  struct cxl_port *to_cxl_port(struct device *dev)
> > > >  {
> > > >         if (dev_WARN_ONCE(dev, dev->type != &cxl_port_type,
> > > > @@ -266,6 +273,7 @@ struct cxl_port *to_cxl_port(struct device *dev)
> > > >                 return NULL;
> > > >         return container_of(dev, struct cxl_port, dev);
> > > >  }
> > > > +EXPORT_SYMBOL_GPL(to_cxl_port);
> > > >
> > > >  static void unregister_port(void *_port)
> > > >  {
> > > > @@ -424,6 +432,27 @@ static int add_dport(struct cxl_port *port, struct cxl_dport *new)
> > > >         return dup ? -EEXIST : 0;
> > > >  }
> > > >
> > > > +/**
> > > > + * find_dport_by_dev - gets downstream CXL port from a struct device
> > > > + * @port: cxl [upstream] port that "owns" the downstream port is being queried
> > > > + * @dev: The device that is backing the downstream port
> > > > + */
> > > > +struct cxl_dport *find_dport_by_dev(struct cxl_port *port, const struct device *dev)
> > > > +{
> > > > +       struct cxl_dport *dport;
> > > > +
> > > > +       device_lock(&port->dev);
> > > > +       list_for_each_entry(dport, &port->dports, list)
> > > > +               if (dport->dport == dev) {
> > > > +                       device_unlock(&port->dev);
> > > > +                       return dport;
> > > > +               }
> > > > +
> > > > +       device_unlock(&port->dev);
> > > > +       return NULL;
> > > > +}
> > > > +EXPORT_SYMBOL_GPL(find_dport_by_dev);
> > >
> > > This wants to move into the "cxl_" prefix symbol namespace if it's now
> > > going to be a public function.
> > >
> > > > +
> > > >  /**
> > > >   * cxl_add_dport - append downstream port data to a cxl_port
> > > >   * @port: the cxl_port that references this dport
> > > > @@ -596,6 +625,37 @@ int cxl_decoder_autoremove(struct device *host, struct cxl_decoder *cxld)
> > > >  }
> > > >  EXPORT_SYMBOL_GPL(cxl_decoder_autoremove);
> > > >
> > > > +/**
> > > > + * cxl_pci_dvsec - Gets offset for the given DVSEC id
> > > > + * @pdev: PCI device to search for the DVSEC
> > > > + * @dvsec: DVSEC id to look for
> > > > + *
> > > > + * Return: offset within the PCI header for the given DVSEC id. 0 if not found
> > > > + */
> > > > +int cxl_pci_dvsec(struct pci_dev *pdev, int dvsec)
> > > > +{
> > > > +       int pos;
> > > > +
> > > > +       pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_DVSEC);
> > > > +       if (!pos)
> > > > +               return 0;
> > > > +
> > > > +       while (pos) {
> > > > +               u16 vendor, id;
> > > > +
> > > > +               pci_read_config_word(pdev, pos + PCI_DVSEC_HEADER1, &vendor);
> > > > +               pci_read_config_word(pdev, pos + PCI_DVSEC_HEADER2, &id);
> > > > +               if (vendor == PCI_DVSEC_VENDOR_ID_CXL && dvsec == id)
> > > > +                       return pos;
> > > > +
> > > > +               pos = pci_find_next_ext_capability(pdev, pos,
> > > > +                                                  PCI_EXT_CAP_ID_DVSEC);
> > > > +       }
> > > > +
> > > > +       return 0;
> > > > +}
> > > > +EXPORT_SYMBOL_GPL(cxl_mem_dvsec);
> > >
> > > Why not keep this enumeration in cxl_pci and have it record the
> > > component register block base address at cxl_memdev creation time?
> > > This would make it similar to cxl_port creation that takes a
> > > component_register base address argument.
> > >
> >
> > It does that, this is needed in addition to that to find certain DVSEC registers
> > that are used to determine CXL properties.
> 
> Ok, like HDM Base registers?
> 
> Should this information be passed in to the port creation rather than
> retrieved after port creation?

Current code is looking at fields in DVSEC actually. I'm fine though with the
goal of not having any PCI _stuff_ in mem.c and passing it along through
cxl_pci.

> 
> Side tangent... is drivers/cxl/core/bus.c becoming a dumping ground?
> I.e. is it time for drivers/cxl/core/{pci,port}.c?
> 

I think it's likely bus.c is bound to become a dumping ground to some extent.
This particular addition will go away as stated below. Probably regs.c could be
renamed to pci.c as a starting point when we cross the threshold.

> >
> > > > +
> > > >  /**
> > > >   * __cxl_driver_register - register a driver for the cxl bus
> > > >   * @cxl_drv: cxl driver structure to attach
> > > > diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c
> > > > index c9dd054bd813..0068b5ff5f3e 100644
> > > > --- a/drivers/cxl/core/memdev.c
> > > > +++ b/drivers/cxl/core/memdev.c
> > > > @@ -337,3 +337,9 @@ void cxl_memdev_exit(void)
> > > >  {
> > > >         unregister_chrdev_region(MKDEV(cxl_mem_major, 0), CXL_MEM_MAX_DEVS);
> > > >  }
> > > > +
> > > > +bool is_cxl_mem_capable(struct cxl_memdev *cxlmd)
> > > > +{
> > > > +       return !!cxlmd->dev.driver;
> > > > +}
> > > > +EXPORT_SYMBOL_GPL(is_cxl_mem_capable);
> > >
> > > Perhaps:
> > >
> > > s/capable/{enabled,routed}/
> > >
> > > The device is always capable, it's the hierarchy that will let it down.
> >
> > I can rename to routed. From my perspective, capable and enabled aren't
> > synonymous. A capable device may not be enabled.
> >
> > >
> > > > diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
> > > > index b48bdbefd949..a168520d741b 100644
> > > > --- a/drivers/cxl/cxl.h
> > > > +++ b/drivers/cxl/cxl.h
> > > > @@ -283,8 +283,10 @@ struct cxl_port *devm_cxl_add_port(struct device *host, struct device *uport,
> > > >                                    resource_size_t component_reg_phys,
> > > >                                    struct cxl_port *parent_port);
> > > >
> > > > +bool is_cxl_port(struct device *dev);
> > > >  int cxl_add_dport(struct cxl_port *port, struct device *dport, int port_id,
> > > >                   resource_size_t component_reg_phys);
> > > > +struct cxl_dport *find_dport_by_dev(struct cxl_port *port, const struct device *dev);
> > > >
> > > >  struct cxl_decoder *to_cxl_decoder(struct device *dev);
> > > >  bool is_root_decoder(struct device *dev);
> > > > diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h
> > > > index 811b24451604..88264204c4b9 100644
> > > > --- a/drivers/cxl/cxlmem.h
> > > > +++ b/drivers/cxl/cxlmem.h
> > > > @@ -51,6 +51,8 @@ static inline struct cxl_memdev *to_cxl_memdev(struct device *dev)
> > > >  struct cxl_memdev *devm_cxl_add_memdev(struct device *host,
> > > >                                        struct cxl_mem *cxlm);
> > > >
> > > > +bool is_cxl_mem_capable(struct cxl_memdev *cxlmd);
> > > > +
> > > >  /**
> > > >   * struct cxl_mbox_cmd - A command to be submitted to hardware.
> > > >   * @opcode: (input) The command set and command submitted to hardware.
> > > > diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c
> > > > index 978a54b0a51a..b6dc34d18a86 100644
> > > > --- a/drivers/cxl/mem.c
> > > > +++ b/drivers/cxl/mem.c
> > > > @@ -2,8 +2,10 @@
> > > >  /* Copyright(c) 2021 Intel Corporation. All rights reserved. */
> > > >  #include <linux/device.h>
> > > >  #include <linux/module.h>
> > > > +#include <linux/pci.h>
> > > >
> > > >  #include "cxlmem.h"
> > > > +#include "pci.h"
> > > >
> > > >  /**
> > > >   * DOC: cxl mem
> > > > @@ -17,9 +19,60 @@
> > > >   * components.
> > > >   */
> > > >
> > > > +static int port_match(struct device *dev, const void *data)
> > > > +{
> > > > +       struct cxl_port *port;
> > > > +
> > > > +       if (!is_cxl_port(dev))
> > > > +               return 0;
> > > > +
> > > > +       port = to_cxl_port(dev);
> > > > +
> > > > +       if (find_dport_by_dev(port, (struct device *)data))
> > > > +               return 1;
> > > > +
> > > > +       return 0;
> > > > +}
> > > > +
> > > > +static bool is_cxl_mem_enabled(struct pci_dev *pdev)
> > > > +{
> > > > +       int pcie_dvsec;
> > > > +       u16 dvsec_ctrl;
> > > > +
> > > > +       pcie_dvsec = cxl_pci_dvsec(pdev, PCI_DVSEC_ID_PCIE_DVSEC_CXL_DVSEC_ID);
> > > > +       if (!pcie_dvsec) {
> > > > +               dev_info(&pdev->dev, "Unable to determine CXL protocol support");
> > > > +               return false;
> > > > +       }
> > > > +
> > > > +       pci_read_config_word(pdev,
> > > > +                            pcie_dvsec + PCI_DVSEC_ID_CXL_PCIE_CTRL_OFFSET,
> > > > +                            &dvsec_ctrl);
> > > > +       if (!(dvsec_ctrl & CXL_PCIE_MEM_ENABLE)) {
> > > > +               dev_info(&pdev->dev, "CXL.mem protocol not supported on device");
> > > > +               return false;
> > > > +       }
> > > > +
> > > > +       return true;
> > > > +}
> > > > +
> > > >  static int cxl_mem_probe(struct device *dev)
> > > >  {
> > > > -       return -EOPNOTSUPP;
> > > > +       struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
> > > > +       struct cxl_mem *cxlm = cxlmd->cxlm;
> > > > +       struct device *pdev_parent = cxlm->dev->parent;
> > > > +       struct pci_dev *pdev = to_pci_dev(cxlm->dev);
> > >
> > > It's not safe to assume that the parent of a cxlmd is a pci device.
> > >
> >
> > What can it be then? Isn't the parent always going to be a downstream port,
> > root port, or emulated port from cxl_test?
> 
> It was a nice property of the recent reworks that
> drivers/cxl/core/memdev.c was able to drop its include of
> <linux/pci.h> because drivers/cxl/pci.c handled all of those details.
> So I'd prefer that any extra PCI details that memdev needs be probed
> by the PCI driver and passed to the creation of the memdev device,
> like the component register location.
> 
> This preserves the ability of cxl_test to implement the ioctl and
> sysfs ABI without needing to also emulate PCI, but also just keeps PCI
> concerns in files named "pci".

Okay. As stated I will pass along the info from cxl_pci or cxl_test.

> 
> >
> > > > +       struct device *port_dev;
> > > > +
> > > > +       if (!is_cxl_mem_enabled(pdev))
> > > > +               return -ENODEV;
> > >
> > > This isn't sufficient, this needs to walk the entire hierarchy, right?
> > >
> >
> > It was saved to a later patch because I have no way to actually test deeper
> > hierarchies at the moment. After I had already sent this, you and I discussed
> > not doing that. I can merge it together if there's no perceived value in keeping
> > them separate, which it sounds like there isn't.
> 
> Yeah, let's keep heading that direction...
> 
> >
> > > > +
> > > > +       /* TODO: if parent is a switch, this will fail. */
> > >
> > > Won't the parent be a switch in all cases? For example, even in QEMU
> > > today the parent of the CXL device is the switch in the host bridge.
> > >
> > > # cat /sys/bus/cxl/devices/port1/decoder1.0/devtype
> > > cxl_decoder_switch
> >
> > In QEMU (and I assume hardware) they aren't the same, root ports have a separate
> > implementation from switches. That aside, the primary difference in the driver
> > is that cxl_acpi enumerates root ports so the cxl_mem driver can determine
> > connectedness as long as cxl_acpi has run.
> 
> I know that PCI root ports are distinct from PCI switches, but the
> Linux CXL topology is a Linux specific interpretation of the CXL spec
> where cxl_decoder_switch applies equally to the 2nd-level+ decoders.
> There is no cxl_decoder_root_port. So the comment on a minimum either
> needs to be deleted or fixed up to be specific about
> Linux-cxl_decoder_switch vs PCI/CXL-spec defined switch.

I can fix up the comment, but per your earlier request the TODO will go away.
diff mbox series

Patch

diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c
index 7130beffc929..fd14094bdb3f 100644
--- a/drivers/cxl/acpi.c
+++ b/drivers/cxl/acpi.c
@@ -240,21 +240,6 @@  __mock int match_add_root_ports(struct pci_dev *pdev, void *data)
 	return 0;
 }
 
-static struct cxl_dport *find_dport_by_dev(struct cxl_port *port, struct device *dev)
-{
-	struct cxl_dport *dport;
-
-	device_lock(&port->dev);
-	list_for_each_entry(dport, &port->dports, list)
-		if (dport->dport == dev) {
-			device_unlock(&port->dev);
-			return dport;
-		}
-
-	device_unlock(&port->dev);
-	return NULL;
-}
-
 __mock struct acpi_device *to_cxl_host_bridge(struct device *host,
 					      struct device *dev)
 {
@@ -459,9 +444,19 @@  static int cxl_acpi_probe(struct platform_device *pdev)
 	if (rc)
 		goto out;
 
-	if (IS_ENABLED(CONFIG_CXL_PMEM))
+	if (IS_ENABLED(CONFIG_CXL_PMEM)) {
 		rc = device_for_each_child(&root_port->dev, root_port,
 					   add_root_nvdimm_bridge);
+		if (rc)
+			goto out;
+	}
+
+	/*
+	 * While ACPI is scanning hostbridge ports, switches and memory devices
+	 * may have been probed. Those devices will need to know whether the
+	 * hostbridge is CXL capable.
+	 */
+	rc = bus_rescan_devices(&cxl_bus_type);
 
 out:
 	acpi_put_table(acpi_cedt);
diff --git a/drivers/cxl/core/bus.c b/drivers/cxl/core/bus.c
index 256e55dc2a3b..56f57302d27b 100644
--- a/drivers/cxl/core/bus.c
+++ b/drivers/cxl/core/bus.c
@@ -8,6 +8,7 @@ 
 #include <linux/idr.h>
 #include <cxlmem.h>
 #include <cxl.h>
+#include <pci.h>
 #include "core.h"
 
 /**
@@ -259,6 +260,12 @@  static const struct device_type cxl_port_type = {
 	.groups = cxl_port_attribute_groups,
 };
 
+bool is_cxl_port(struct device *dev)
+{
+	return dev->type == &cxl_port_type;
+}
+EXPORT_SYMBOL_GPL(is_cxl_port);
+
 struct cxl_port *to_cxl_port(struct device *dev)
 {
 	if (dev_WARN_ONCE(dev, dev->type != &cxl_port_type,
@@ -266,6 +273,7 @@  struct cxl_port *to_cxl_port(struct device *dev)
 		return NULL;
 	return container_of(dev, struct cxl_port, dev);
 }
+EXPORT_SYMBOL_GPL(to_cxl_port);
 
 static void unregister_port(void *_port)
 {
@@ -424,6 +432,27 @@  static int add_dport(struct cxl_port *port, struct cxl_dport *new)
 	return dup ? -EEXIST : 0;
 }
 
+/**
+ * find_dport_by_dev - gets downstream CXL port from a struct device
+ * @port: cxl [upstream] port that "owns" the downstream port is being queried
+ * @dev: The device that is backing the downstream port
+ */
+struct cxl_dport *find_dport_by_dev(struct cxl_port *port, const struct device *dev)
+{
+	struct cxl_dport *dport;
+
+	device_lock(&port->dev);
+	list_for_each_entry(dport, &port->dports, list)
+		if (dport->dport == dev) {
+			device_unlock(&port->dev);
+			return dport;
+		}
+
+	device_unlock(&port->dev);
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(find_dport_by_dev);
+
 /**
  * cxl_add_dport - append downstream port data to a cxl_port
  * @port: the cxl_port that references this dport
@@ -596,6 +625,37 @@  int cxl_decoder_autoremove(struct device *host, struct cxl_decoder *cxld)
 }
 EXPORT_SYMBOL_GPL(cxl_decoder_autoremove);
 
+/**
+ * cxl_pci_dvsec - Gets offset for the given DVSEC id
+ * @pdev: PCI device to search for the DVSEC
+ * @dvsec: DVSEC id to look for
+ *
+ * Return: offset within the PCI header for the given DVSEC id. 0 if not found
+ */
+int cxl_pci_dvsec(struct pci_dev *pdev, int dvsec)
+{
+	int pos;
+
+	pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_DVSEC);
+	if (!pos)
+		return 0;
+
+	while (pos) {
+		u16 vendor, id;
+
+		pci_read_config_word(pdev, pos + PCI_DVSEC_HEADER1, &vendor);
+		pci_read_config_word(pdev, pos + PCI_DVSEC_HEADER2, &id);
+		if (vendor == PCI_DVSEC_VENDOR_ID_CXL && dvsec == id)
+			return pos;
+
+		pos = pci_find_next_ext_capability(pdev, pos,
+						   PCI_EXT_CAP_ID_DVSEC);
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(cxl_mem_dvsec);
+
 /**
  * __cxl_driver_register - register a driver for the cxl bus
  * @cxl_drv: cxl driver structure to attach
diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c
index c9dd054bd813..0068b5ff5f3e 100644
--- a/drivers/cxl/core/memdev.c
+++ b/drivers/cxl/core/memdev.c
@@ -337,3 +337,9 @@  void cxl_memdev_exit(void)
 {
 	unregister_chrdev_region(MKDEV(cxl_mem_major, 0), CXL_MEM_MAX_DEVS);
 }
+
+bool is_cxl_mem_capable(struct cxl_memdev *cxlmd)
+{
+	return !!cxlmd->dev.driver;
+}
+EXPORT_SYMBOL_GPL(is_cxl_mem_capable);
diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
index b48bdbefd949..a168520d741b 100644
--- a/drivers/cxl/cxl.h
+++ b/drivers/cxl/cxl.h
@@ -283,8 +283,10 @@  struct cxl_port *devm_cxl_add_port(struct device *host, struct device *uport,
 				   resource_size_t component_reg_phys,
 				   struct cxl_port *parent_port);
 
+bool is_cxl_port(struct device *dev);
 int cxl_add_dport(struct cxl_port *port, struct device *dport, int port_id,
 		  resource_size_t component_reg_phys);
+struct cxl_dport *find_dport_by_dev(struct cxl_port *port, const struct device *dev);
 
 struct cxl_decoder *to_cxl_decoder(struct device *dev);
 bool is_root_decoder(struct device *dev);
diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h
index 811b24451604..88264204c4b9 100644
--- a/drivers/cxl/cxlmem.h
+++ b/drivers/cxl/cxlmem.h
@@ -51,6 +51,8 @@  static inline struct cxl_memdev *to_cxl_memdev(struct device *dev)
 struct cxl_memdev *devm_cxl_add_memdev(struct device *host,
 				       struct cxl_mem *cxlm);
 
+bool is_cxl_mem_capable(struct cxl_memdev *cxlmd);
+
 /**
  * struct cxl_mbox_cmd - A command to be submitted to hardware.
  * @opcode: (input) The command set and command submitted to hardware.
diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c
index 978a54b0a51a..b6dc34d18a86 100644
--- a/drivers/cxl/mem.c
+++ b/drivers/cxl/mem.c
@@ -2,8 +2,10 @@ 
 /* Copyright(c) 2021 Intel Corporation. All rights reserved. */
 #include <linux/device.h>
 #include <linux/module.h>
+#include <linux/pci.h>
 
 #include "cxlmem.h"
+#include "pci.h"
 
 /**
  * DOC: cxl mem
@@ -17,9 +19,60 @@ 
  * components.
  */
 
+static int port_match(struct device *dev, const void *data)
+{
+	struct cxl_port *port;
+
+	if (!is_cxl_port(dev))
+		return 0;
+
+	port = to_cxl_port(dev);
+
+	if (find_dport_by_dev(port, (struct device *)data))
+		return 1;
+
+	return 0;
+}
+
+static bool is_cxl_mem_enabled(struct pci_dev *pdev)
+{
+	int pcie_dvsec;
+	u16 dvsec_ctrl;
+
+	pcie_dvsec = cxl_pci_dvsec(pdev, PCI_DVSEC_ID_PCIE_DVSEC_CXL_DVSEC_ID);
+	if (!pcie_dvsec) {
+		dev_info(&pdev->dev, "Unable to determine CXL protocol support");
+		return false;
+	}
+
+	pci_read_config_word(pdev,
+			     pcie_dvsec + PCI_DVSEC_ID_CXL_PCIE_CTRL_OFFSET,
+			     &dvsec_ctrl);
+	if (!(dvsec_ctrl & CXL_PCIE_MEM_ENABLE)) {
+		dev_info(&pdev->dev, "CXL.mem protocol not supported on device");
+		return false;
+	}
+
+	return true;
+}
+
 static int cxl_mem_probe(struct device *dev)
 {
-	return -EOPNOTSUPP;
+	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
+	struct cxl_mem *cxlm = cxlmd->cxlm;
+	struct device *pdev_parent = cxlm->dev->parent;
+	struct pci_dev *pdev = to_pci_dev(cxlm->dev);
+	struct device *port_dev;
+
+	if (!is_cxl_mem_enabled(pdev))
+		return -ENODEV;
+
+	/* TODO: if parent is a switch, this will fail. */
+	port_dev = bus_find_device(&cxl_bus_type, NULL, pdev_parent, port_match);
+	if (!port_dev)
+		return -ENODEV;
+
+	return 0;
 }
 
 static void cxl_mem_remove(struct device *dev)
diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c
index 6931885c83ce..244b99948c40 100644
--- a/drivers/cxl/pci.c
+++ b/drivers/cxl/pci.c
@@ -335,29 +335,6 @@  static void cxl_pci_unmap_regblock(struct cxl_mem *cxlm, void __iomem *base)
 	pci_iounmap(to_pci_dev(cxlm->dev), base);
 }
 
-static int cxl_pci_dvsec(struct pci_dev *pdev, int dvsec)
-{
-	int pos;
-
-	pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_DVSEC);
-	if (!pos)
-		return 0;
-
-	while (pos) {
-		u16 vendor, id;
-
-		pci_read_config_word(pdev, pos + PCI_DVSEC_HEADER1, &vendor);
-		pci_read_config_word(pdev, pos + PCI_DVSEC_HEADER2, &id);
-		if (vendor == PCI_DVSEC_VENDOR_ID_CXL && dvsec == id)
-			return pos;
-
-		pos = pci_find_next_ext_capability(pdev, pos,
-						   PCI_EXT_CAP_ID_DVSEC);
-	}
-
-	return 0;
-}
-
 static int cxl_probe_regs(struct cxl_mem *cxlm, void __iomem *base,
 			  struct cxl_register_map *map)
 {
diff --git a/drivers/cxl/pci.h b/drivers/cxl/pci.h
index 8c1a58813816..d6b9978d05b0 100644
--- a/drivers/cxl/pci.h
+++ b/drivers/cxl/pci.h
@@ -11,7 +11,10 @@ 
  */
 #define PCI_DVSEC_HEADER1_LENGTH_MASK	GENMASK(31, 20)
 #define PCI_DVSEC_VENDOR_ID_CXL		0x1E98
-#define PCI_DVSEC_ID_CXL		0x0
+
+#define PCI_DVSEC_ID_PCIE_DVSEC_CXL_DVSEC_ID	0x0
+#define PCI_DVSEC_ID_CXL_PCIE_CTRL_OFFSET	0xC
+#define   CXL_PCIE_MEM_ENABLE			BIT(2)
 
 #define PCI_DVSEC_ID_CXL_REGLOC_DVSEC_ID	0x8
 #define PCI_DVSEC_ID_CXL_REGLOC_BLOCK1_OFFSET	0xC
@@ -29,4 +32,6 @@ 
 
 #define CXL_REGLOC_ADDR_MASK GENMASK(31, 16)
 
+int cxl_pci_dvsec(struct pci_dev *pdev, int dvsec);
+
 #endif /* __CXL_PCI_H__ */