diff mbox series

[1/5] cxl/core: Add cxl-bus driver infrastructure

Message ID 162336396329.2462439.16556923116284874437.stgit@dwillia2-desk3.amr.corp.intel.com (mailing list archive)
State Superseded
Headers show
Series cxl/pmem: Add core infrastructure for PMEM support | expand

Commit Message

Dan Williams June 10, 2021, 10:26 p.m. UTC
Enable devices on the 'cxl' bus to be attached to drivers. The initial
user of this functionality is a driver for an 'nvdimm-bridge' device
that anchors a libnvdimm hierarchy attached to CXL persistent memory
resources. Other device types that will leverage this include:

cxl_port: map and use component register functionality (HDM Decoders)

cxl_nvdimm: translate CXL memory expander endpoints to libnvdimm
	    'nvdimm' objects

cxl_region: translate CXL interleave sets to libnvdimm 'region' objects

The pairing of devices to drivers is handled through the cxl_device_id()
matching to cxl_driver.id values. A cxl_device_id() of '0' indicates no
driver support.

In addition to ->match(), ->probe(), and ->remove() support for the
'cxl' bus introduce MODULE_ALIAS_CXL() to autoload modules containing
cxl-drivers. Drivers are added in follow-on changes.

Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/cxl/core.c |   73 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 drivers/cxl/cxl.h  |   22 ++++++++++++++++
 2 files changed, 95 insertions(+)

Comments

Ben Widawsky June 11, 2021, 5:47 p.m. UTC | #1
On 21-06-10 15:26:03, Dan Williams wrote:
> Enable devices on the 'cxl' bus to be attached to drivers. The initial
> user of this functionality is a driver for an 'nvdimm-bridge' device
> that anchors a libnvdimm hierarchy attached to CXL persistent memory
> resources. Other device types that will leverage this include:
> 
> cxl_port: map and use component register functionality (HDM Decoders)

Since I'm looking at this now, perhaps I can open the discussion here. Have you
thought about how this works yet? Right now I'm thinking there are two "drivers":
cxl_port: Switches (and ACPI0016)
cxl_mem: The memory device's HDM decoders

For port, probe() will figure out that the thing is an upstream port, call
cxl_probe_component_regs and then call devm_cxl_add_port(). I think that's
straight forward.

For the memory device we've already probed the thing via class code so there is
no need to use this driver registration, however, I think it would be nice to do
so. Is there a clean way to do that?

Also, I'd like to make sure we're on the same page about struct cxl_decoder.
Right now they are only created for active HDM decoders. Going forward, we can
either maintain a count of unused decoders on the given CXL component, or we can
instantiate a struct cxl_decoder that isn't active, ie. no interleave ways
granularit, base, etc. What's your thinking there?

> 
> cxl_nvdimm: translate CXL memory expander endpoints to libnvdimm
> 	    'nvdimm' objects
> 
> cxl_region: translate CXL interleave sets to libnvdimm 'region' objects
> 
> The pairing of devices to drivers is handled through the cxl_device_id()
> matching to cxl_driver.id values. A cxl_device_id() of '0' indicates no
> driver support.
> 
> In addition to ->match(), ->probe(), and ->remove() support for the
> 'cxl' bus introduce MODULE_ALIAS_CXL() to autoload modules containing
> cxl-drivers. Drivers are added in follow-on changes.
> 
> Signed-off-by: Dan Williams <dan.j.williams@intel.com>
> ---
>  drivers/cxl/core.c |   73 ++++++++++++++++++++++++++++++++++++++++++++++++++++
>  drivers/cxl/cxl.h  |   22 ++++++++++++++++
>  2 files changed, 95 insertions(+)
> 
> diff --git a/drivers/cxl/core.c b/drivers/cxl/core.c
> index 1b9ee0b08384..959cecc1f6bf 100644
> --- a/drivers/cxl/core.c
> +++ b/drivers/cxl/core.c
> @@ -767,8 +767,81 @@ int cxl_map_device_regs(struct pci_dev *pdev,
>  }
>  EXPORT_SYMBOL_GPL(cxl_map_device_regs);
>  
> +/**
> + * __cxl_driver_register - register a driver for the cxl bus
> + * @cxl_drv: cxl driver structure to attach
> + * @owner: owning module/driver
> + * @modname: KBUILD_MODNAME for parent driver
> + */
> +int __cxl_driver_register(struct cxl_driver *cxl_drv, struct module *owner,
> +			  const char *modname)
> +{
> +	if (!cxl_drv->probe) {
> +		pr_debug("%s ->probe() must be specified\n", modname);
> +		return -EINVAL;
> +	}
> +
> +	if (!cxl_drv->name) {
> +		pr_debug("%s ->name must be specified\n", modname);
> +		return -EINVAL;
> +	}
> +
> +	if (!cxl_drv->id) {
> +		pr_debug("%s ->id must be specified\n", modname);
> +		return -EINVAL;
> +	}
> +
> +	cxl_drv->drv.bus = &cxl_bus_type;
> +	cxl_drv->drv.owner = owner;
> +	cxl_drv->drv.mod_name = modname;
> +	cxl_drv->drv.name = cxl_drv->name;
> +
> +	return driver_register(&cxl_drv->drv);
> +}
> +EXPORT_SYMBOL_GPL(__cxl_driver_register);
> +
> +void cxl_driver_unregister(struct cxl_driver *cxl_drv)
> +{
> +	driver_unregister(&cxl_drv->drv);
> +}
> +EXPORT_SYMBOL_GPL(cxl_driver_unregister);
> +
> +static int cxl_device_id(struct device *dev)
> +{
> +	return 0;
> +}
> +
> +static int cxl_bus_uevent(struct device *dev, struct kobj_uevent_env *env)
> +{
> +	return add_uevent_var(env, "MODALIAS=" CXL_MODALIAS_FMT,
> +			      cxl_device_id(dev));
> +}
> +
> +static int cxl_bus_match(struct device *dev, struct device_driver *drv)
> +{
> +	return cxl_device_id(dev) == to_cxl_drv(drv)->id;
> +}
> +
> +static int cxl_bus_probe(struct device *dev)
> +{
> +	return to_cxl_drv(dev->driver)->probe(dev);
> +}
> +
> +static int cxl_bus_remove(struct device *dev)
> +{
> +	struct cxl_driver *cxl_drv = to_cxl_drv(dev->driver);
> +
> +	if (cxl_drv->remove)
> +		cxl_drv->remove(dev);
> +	return 0;
> +}
> +
>  struct bus_type cxl_bus_type = {
>  	.name = "cxl",
> +	.uevent = cxl_bus_uevent,
> +	.match = cxl_bus_match,
> +	.probe = cxl_bus_probe,
> +	.remove = cxl_bus_remove,
>  };
>  EXPORT_SYMBOL_GPL(cxl_bus_type);
>  
> diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
> index b988ea288f53..af2237d1c761 100644
> --- a/drivers/cxl/cxl.h
> +++ b/drivers/cxl/cxl.h
> @@ -261,4 +261,26 @@ devm_cxl_add_passthrough_decoder(struct device *host, struct cxl_port *port)
>  }
>  
>  extern struct bus_type cxl_bus_type;
> +
> +struct cxl_driver {
> +	const char *name;
> +	int (*probe)(struct device *dev);
> +	void (*remove)(struct device *dev);
> +	struct device_driver drv;
> +	int id;
> +};
> +
> +static inline struct cxl_driver *to_cxl_drv(struct device_driver *drv)
> +{
> +	return container_of(drv, struct cxl_driver, drv);
> +}
> +
> +int __cxl_driver_register(struct cxl_driver *cxl_drv, struct module *owner,
> +			  const char *modname);
> +#define cxl_driver_register(x) __cxl_driver_register(x, THIS_MODULE, KBUILD_MODNAME)
> +void cxl_driver_unregister(struct cxl_driver *cxl_drv);
> +
> +#define MODULE_ALIAS_CXL(type) MODULE_ALIAS("cxl:t" __stringify(type) "*")
> +#define CXL_MODALIAS_FMT "cxl:t%d"
> +
>  #endif /* __CXL_H__ */
>
Dan Williams June 11, 2021, 6:55 p.m. UTC | #2
On Fri, Jun 11, 2021 at 10:47 AM Ben Widawsky <ben.widawsky@intel.com> wrote:
>
> On 21-06-10 15:26:03, Dan Williams wrote:
> > Enable devices on the 'cxl' bus to be attached to drivers. The initial
> > user of this functionality is a driver for an 'nvdimm-bridge' device
> > that anchors a libnvdimm hierarchy attached to CXL persistent memory
> > resources. Other device types that will leverage this include:
> >
> > cxl_port: map and use component register functionality (HDM Decoders)
>
> Since I'm looking at this now, perhaps I can open the discussion here. Have you
> thought about how this works yet? Right now I'm thinking there are two "drivers":
> cxl_port: Switches (and ACPI0016)
> cxl_mem: The memory device's HDM decoders
>
> For port, probe() will figure out that the thing is an upstream port, call
> cxl_probe_component_regs and then call devm_cxl_add_port(). I think that's
> straight forward.

I was expecting cxl_port_driver.probe() comes *after* port discovery.
Think of it like PCI discovery. Some agent does the hardware topology
scan to add devices, in this case devm_cxl_add_port(), and that
triggers cxl_port_driver to load. So the initial enumeration done by
the cxl_acpi driver will populate the first two levels of the port
hierarchy with port objects and populate their component register
physical base addresses. For any other port deeper in the hierarchy I
was expecting that to be scanned after the discovery of a cxl_memdev
that is not attached to the current hierarchy. So, for example imagine
a config like:

Platform --> Host Bridge --> Switch --> Endpoint

...where in sysfs that's modeled as:

root0 --> port1 --> port2 --> port3

Where port3 is assuming that the CXL core models the device's
connection to the topology as yet another cxl_port. At the beginning
of time after cxl_acpi has loaded but before cxl_pci has discovered
the endpoint the topology is:

root0 --> port1

Upon the detection of the endpoint the CXL core can assume that all
intermediary switches between the root and this device have been
registered as PCI devices. So, it follows that endpoint device arrival
triggers "cxl_bus_rescan()" that goes and enumerates all the CXL
resources in the topology to produce:

root0 --> port1 --> port2 --> port3

> For the memory device we've already probed the thing via class code so there is
> no need to use this driver registration, however, I think it would be nice to do
> so. Is there a clean way to do that?

The PCI device associated with the endpoint is already probed, but the
cxl_memdev itself can have a driver on the CXL bus. So I think the
cxl_memdev driver should try to register a cxl_port after telling
cxl_acpi to rescan. If a check like "is_cxl_dport(pdev->dev.parent)"
for the endpoint returns false it means that the cxl_bus_rescan()
failed to enumerate the CXL topology to this endpoint and this
endpoint is limited to only CXL.io operation.

> Also, I'd like to make sure we're on the same page about struct cxl_decoder.
> Right now they are only created for active HDM decoders.

No, I was expecting they are also created for inactive ones. I am
thinking that all decoders ultimately belong to the cxl_acpi driver,
or whatever driver is acting as the root on a non-ACPI system. All
decoder programming is driven by region activation stimulus that asks
the root driver to try to establish a decode chain through the
hieararchy per a given region.

> Going forward, we can
> either maintain a count of unused decoders on the given CXL component, or we can
> instantiate a struct cxl_decoder that isn't active, ie. no interleave ways
> granularit, base, etc. What's your thinking there?

All resources are enumerated, just like PCI. Decode setup belongs to
the core, just like PCI MMIO resource setup. The difference is that
port drivers are needed to map component registers and service
requests from cxl_acpi to reconfigure, but other than that
cxl_decoders themselves don't have drivers and just reflect the
current state of what cxl_acpi / cxl_core have established.
Ben Widawsky June 11, 2021, 7:28 p.m. UTC | #3
On 21-06-11 11:55:39, Dan Williams wrote:
> On Fri, Jun 11, 2021 at 10:47 AM Ben Widawsky <ben.widawsky@intel.com> wrote:
> >
> > On 21-06-10 15:26:03, Dan Williams wrote:
> > > Enable devices on the 'cxl' bus to be attached to drivers. The initial
> > > user of this functionality is a driver for an 'nvdimm-bridge' device
> > > that anchors a libnvdimm hierarchy attached to CXL persistent memory
> > > resources. Other device types that will leverage this include:
> > >
> > > cxl_port: map and use component register functionality (HDM Decoders)
> >
> > Since I'm looking at this now, perhaps I can open the discussion here. Have you
> > thought about how this works yet? Right now I'm thinking there are two "drivers":
> > cxl_port: Switches (and ACPI0016)
> > cxl_mem: The memory device's HDM decoders
> >
> > For port, probe() will figure out that the thing is an upstream port, call
> > cxl_probe_component_regs and then call devm_cxl_add_port(). I think that's
> > straight forward.
> 
> I was expecting cxl_port_driver.probe() comes *after* port discovery.
> Think of it like PCI discovery. Some agent does the hardware topology
> scan to add devices, in this case devm_cxl_add_port(), and that
> triggers cxl_port_driver to load. So the initial enumeration done by
> the cxl_acpi driver will populate the first two levels of the port
> hierarchy with port objects and populate their component register
> physical base addresses. For any other port deeper in the hierarchy I
> was expecting that to be scanned after the discovery of a cxl_memdev
> that is not attached to the current hierarchy. So, for example imagine
> a config like:
> 
> Platform --> Host Bridge --> Switch --> Endpoint
> 
> ...where in sysfs that's modeled as:
> 
> root0 --> port1 --> port2 --> port3
> 
> Where port3 is assuming that the CXL core models the device's
> connection to the topology as yet another cxl_port. At the beginning
> of time after cxl_acpi has loaded but before cxl_pci has discovered
> the endpoint the topology is:
> 
> root0 --> port1
> 
> Upon the detection of the endpoint the CXL core can assume that all
> intermediary switches between the root and this device have been
> registered as PCI devices. So, it follows that endpoint device arrival
> triggers "cxl_bus_rescan()" that goes and enumerates all the CXL
> resources in the topology to produce:
> 
> root0 --> port1 --> port2 --> port3
> 

Ah, I had written about scan/rescan in an earlier version of my email but
dropped it. I was actually going to suggest it being a sysfs attr, but I'm fine
with it being implicit so long as...

How do we assert that cxl_pci doesn't run before cxl_acpi has done anything? I
like the idea that the endpoint device can simply ask cxl_acpi to rescan, I just
don't see how it works. I suppose we can queue up the requests to rescan in
cxl_acpi if the ordering can't be guaranteed.

> > For the memory device we've already probed the thing via class code so there is
> > no need to use this driver registration, however, I think it would be nice to do
> > so. Is there a clean way to do that?
> 
> The PCI device associated with the endpoint is already probed, but the
> cxl_memdev itself can have a driver on the CXL bus. So I think the
> cxl_memdev driver should try to register a cxl_port after telling
> cxl_acpi to rescan. If a check like "is_cxl_dport(pdev->dev.parent)"
> for the endpoint returns false it means that the cxl_bus_rescan()
> failed to enumerate the CXL topology to this endpoint and this
> endpoint is limited to only CXL.io operation.

What is going to invoke the memdev driver's probe? That is where we're talking
about putting that is_cxl_dport(...) right? That is the part that tripped me up
and inspired the original email FWIW.

> 
> > Also, I'd like to make sure we're on the same page about struct cxl_decoder.
> > Right now they are only created for active HDM decoders.
> 
> No, I was expecting they are also created for inactive ones. I am
> thinking that all decoders ultimately belong to the cxl_acpi driver,
> or whatever driver is acting as the root on a non-ACPI system. All
> decoder programming is driven by region activation stimulus that asks
> the root driver to try to establish a decode chain through the
> hieararchy per a given region.
> 
> > Going forward, we can
> > either maintain a count of unused decoders on the given CXL component, or we can
> > instantiate a struct cxl_decoder that isn't active, ie. no interleave ways
> > granularit, base, etc. What's your thinking there?
> 
> All resources are enumerated, just like PCI. Decode setup belongs to
> the core, just like PCI MMIO resource setup. The difference is that
> port drivers are needed to map component registers and service
> requests from cxl_acpi to reconfigure, but other than that
> cxl_decoders themselves don't have drivers and just reflect the
> current state of what cxl_acpi / cxl_core have established.

Okay.
Dan Williams June 11, 2021, 11:25 p.m. UTC | #4
On Fri, Jun 11, 2021 at 12:28 PM Ben Widawsky <ben.widawsky@intel.com> wrote:
>
> On 21-06-11 11:55:39, Dan Williams wrote:
> > On Fri, Jun 11, 2021 at 10:47 AM Ben Widawsky <ben.widawsky@intel.com> wrote:
> > >
> > > On 21-06-10 15:26:03, Dan Williams wrote:
> > > > Enable devices on the 'cxl' bus to be attached to drivers. The initial
> > > > user of this functionality is a driver for an 'nvdimm-bridge' device
> > > > that anchors a libnvdimm hierarchy attached to CXL persistent memory
> > > > resources. Other device types that will leverage this include:
> > > >
> > > > cxl_port: map and use component register functionality (HDM Decoders)
> > >
> > > Since I'm looking at this now, perhaps I can open the discussion here. Have you
> > > thought about how this works yet? Right now I'm thinking there are two "drivers":
> > > cxl_port: Switches (and ACPI0016)
> > > cxl_mem: The memory device's HDM decoders
> > >
> > > For port, probe() will figure out that the thing is an upstream port, call
> > > cxl_probe_component_regs and then call devm_cxl_add_port(). I think that's
> > > straight forward.
> >
> > I was expecting cxl_port_driver.probe() comes *after* port discovery.
> > Think of it like PCI discovery. Some agent does the hardware topology
> > scan to add devices, in this case devm_cxl_add_port(), and that
> > triggers cxl_port_driver to load. So the initial enumeration done by
> > the cxl_acpi driver will populate the first two levels of the port
> > hierarchy with port objects and populate their component register
> > physical base addresses. For any other port deeper in the hierarchy I
> > was expecting that to be scanned after the discovery of a cxl_memdev
> > that is not attached to the current hierarchy. So, for example imagine
> > a config like:
> >
> > Platform --> Host Bridge --> Switch --> Endpoint
> >
> > ...where in sysfs that's modeled as:
> >
> > root0 --> port1 --> port2 --> port3
> >
> > Where port3 is assuming that the CXL core models the device's
> > connection to the topology as yet another cxl_port. At the beginning
> > of time after cxl_acpi has loaded but before cxl_pci has discovered
> > the endpoint the topology is:
> >
> > root0 --> port1
> >
> > Upon the detection of the endpoint the CXL core can assume that all
> > intermediary switches between the root and this device have been
> > registered as PCI devices. So, it follows that endpoint device arrival
> > triggers "cxl_bus_rescan()" that goes and enumerates all the CXL
> > resources in the topology to produce:
> >
> > root0 --> port1 --> port2 --> port3
> >
>
> Ah, I had written about scan/rescan in an earlier version of my email but
> dropped it. I was actually going to suggest it being a sysfs attr, but I'm fine
> with it being implicit so long as...
>
> How do we assert that cxl_pci doesn't run before cxl_acpi has done anything?

I don't think we need to, or it's broken if the driver load order
matters. The nvdimm enabling code is an example of how to handle this.
The cxl_nvdimm object can be registered before the cxl_nvdimm_bridge,
or after, does not matter. If the cxl_nvdimm comes first it will
trigger the cxl_nvdimm_driver to load. The cxl_nvdimm_driver.probe()
routine finds no bridge present and probe() returns with a failure.
When the bridge arrives it does a rescan  of the cxl_bus_type device
list and if it finds a cxl_nvdimm it re-triggers
cxl_nvdimm_driver.probe(). This time through cxl_nvdimm_driver.probe()
finds the bridge and registers the real nvdimm on the nvdimm_bus.

> I
> like the idea that the endpoint device can simply ask cxl_acpi to rescan, I just
> don't see how it works. I suppose we can queue up the requests to rescan in
> cxl_acpi if the ordering can't be guaranteed.

I think this means that the devm_cxl_add_port() would be triggered by
cxl_memdev_driver.probe() if and only if the parent pci_device of the
CXL endpoint is listed as a dport. If the cxl_memdev is registered
first the search it will search for the CXL root port on the
cxl_bus_type device list. If that fails then cxl_memdev_driver.probe()
fails. If that succeeds it asks the root to scan to the CXL endpoint
parent pci_device and return the confirmation that it is registered as
a dport. If that fails then the device is plugged into a pure PCIe
slot.

When cxl_acpi loads it retriggers all cxl_memdev_driver.probe() to
reconsider all cxl_memdev instances that failed to probe previously.

>
> > > For the memory device we've already probed the thing via class code so there is
> > > no need to use this driver registration, however, I think it would be nice to do
> > > so. Is there a clean way to do that?
> >
> > The PCI device associated with the endpoint is already probed, but the
> > cxl_memdev itself can have a driver on the CXL bus. So I think the
> > cxl_memdev driver should try to register a cxl_port after telling
> > cxl_acpi to rescan. If a check like "is_cxl_dport(pdev->dev.parent)"
> > for the endpoint returns false it means that the cxl_bus_rescan()
> > failed to enumerate the CXL topology to this endpoint and this
> > endpoint is limited to only CXL.io operation.
>
> What is going to invoke the memdev driver's probe? That is where we're talking
> about putting that is_cxl_dport(...) right? That is the part that tripped me up
> and inspired the original email FWIW.

I *think* I worked that out above, but yes please do poke at it to see
if it holds up.
Ben Widawsky June 14, 2021, 9:40 p.m. UTC | #5
On 21-06-11 16:25:05, Dan Williams wrote:
> On Fri, Jun 11, 2021 at 12:28 PM Ben Widawsky <ben.widawsky@intel.com> wrote:
> >
> > On 21-06-11 11:55:39, Dan Williams wrote:
> > > On Fri, Jun 11, 2021 at 10:47 AM Ben Widawsky <ben.widawsky@intel.com> wrote:
> > > >
> > > > On 21-06-10 15:26:03, Dan Williams wrote:
> > > > > Enable devices on the 'cxl' bus to be attached to drivers. The initial
> > > > > user of this functionality is a driver for an 'nvdimm-bridge' device
> > > > > that anchors a libnvdimm hierarchy attached to CXL persistent memory
> > > > > resources. Other device types that will leverage this include:
> > > > >
> > > > > cxl_port: map and use component register functionality (HDM Decoders)
> > > >
> > > > Since I'm looking at this now, perhaps I can open the discussion here. Have you
> > > > thought about how this works yet? Right now I'm thinking there are two "drivers":
> > > > cxl_port: Switches (and ACPI0016)
> > > > cxl_mem: The memory device's HDM decoders
> > > >
> > > > For port, probe() will figure out that the thing is an upstream port, call
> > > > cxl_probe_component_regs and then call devm_cxl_add_port(). I think that's
> > > > straight forward.
> > >
> > > I was expecting cxl_port_driver.probe() comes *after* port discovery.
> > > Think of it like PCI discovery. Some agent does the hardware topology
> > > scan to add devices, in this case devm_cxl_add_port(), and that
> > > triggers cxl_port_driver to load. So the initial enumeration done by
> > > the cxl_acpi driver will populate the first two levels of the port
> > > hierarchy with port objects and populate their component register
> > > physical base addresses. For any other port deeper in the hierarchy I
> > > was expecting that to be scanned after the discovery of a cxl_memdev
> > > that is not attached to the current hierarchy. So, for example imagine
> > > a config like:
> > >
> > > Platform --> Host Bridge --> Switch --> Endpoint
> > >
> > > ...where in sysfs that's modeled as:
> > >
> > > root0 --> port1 --> port2 --> port3
> > >
> > > Where port3 is assuming that the CXL core models the device's
> > > connection to the topology as yet another cxl_port. At the beginning
> > > of time after cxl_acpi has loaded but before cxl_pci has discovered
> > > the endpoint the topology is:
> > >
> > > root0 --> port1
> > >
> > > Upon the detection of the endpoint the CXL core can assume that all
> > > intermediary switches between the root and this device have been
> > > registered as PCI devices. So, it follows that endpoint device arrival
> > > triggers "cxl_bus_rescan()" that goes and enumerates all the CXL
> > > resources in the topology to produce:
> > >
> > > root0 --> port1 --> port2 --> port3
> > >
> >
> > Ah, I had written about scan/rescan in an earlier version of my email but
> > dropped it. I was actually going to suggest it being a sysfs attr, but I'm fine
> > with it being implicit so long as...
> >
> > How do we assert that cxl_pci doesn't run before cxl_acpi has done anything?
> 
> I don't think we need to, or it's broken if the driver load order
> matters. The nvdimm enabling code is an example of how to handle this.
> The cxl_nvdimm object can be registered before the cxl_nvdimm_bridge,
> or after, does not matter. If the cxl_nvdimm comes first it will
> trigger the cxl_nvdimm_driver to load. The cxl_nvdimm_driver.probe()
> routine finds no bridge present and probe() returns with a failure.
> When the bridge arrives it does a rescan  of the cxl_bus_type device
> list and if it finds a cxl_nvdimm it re-triggers
> cxl_nvdimm_driver.probe(). This time through cxl_nvdimm_driver.probe()
> finds the bridge and registers the real nvdimm on the nvdimm_bus.
> 
> > I
> > like the idea that the endpoint device can simply ask cxl_acpi to rescan, I just
> > don't see how it works. I suppose we can queue up the requests to rescan in
> > cxl_acpi if the ordering can't be guaranteed.
> 
> I think this means that the devm_cxl_add_port() would be triggered by
> cxl_memdev_driver.probe() if and only if the parent pci_device of the
> CXL endpoint is listed as a dport. If the cxl_memdev is registered
> first the search it will search for the CXL root port on the
> cxl_bus_type device list. If that fails then cxl_memdev_driver.probe()
> fails. If that succeeds it asks the root to scan to the CXL endpoint
> parent pci_device and return the confirmation that it is registered as
> a dport. If that fails then the device is plugged into a pure PCIe
> slot.
> 
> When cxl_acpi loads it retriggers all cxl_memdev_driver.probe() to
> reconsider all cxl_memdev instances that failed to probe previously.
> 
> >
> > > > For the memory device we've already probed the thing via class code so there is
> > > > no need to use this driver registration, however, I think it would be nice to do
> > > > so. Is there a clean way to do that?
> > >
> > > The PCI device associated with the endpoint is already probed, but the
> > > cxl_memdev itself can have a driver on the CXL bus. So I think the
> > > cxl_memdev driver should try to register a cxl_port after telling
> > > cxl_acpi to rescan. If a check like "is_cxl_dport(pdev->dev.parent)"
> > > for the endpoint returns false it means that the cxl_bus_rescan()
> > > failed to enumerate the CXL topology to this endpoint and this
> > > endpoint is limited to only CXL.io operation.
> >
> > What is going to invoke the memdev driver's probe? That is where we're talking
> > about putting that is_cxl_dport(...) right? That is the part that tripped me up
> > and inspired the original email FWIW.
> 
> I *think* I worked that out above, but yes please do poke at it to see
> if it holds up.

I think it works. I have some concerns around synchronization of memdev probing
and cxl_acpi enumerating, but I believe it's workable.

Thanks for the thought.
diff mbox series

Patch

diff --git a/drivers/cxl/core.c b/drivers/cxl/core.c
index 1b9ee0b08384..959cecc1f6bf 100644
--- a/drivers/cxl/core.c
+++ b/drivers/cxl/core.c
@@ -767,8 +767,81 @@  int cxl_map_device_regs(struct pci_dev *pdev,
 }
 EXPORT_SYMBOL_GPL(cxl_map_device_regs);
 
+/**
+ * __cxl_driver_register - register a driver for the cxl bus
+ * @cxl_drv: cxl driver structure to attach
+ * @owner: owning module/driver
+ * @modname: KBUILD_MODNAME for parent driver
+ */
+int __cxl_driver_register(struct cxl_driver *cxl_drv, struct module *owner,
+			  const char *modname)
+{
+	if (!cxl_drv->probe) {
+		pr_debug("%s ->probe() must be specified\n", modname);
+		return -EINVAL;
+	}
+
+	if (!cxl_drv->name) {
+		pr_debug("%s ->name must be specified\n", modname);
+		return -EINVAL;
+	}
+
+	if (!cxl_drv->id) {
+		pr_debug("%s ->id must be specified\n", modname);
+		return -EINVAL;
+	}
+
+	cxl_drv->drv.bus = &cxl_bus_type;
+	cxl_drv->drv.owner = owner;
+	cxl_drv->drv.mod_name = modname;
+	cxl_drv->drv.name = cxl_drv->name;
+
+	return driver_register(&cxl_drv->drv);
+}
+EXPORT_SYMBOL_GPL(__cxl_driver_register);
+
+void cxl_driver_unregister(struct cxl_driver *cxl_drv)
+{
+	driver_unregister(&cxl_drv->drv);
+}
+EXPORT_SYMBOL_GPL(cxl_driver_unregister);
+
+static int cxl_device_id(struct device *dev)
+{
+	return 0;
+}
+
+static int cxl_bus_uevent(struct device *dev, struct kobj_uevent_env *env)
+{
+	return add_uevent_var(env, "MODALIAS=" CXL_MODALIAS_FMT,
+			      cxl_device_id(dev));
+}
+
+static int cxl_bus_match(struct device *dev, struct device_driver *drv)
+{
+	return cxl_device_id(dev) == to_cxl_drv(drv)->id;
+}
+
+static int cxl_bus_probe(struct device *dev)
+{
+	return to_cxl_drv(dev->driver)->probe(dev);
+}
+
+static int cxl_bus_remove(struct device *dev)
+{
+	struct cxl_driver *cxl_drv = to_cxl_drv(dev->driver);
+
+	if (cxl_drv->remove)
+		cxl_drv->remove(dev);
+	return 0;
+}
+
 struct bus_type cxl_bus_type = {
 	.name = "cxl",
+	.uevent = cxl_bus_uevent,
+	.match = cxl_bus_match,
+	.probe = cxl_bus_probe,
+	.remove = cxl_bus_remove,
 };
 EXPORT_SYMBOL_GPL(cxl_bus_type);
 
diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
index b988ea288f53..af2237d1c761 100644
--- a/drivers/cxl/cxl.h
+++ b/drivers/cxl/cxl.h
@@ -261,4 +261,26 @@  devm_cxl_add_passthrough_decoder(struct device *host, struct cxl_port *port)
 }
 
 extern struct bus_type cxl_bus_type;
+
+struct cxl_driver {
+	const char *name;
+	int (*probe)(struct device *dev);
+	void (*remove)(struct device *dev);
+	struct device_driver drv;
+	int id;
+};
+
+static inline struct cxl_driver *to_cxl_drv(struct device_driver *drv)
+{
+	return container_of(drv, struct cxl_driver, drv);
+}
+
+int __cxl_driver_register(struct cxl_driver *cxl_drv, struct module *owner,
+			  const char *modname);
+#define cxl_driver_register(x) __cxl_driver_register(x, THIS_MODULE, KBUILD_MODNAME)
+void cxl_driver_unregister(struct cxl_driver *cxl_drv);
+
+#define MODULE_ALIAS_CXL(type) MODULE_ALIAS("cxl:t" __stringify(type) "*")
+#define CXL_MODALIAS_FMT "cxl:t%d"
+
 #endif /* __CXL_H__ */