
[45/46] cxl/pmem: Fix offline_nvdimm_bus() to offline by bridge

Message ID 20220624041950.559155-20-dan.j.williams@intel.com
State Superseded
Series CXL PMEM Region Provisioning

Commit Message

Dan Williams June 24, 2022, 4:19 a.m. UTC
Be careful to only disable cxl_pmem objects related to a given
cxl_nvdimm_bridge. Otherwise, offline_nvdimm_bus() reaches across CXL
domains and disables more than is expected.

Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/cxl/cxl.h  |  1 +
 drivers/cxl/pmem.c | 21 +++++++++++++++++----
 2 files changed, 18 insertions(+), 4 deletions(-)

Comments

Jonathan Cameron June 30, 2022, 5:14 p.m. UTC | #1
On Thu, 23 Jun 2022 21:19:49 -0700
Dan Williams <dan.j.williams@intel.com> wrote:

> Be careful to only disable cxl_pmem objects related to a given
> cxl_nvdimm_bridge. Otherwise, offline_nvdimm_bus() reaches across CXL
> domains and disables more than is expected.
> 
> Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Fix, but no Fixes tag? Probably wants a comment (I'm guessing
it didn't matter until now?)

By Domains, what do you mean?  I don't think we have that
well defined as a term.
Dan Williams July 11, 2022, 7:49 p.m. UTC | #2
Jonathan Cameron wrote:
> On Thu, 23 Jun 2022 21:19:49 -0700
> Dan Williams <dan.j.williams@intel.com> wrote:
> 
> > Be careful to only disable cxl_pmem objects related to a given
> > cxl_nvdimm_bridge. Otherwise, offline_nvdimm_bus() reaches across CXL
> > domains and disables more than is expected.
> > 
> > Signed-off-by: Dan Williams <dan.j.williams@intel.com>
> Fix, but no Fixes tag? Probably wants a comment (I'm guessing
> it didn't matter until now?)

I'll add:

Fixes: 21083f51521f ("cxl/pmem: Register 'pmem' / cxl_nvdimm devices")

To date this has been a benign side effect since it only affects
cxl_test, but as cxl_test gets wider deployment it needs to meet the
expectation that any cxl_test operations have no effect on the
production stack. It might also be important if Device Tree adds
incremental CXL platform topology support.

> By Domains, what do you mean?  I don't think we have that
> well defined as a term.

By "domain" I meant the CXL topology hierarchy that a given memdev
attaches to. In userspace cxl-cli terms this is a "bus":

# cxl list -M -b "ACPI.CXL" | jq .[0]
{
  "memdev": "mem0",
  "pmem_size": 536870912,
  "ram_size": 0,
  "serial": 0,
  "host": "0000:35:00.0"
}

# cxl list -M -b "cxl_test" | jq .[0]
{
  "memdev": "mem2",
  "pmem_size": 1073741824,
  "ram_size": 1073741824,
  "serial": 1,
  "numa_node": 1,
  "host": "cxl_mem.1"
}

...where "-b" filters by the "bus" provider. This shows that mem0
derives its CXL.mem connectivity from the typical ACPI hierarchy, and
mem2 is in the "cxl_test" domain. I did not use the "bus" term in the
changelog because "bus" means something different in the kernel: both
of those devices are registered on @cxl_bus_type.
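
To make that concrete, here is a minimal user-space sketch of the
filtering the patch implements; the struct and function names below are
illustrative stand-ins, not the kernel API:

#include <stdio.h>
#include <stddef.h>

/* Stand-ins for cxl_nvdimm_bridge and cxl_nvdimm. */
struct bridge { const char *provider; };

struct nvdimm_dev {
	const char *name;
	struct bridge *bridge;	/* recorded at probe time */
	int bound;		/* 1 while a driver is attached */
};

/*
 * Model of the reworked cxl_nvdimm_release_driver(): only detach
 * devices whose recorded bridge matches the bridge being torn down.
 */
static void release_if_matching(struct nvdimm_dev *dev, struct bridge *victim)
{
	if (dev->bridge != victim)
		return;
	dev->bound = 0;
	printf("released %s (bridge %s)\n", dev->name, victim->provider);
}

int main(void)
{
	struct bridge acpi = { "ACPI.CXL" }, test = { "cxl_test" };
	struct nvdimm_dev devs[] = {
		{ "mem0", &acpi, 1 },
		{ "mem2", &test, 1 },
	};
	size_t i;

	/* Tearing down the cxl_test bridge must leave mem0 untouched. */
	for (i = 0; i < sizeof(devs) / sizeof(devs[0]); i++)
		release_if_matching(&devs[i], &test);

	for (i = 0; i < sizeof(devs) / sizeof(devs[0]); i++)
		printf("%s bound=%d\n", devs[i].name, devs[i].bound);

	return 0;
}

Before this change, the release pass walked every device on
@cxl_bus_type with no context, which is the cross-"domain" behavior
described above.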

Patch

diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
index d6ff6337aa49..95f486bc1b41 100644
--- a/drivers/cxl/cxl.h
+++ b/drivers/cxl/cxl.h
@@ -411,6 +411,7 @@  struct cxl_nvdimm_bridge {
 struct cxl_nvdimm {
 	struct device dev;
 	struct cxl_memdev *cxlmd;
+	struct cxl_nvdimm_bridge *bridge;
 };
 
 /**
diff --git a/drivers/cxl/pmem.c b/drivers/cxl/pmem.c
index 0aaa70b4e0f7..b271f6e90b91 100644
--- a/drivers/cxl/pmem.c
+++ b/drivers/cxl/pmem.c
@@ -26,7 +26,10 @@  static void clear_exclusive(void *cxlds)
 
 static void unregister_nvdimm(void *nvdimm)
 {
+	struct cxl_nvdimm *cxl_nvd = nvdimm_provider_data(nvdimm);
+
 	nvdimm_delete(nvdimm);
+	cxl_nvd->bridge = NULL;
 }
 
 static int cxl_nvdimm_probe(struct device *dev)
@@ -66,6 +69,7 @@  static int cxl_nvdimm_probe(struct device *dev)
 	}
 
 	dev_set_drvdata(dev, nvdimm);
+	cxl_nvd->bridge = cxl_nvb;
 	rc = devm_add_action_or_reset(dev, unregister_nvdimm, nvdimm);
 out:
 	device_unlock(&cxl_nvb->dev);
@@ -204,15 +208,23 @@  static bool online_nvdimm_bus(struct cxl_nvdimm_bridge *cxl_nvb)
 	return cxl_nvb->nvdimm_bus != NULL;
 }
 
-static int cxl_nvdimm_release_driver(struct device *dev, void *data)
+static int cxl_nvdimm_release_driver(struct device *dev, void *cxl_nvb)
 {
+	struct cxl_nvdimm *cxl_nvd;
+
 	if (!is_cxl_nvdimm(dev))
 		return 0;
+
+	cxl_nvd = to_cxl_nvdimm(dev);
+	if (cxl_nvd->bridge != cxl_nvb)
+		return 0;
+
 	device_release_driver(dev);
 	return 0;
 }
 
-static void offline_nvdimm_bus(struct nvdimm_bus *nvdimm_bus)
+static void offline_nvdimm_bus(struct cxl_nvdimm_bridge *cxl_nvb,
+			       struct nvdimm_bus *nvdimm_bus)
 {
 	if (!nvdimm_bus)
 		return;
@@ -222,7 +234,8 @@  static void offline_nvdimm_bus(struct nvdimm_bus *nvdimm_bus)
 	 * nvdimm_bus_unregister() rips the nvdimm objects out from
 	 * underneath them.
 	 */
-	bus_for_each_dev(&cxl_bus_type, NULL, NULL, cxl_nvdimm_release_driver);
+	bus_for_each_dev(&cxl_bus_type, NULL, cxl_nvb,
+			 cxl_nvdimm_release_driver);
 	nvdimm_bus_unregister(nvdimm_bus);
 }
 
@@ -260,7 +273,7 @@  static void cxl_nvb_update_state(struct work_struct *work)
 
 		dev_dbg(&cxl_nvb->dev, "rescan: %d\n", rc);
 	}
-	offline_nvdimm_bus(victim_bus);
+	offline_nvdimm_bus(cxl_nvb, victim_bus);
 
 	put_device(&cxl_nvb->dev);
 }