@@ -1055,6 +1055,18 @@ static void dc_extent_release(struct kref *kref)
kfree(extent);
}
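+/*
+ * DC extents are reference counted: the memory device holds the initial
+ * reference and regions take additional references while an extent is
+ * mapped.  Use the _not_zero() variant when racing with extent removal.
+ */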
+int __must_check cxl_dc_extent_get_not_zero(struct cxl_dc_extent_data *extent)
+{
+ return kref_get_unless_zero(&extent->region_ref);
+}
+EXPORT_SYMBOL_NS_GPL(cxl_dc_extent_get_not_zero, CXL);
+
+void cxl_dc_extent_get(struct cxl_dc_extent_data *extent)
+{
+ kref_get(&extent->region_ref);
+}
+EXPORT_SYMBOL_NS_GPL(cxl_dc_extent_get, CXL);
+
void cxl_dc_extent_put(struct cxl_dc_extent_data *extent)
{
kref_put(&extent->region_ref, dc_extent_release);
@@ -1547,6 +1547,122 @@ static int cxl_region_validate_position(struct cxl_region *cxlr,
return 0;
}
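+/* Check whether the extent's DPA range lies entirely within this decoder */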
+static bool cxl_dc_extent_in_ed(struct cxl_endpoint_decoder *cxled,
+ struct cxl_dc_extent_data *extent)
+{
+ struct range dpa_range = (struct range){
+ .start = extent->dpa_start,
+ .end = extent->dpa_start + extent->length - 1,
+ };
+ struct device *dev = &cxled->cxld.dev;
+
+ dev_dbg(dev, "Checking extent DPA:%llx LEN:%llx\n",
+ extent->dpa_start, extent->length);
+
+ if (!cxled->cxld.region || !cxled->dpa_res)
+ return false;
+
+ dev_dbg(dev, "Cxled start:%llx end:%llx\n",
+ cxled->dpa_res->start, cxled->dpa_res->end);
+ return (cxled->dpa_res->start <= dpa_range.start &&
+ dpa_range.end <= cxled->dpa_res->end);
+}
+
+static int cxl_ed_add_one_extent(struct cxl_endpoint_decoder *cxled,
+ struct cxl_dc_extent_data *extent)
+{
+ struct cxl_dr_extent *cxl_dr_ext;
+ struct cxl_dax_region *cxlr_dax;
+ resource_size_t dpa_offset, hpa;
+ struct range *ed_hpa_range;
+ struct device *dev;
+ int rc;
+
+ cxlr_dax = cxled->cxld.region->cxlr_dax;
+ dev = &cxlr_dax->dev;
+ dev_dbg(dev, "Adding DC extent DPA:%llx LEN:%llx\n",
+ extent->dpa_start, extent->length);
+
+ /*
+ * Interleave ways == 1 means device extents correspond 1:1 with DAX
+ * region extents. Future implementations should hold DC region
+ * extents here until the full dax region extent can be realized.
+ */
+ if (cxlr_dax->cxlr->params.interleave_ways != 1) {
+ dev_err(dev, "Interleaving DC not supported\n");
+ return -EINVAL;
+ }
+
+ cxl_dr_ext = kzalloc(sizeof(*cxl_dr_ext), GFP_KERNEL);
+ if (!cxl_dr_ext)
+ return -ENOMEM;
+
+ cxl_dr_ext->extent = extent;
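+ /*
+ * Initial reference is held by the region's extents xarray and
+ * dropped in cxl_dax_region_release().
+ */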
+ kref_init(&cxl_dr_ext->region_ref);
+
+ /*
+ * Without interleave the HPA offset equals the DPA offset; do the
+ * math anyway so the interleave case can build on it later.
+ */
+ dpa_offset = extent->dpa_start - cxled->dpa_res->start;
+ ed_hpa_range = &cxled->cxld.hpa_range;
+ hpa = ed_hpa_range->start + dpa_offset;
+ cxl_dr_ext->hpa_offset = hpa - cxlr_dax->hpa_range.start;
+
+ /* Without interleave, the length and label carry through unchanged */
+ cxl_dr_ext->hpa_length = extent->length;
+ snprintf(cxl_dr_ext->label, CXL_EXTENT_LABEL_LEN, "%s",
+ extent->tag);
+
+ dev_dbg(dev, "Inserting at HPA:%llx\n", cxl_dr_ext->hpa_offset);
+ rc = xa_insert(&cxlr_dax->extents, cxl_dr_ext->hpa_offset, cxl_dr_ext,
+ GFP_KERNEL);
+ if (rc) {
+ dev_err(dev, "Failed to insert extent %d\n", rc);
+ kfree(cxl_dr_ext);
+ return rc;
+ }
+ /* Reference dropped in cxl_dr_release() */
+ cxl_dc_extent_get(cxl_dr_ext->extent);
+ return 0;
+}
+
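+/*
+ * Walk the memdev's DC extent list and add each extent contained in this
+ * endpoint decoder to the corresponding DAX region.
+ */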
+static int cxl_ed_add_extents(struct cxl_endpoint_decoder *cxled)
+{
+ struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
+ struct cxl_dev_state *cxlds = cxlmd->cxlds;
+ struct cxl_memdev_state *mds = container_of(cxlds,
+ struct cxl_memdev_state,
+ cxlds);
+ struct device *dev = &cxled->cxld.dev;
+ struct cxl_dc_extent_data *extent;
+ unsigned long index;
+
+ dev_dbg(dev, "Searching for DC extents\n");
+ xa_for_each(&mds->dc_extent_list, index, extent) {
+ /*
+ * A get_not_zero() is required because this walk races with the
+ * memory device, which may be removing the extent at the same
+ * time.
+ */
+ if (cxl_dc_extent_get_not_zero(extent)) {
+ int rc = 0;
+
+ if (cxl_dc_extent_in_ed(cxled, extent)) {
+ dev_dbg(dev, "Found extent DPA:%llx LEN:%llx\n",
+ extent->dpa_start, extent->length);
+ rc = cxl_ed_add_one_extent(cxled, extent);
+ }
+ cxl_dc_extent_put(extent);
+ if (rc)
+ return rc;
+ }
+ }
+ return 0;
+}
+
static int cxl_region_attach_position(struct cxl_region *cxlr,
struct cxl_root_decoder *cxlrd,
struct cxl_endpoint_decoder *cxled,
@@ -2702,10 +2818,44 @@ static struct cxl_pmem_region *cxl_pmem_region_alloc(struct cxl_region *cxlr)
return cxlr_pmem;
}
+int __must_check cxl_dr_extent_get_not_zero(struct cxl_dr_extent *cxl_dr_ext)
+{
+ return kref_get_unless_zero(&cxl_dr_ext->region_ref);
+}
+EXPORT_SYMBOL_NS_GPL(cxl_dr_extent_get_not_zero, CXL);
+
+void cxl_dr_extent_get(struct cxl_dr_extent *cxl_dr_ext)
+{
+ kref_get(&cxl_dr_ext->region_ref);
+}
+EXPORT_SYMBOL_NS_GPL(cxl_dr_extent_get, CXL);
+
+static void cxl_dr_release(struct kref *kref)
+{
+ struct cxl_dr_extent *cxl_dr_ext = container_of(kref,
+ struct cxl_dr_extent,
+ region_ref);
+
+ cxl_dc_extent_put(cxl_dr_ext->extent);
+ kfree(cxl_dr_ext);
+}
+
+void cxl_dr_extent_put(struct cxl_dr_extent *cxl_dr_ext)
+{
+ kref_put(&cxl_dr_ext->region_ref, cxl_dr_release);
+}
+EXPORT_SYMBOL_NS_GPL(cxl_dr_extent_put, CXL);
+
static void cxl_dax_region_release(struct device *dev)
{
struct cxl_dax_region *cxlr_dax = to_cxl_dax_region(dev);
+ struct cxl_dr_extent *cxl_dr_ext;
+ unsigned long index;
+
+ xa_for_each(&cxlr_dax->extents, index, cxl_dr_ext) {
+ xa_erase(&cxlr_dax->extents, index);
+ cxl_dr_extent_put(cxl_dr_ext);
+ }
kfree(cxlr_dax);
}
@@ -2756,6 +2906,7 @@ static struct cxl_dax_region *cxl_dax_region_alloc(struct cxl_region *cxlr)
cxlr_dax->hpa_range.start = p->res->start;
cxlr_dax->hpa_range.end = p->res->end;
+ xa_init(&cxlr_dax->extents);
dev = &cxlr_dax->dev;
cxlr_dax->cxlr = cxlr;
@@ -2862,7 +3013,17 @@ static void cxlr_dax_unregister(void *_cxlr_dax)
device_unregister(&cxlr_dax->dev);
}
-static int __devm_cxl_add_dax_region(struct cxl_region *cxlr)
+static int cxl_region_add_dc_extents(struct cxl_region *cxlr)
+{
+ for (int i = 0; i < cxlr->params.nr_targets; i++) {
+ int rc = cxl_ed_add_extents(cxlr->params.targets[i]);
+
+ if (rc)
+ return rc;
+ }
+ return 0;
+}
+
+static int __devm_cxl_add_dax_region(struct cxl_region *cxlr, bool is_dc)
{
struct cxl_dax_region *cxlr_dax;
struct device *dev;
@@ -2877,6 +3038,17 @@ static int __devm_cxl_add_dax_region(struct cxl_region *cxlr)
if (rc)
goto err;
+ cxlr->cxlr_dax = cxlr_dax;
+ if (is_dc) {
+ /*
+ * Process device extents prior to surfacing the device to
+ * ensure the cxl_dax_region driver has access to extents which
+ * arrived before the region was created.
+ */
+ rc = cxl_region_add_dc_extents(cxlr);
+ if (rc)
+ goto err;
+ }
+
rc = device_add(dev);
if (rc)
goto err;
@@ -2893,7 +3065,7 @@ static int __devm_cxl_add_dax_region(struct cxl_region *cxlr)
static int devm_cxl_add_dax_region(struct cxl_region *cxlr)
{
- return __devm_cxl_add_dax_region(cxlr);
+ return __devm_cxl_add_dax_region(cxlr, false);
}
static int devm_cxl_add_dc_dax_region(struct cxl_region *cxlr)
@@ -2902,8 +3074,7 @@ static int devm_cxl_add_dc_dax_region(struct cxl_region *cxlr)
dev_err(&cxlr->dev, "Interleaving DC not supported\n");
return -EINVAL;
}
-
- return __devm_cxl_add_dax_region(cxlr);
+ return __devm_cxl_add_dax_region(cxlr, true);
}
static int match_decoder_by_range(struct device *dev, void *data)
@@ -555,6 +555,7 @@ struct cxl_region_params {
* @type: Endpoint decoder target type
* @cxl_nvb: nvdimm bridge for coordinating @cxlr_pmem setup / shutdown
* @cxlr_pmem: (for pmem regions) cached copy of the nvdimm bridge
+ * @cxlr_dax: (for DC regions) cached copy of CXL DAX bridge
* @flags: Region state flags
* @params: active + config params for the region
*/
@@ -565,6 +566,7 @@ struct cxl_region {
enum cxl_decoder_type type;
struct cxl_nvdimm_bridge *cxl_nvb;
struct cxl_pmem_region *cxlr_pmem;
+ struct cxl_dax_region *cxlr_dax;
unsigned long flags;
struct cxl_region_params params;
};
@@ -614,8 +616,22 @@ struct cxl_dax_region {
struct device dev;
struct cxl_region *cxlr;
struct range hpa_range;
+ struct xarray extents;
};
+/*
+ * Once interleave is supported, a single cxl_dr_extent will manage
+ * multiple cxl_dc_extent_data objects.
+ */
+#define CXL_EXTENT_LABEL_LEN 64
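+/**
+ * struct cxl_dr_extent - extent of a CXL DAX region backed by a DC extent
+ * @region_ref: reference count; final put releases the backing DC extent
+ * @hpa_offset: offset of this extent within the DAX region HPA range
+ * @hpa_length: length of this extent
+ * @label: label copied from the device extent tag
+ * @extent: backing device (DC) extent
+ */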
+struct cxl_dr_extent {
+ struct kref region_ref;
+ u64 hpa_offset;
+ u64 hpa_length;
+ char label[CXL_EXTENT_LABEL_LEN];
+ struct cxl_dc_extent_data *extent;
+};
+int __must_check cxl_dr_extent_get_not_zero(struct cxl_dr_extent *cxl_dr_ext);
+void cxl_dr_extent_get(struct cxl_dr_extent *cxl_dr_ext);
+void cxl_dr_extent_put(struct cxl_dr_extent *cxl_dr_ext);
+
/**
* struct cxl_port - logical collection of upstream port devices and
* downstream port devices to construct a CXL memory
@@ -1002,6 +1002,8 @@ int cxl_trigger_poison_list(struct cxl_memdev *cxlmd);
int cxl_inject_poison(struct cxl_memdev *cxlmd, u64 dpa);
int cxl_clear_poison(struct cxl_memdev *cxlmd, u64 dpa);
+int __must_check cxl_dc_extent_get_not_zero(struct cxl_dc_extent_data *extent);
+void cxl_dc_extent_get(struct cxl_dc_extent_data *extent);
void cxl_dc_extent_put(struct cxl_dc_extent_data *extent);
#ifdef CONFIG_CXL_SUSPEND
@@ -7,6 +7,7 @@ obj-$(CONFIG_DEV_DAX_CXL) += dax_cxl.o
dax-y := super.o
dax-y += bus.o
+dax-y += extent.o
device_dax-y := device.o
dax_pmem-y := pmem.o
dax_cxl-y := cxl.o
@@ -5,6 +5,87 @@
#include "../cxl/cxl.h"
#include "bus.h"
+#include "dax-private.h"
+
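+/*
+ * A dax_region_extent wraps the cxl_dr_extent reported by the region
+ * driver; its final put (dr_release()) drops the cxl_dr_extent reference
+ * taken in cxl_dax_region_create_extent().
+ */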
+static void dax_reg_ext_get(struct dax_region_extent *dr_extent)
+{
+ kref_get(&dr_extent->ref);
+}
+
+static void dr_release(struct kref *kref)
+{
+ struct dax_region_extent *dr_extent;
+ struct cxl_dr_extent *cxl_dr_ext;
+
+ dr_extent = container_of(kref, struct dax_region_extent, ref);
+ cxl_dr_ext = dr_extent->private_data;
+ cxl_dr_extent_put(cxl_dr_ext);
+ kfree(dr_extent);
+}
+
+static void dax_reg_ext_put(struct dax_region_extent *dr_extent)
+{
+ kref_put(&dr_extent->ref, dr_release);
+}
+
+static int cxl_dax_region_create_extent(struct dax_region *dax_region,
+ struct cxl_dr_extent *cxl_dr_ext)
+{
+ struct dax_region_extent *dr_extent;
+ int rc;
+
+ dr_extent = kzalloc(sizeof(*dr_extent), GFP_KERNEL);
+ if (!dr_extent)
+ return -ENOMEM;
+
+ dr_extent->private_data = cxl_dr_ext;
+ dr_extent->get = dax_reg_ext_get;
+ dr_extent->put = dax_reg_ext_put;
+
+ /*
+ * On success the extent device owns this reference; it is dropped
+ * in unregister_dr_extent().
+ */
+ kref_init(&dr_extent->ref);
+
+ rc = dax_region_ext_create_dev(dax_region, dr_extent,
+ cxl_dr_ext->hpa_offset,
+ cxl_dr_ext->hpa_length,
+ cxl_dr_ext->label);
+ if (rc) {
+ kfree(dr_extent);
+ return rc;
+ }
+
+ /* Extent accepted; the reference is dropped in dr_release() */
+ cxl_dr_extent_get(cxl_dr_ext);
+ return 0;
+}
+
+static int cxl_dax_region_create_extents(struct cxl_dax_region *cxlr_dax)
+{
+ struct cxl_dr_extent *cxl_dr_ext;
+ unsigned long index;
+
+ dev_dbg(&cxlr_dax->dev, "Adding extents\n");
+ xa_for_each(&cxlr_dax->extents, index, cxl_dr_ext) {
+ /*
+ * A get_not_zero() is required because this walk races with the
+ * region driver, which in turn races with the memory device
+ * removing the extent.
+ */
+ if (cxl_dr_extent_get_not_zero(cxl_dr_ext)) {
+ struct dax_region *dax_region;
+ int rc;
+
+ dax_region = dev_get_drvdata(&cxlr_dax->dev);
+ dev_dbg(&cxlr_dax->dev, "Found OFF:%llx LEN:%llx\n",
+ cxl_dr_ext->hpa_offset, cxl_dr_ext->hpa_length);
+ rc = cxl_dax_region_create_extent(dax_region, cxl_dr_ext);
+ cxl_dr_extent_put(cxl_dr_ext);
+ if (rc)
+ return rc;
+ }
+ }
+ return 0;
+}
static int cxl_dax_region_probe(struct device *dev)
{
@@ -19,20 +100,28 @@ static int cxl_dax_region_probe(struct device *dev)
if (nid == NUMA_NO_NODE)
nid = memory_add_physaddr_to_nid(cxlr_dax->hpa_range.start);
- dev_size = range_len(&cxlr_dax->hpa_range);
-
flags = IORESOURCE_DAX_KMEM;
- if (cxlr->mode == CXL_REGION_DC) {
- /* Add empty seed dax device */
- dev_size = 0;
+ if (cxlr->mode == CXL_REGION_DC)
flags |= IORESOURCE_DAX_DYNAMIC_CAP;
- }
dax_region = alloc_dax_region(dev, cxlr->id, &cxlr_dax->hpa_range, nid,
PMD_SIZE, flags);
if (!dax_region)
return -ENOMEM;
+ dev_size = range_len(&cxlr_dax->hpa_range);
+ if (cxlr->mode == CXL_REGION_DC) {
+ int rc;
+
+ /* NOTE: depends on alloc_dax_region() having set dax_region as driver data */
+ rc = cxl_dax_region_create_extents(cxlr_dax);
+ if (rc)
+ return rc;
+
+ /* Add empty seed dax device */
+ dev_size = 0;
+ }
+
data = (struct dev_dax_data) {
.dax_region = dax_region,
.id = -1,
@@ -5,6 +5,7 @@
#ifndef __DAX_PRIVATE_H__
#define __DAX_PRIVATE_H__
+#include <linux/pgtable.h>
#include <linux/device.h>
#include <linux/cdev.h>
#include <linux/idr.h>
@@ -40,6 +41,58 @@ struct dax_region {
struct device *youngest;
};
+/**
+ * struct dax_region_extent - extent data defined by the low-level region driver
+ * @private_data: lower level region driver data
+ * @ref: track number of dax devices which are using this extent
+ * @get: get reference to low level data
+ * @put: put reference to low level data
+ */
+struct dax_region_extent {
+ void *private_data;
+ struct kref ref;
+ void (*get)(struct dax_region_extent *dr_extent);
+ void (*put)(struct dax_region_extent *dr_extent);
+};
+
+static inline void dr_extent_get(struct dax_region_extent *dr_extent)
+{
+ if (dr_extent->get)
+ dr_extent->get(dr_extent);
+}
+
+static inline void dr_extent_put(struct dax_region_extent *dr_extent)
+{
+ if (dr_extent->put)
+ dr_extent->put(dr_extent);
+}
+
+#define DAX_EXTENT_LABEL_LEN 64
+/**
+ * struct dax_reg_ext_dev - Device object to expose extent information
+ * @dev: device representing this extent
+ * @dr_extent: reference back to private extent data
+ * @offset: offset of this extent
+ * @length: size of this extent
+ * @label: identifier to group extents
+ */
+struct dax_reg_ext_dev {
+ struct device dev;
+ struct dax_region_extent *dr_extent;
+ resource_size_t offset;
+ resource_size_t length;
+ char label[DAX_EXTENT_LABEL_LEN];
+};
+
+int dax_region_ext_create_dev(struct dax_region *dax_region,
+ struct dax_region_extent *dr_extent,
+ resource_size_t offset,
+ resource_size_t length,
+ const char *label);
+
+#define to_dr_ext_dev(dev) \
+ container_of(dev, struct dax_reg_ext_dev, dev)
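+
+/*
+ * Typical region driver usage (sketch; the lld_* names are illustrative):
+ *
+ *	dr_extent->private_data = lld_extent;
+ *	dr_extent->get = lld_get;
+ *	dr_extent->put = lld_put;
+ *	kref_init(&dr_extent->ref);
+ *	rc = dax_region_ext_create_dev(dax_region, dr_extent, off, len, label);
+ */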
+
struct dax_mapping {
struct device dev;
int range_id;
new file mode 100644
@@ -0,0 +1,119 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2023 Intel Corporation. All rights reserved. */
+
+#include <linux/device.h>
+#include <linux/slab.h>
+#include "dax-private.h"
+
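+/*
+ * Surface DAX region extents in sysfs: each extent is an extent%d child
+ * device of the region with "length" and "label" attributes.
+ */
+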
+static ssize_t length_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct dax_reg_ext_dev *dr_reg_ext_dev = to_dr_ext_dev(dev);
+
+ return sysfs_emit(buf, "%#llx\n", dr_reg_ext_dev->length);
+}
+static DEVICE_ATTR_RO(length);
+
+static ssize_t label_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct dax_reg_ext_dev *dr_reg_ext_dev = to_dr_ext_dev(dev);
+
+ return sysfs_emit(buf, "%s\n", dr_reg_ext_dev->label);
+}
+
+static ssize_t label_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t len)
+{
+ struct dax_reg_ext_dev *dr_reg_ext_dev = to_dr_ext_dev(dev);
+
+ snprintf(dr_reg_ext_dev->label, DAX_EXTENT_LABEL_LEN, "%s", buf);
+ /* sysfs writes commonly include a trailing newline; strip it */
+ strim(dr_reg_ext_dev->label);
+ return len;
+}
+static DEVICE_ATTR_RW(label);
+
+static struct attribute *dr_extent_attrs[] = {
+ &dev_attr_length.attr,
+ &dev_attr_label.attr,
+ NULL,
+};
+
+static const struct attribute_group dr_extent_attribute_group = {
+ .attrs = dr_extent_attrs,
+};
+
+static void dr_extent_release(struct device *dev)
+{
+ struct dax_reg_ext_dev *dr_reg_ext_dev = to_dr_ext_dev(dev);
+
+ kfree(dr_reg_ext_dev);
+}
+
+static const struct attribute_group *dr_extent_attribute_groups[] = {
+ &dr_extent_attribute_group,
+ NULL,
+};
+
+static const struct device_type dr_extent_type = {
+ .name = "extent",
+ .release = dr_extent_release,
+ .groups = dr_extent_attribute_groups,
+};
+
+static void unregister_dr_extent(void *ext)
+{
+ struct dax_reg_ext_dev *dr_reg_ext_dev = ext;
+ struct dax_region_extent *dr_extent;
+
+ dr_extent = dr_reg_ext_dev->dr_extent;
+ dev_dbg(&dr_reg_ext_dev->dev, "Unregister DAX region ext OFF:%llx L:%s\n",
+ dr_reg_ext_dev->offset, dr_reg_ext_dev->label);
+ dr_extent_put(dr_extent);
+ device_unregister(&dr_reg_ext_dev->dev);
+}
+
+int dax_region_ext_create_dev(struct dax_region *dax_region,
+ struct dax_region_extent *dr_extent,
+ resource_size_t offset,
+ resource_size_t length,
+ const char *label)
+{
+ struct dax_reg_ext_dev *dr_reg_ext_dev;
+ struct device *dev;
+ int rc;
+
+ dr_reg_ext_dev = kzalloc(sizeof(*dr_reg_ext_dev), GFP_KERNEL);
+ if (!dr_reg_ext_dev)
+ return -ENOMEM;
+
+ dr_reg_ext_dev->dr_extent = dr_extent;
+ dr_reg_ext_dev->offset = offset;
+ dr_reg_ext_dev->length = length;
+ snprintf(dr_reg_ext_dev->label, DAX_EXTENT_LABEL_LEN, "%s", label);
+
+ dev = &dr_reg_ext_dev->dev;
+ device_initialize(dev);
+ dev->id = offset / PMD_SIZE;
+ device_set_pm_not_required(dev);
+ dev->parent = dax_region->dev;
+ dev->type = &dr_extent_type;
+ rc = dev_set_name(dev, "extent%d", dev->id);
+ if (rc)
+ goto err;
+
+ rc = device_add(dev);
+ if (rc)
+ goto err;
+
+ dev_dbg(dev, "DAX region extent OFF:%llx LEN:%llx\n",
+ dr_reg_ext_dev->offset, dr_reg_ext_dev->length);
+ return devm_add_action_or_reset(dax_region->dev, unregister_dr_extent,
+ dr_reg_ext_dev);
+
+err:
+ dev_err(dev, "Failed to initialize DAX extent dev OFF:%llx LEN:%llx\n",
+ dr_reg_ext_dev->offset, dr_reg_ext_dev->length);
+ put_device(dev);
+ return rc;
+}
+EXPORT_SYMBOL_GPL(dax_region_ext_create_dev);