@@ -439,20 +439,20 @@ Description:
interleave_granularity).
-What: /sys/bus/cxl/devices/decoderX.Y/create_{pmem,ram}_region
-Date: May, 2022, January, 2023
-KernelVersion: v6.0 (pmem), v6.3 (ram)
+What: /sys/bus/cxl/devices/decoderX.Y/create_{pmem,ram,dc}_region
+Date: May, 2022, January, 2023, August, 2024
+KernelVersion: v6.0 (pmem), v6.3 (ram), v6.13 (dc)
Contact: linux-cxl@vger.kernel.org
Description:
(RW) Write a string in the form 'regionZ' to start the process
- of defining a new persistent, or volatile memory region
- (interleave-set) within the decode range bounded by root decoder
- 'decoderX.Y'. The value written must match the current value
- returned from reading this attribute. An atomic compare exchange
- operation is done on write to assign the requested id to a
- region and allocate the region-id for the next creation attempt.
- EBUSY is returned if the region name written does not match the
- current cached value.
+ of defining a new persistent, volatile, or Dynamic Capacity
+ (DC) memory region (interleave-set) within the decode range
+ bounded by root decoder 'decoderX.Y'. The value written must
+ match the current value returned from reading this attribute.
+ An atomic compare exchange operation is done on write to assign
+ the requested id to a region and allocate the region-id for the
+ next creation attempt. EBUSY is returned if the region name
+ written does not match the current cached value.
What: /sys/bus/cxl/devices/decoderX.Y/delete_region
@@ -4,15 +4,27 @@
#ifndef __CXL_CORE_H__
#define __CXL_CORE_H__
+#include <cxlmem.h>
+
extern const struct device_type cxl_nvdimm_bridge_type;
extern const struct device_type cxl_nvdimm_type;
extern const struct device_type cxl_pmu_type;
extern struct attribute_group cxl_base_attribute_group;
+/*
+ * cxled_to_mds() - map an endpoint decoder to its memdev's cxl_memdev_state
+ * @cxled: endpoint decoder to resolve
+ *
+ * Follows @cxled to its memdev, takes that memdev's ->cxlds and returns the
+ * struct cxl_memdev_state that holds it as the 'cxlds' member.
+ *
+ * NOTE(review): container_of() is only meaningful if ->cxlds really is
+ * embedded in a struct cxl_memdev_state; confirm that holds for every
+ * endpoint decoder passed here.
+ */
+static inline struct cxl_memdev_state *
+cxled_to_mds(struct cxl_endpoint_decoder *cxled)
+{
+ return container_of(cxled_to_memdev(cxled)->cxlds,
+ struct cxl_memdev_state, cxlds);
+}
+
#ifdef CONFIG_CXL_REGION
extern struct device_attribute dev_attr_create_pmem_region;
extern struct device_attribute dev_attr_create_ram_region;
+extern struct device_attribute dev_attr_create_dc_region;
extern struct device_attribute dev_attr_delete_region;
extern struct device_attribute dev_attr_region;
extern const struct device_type cxl_pmem_region_type;
@@ -326,6 +326,7 @@ static struct attribute *cxl_decoder_root_attrs[] = {
&dev_attr_qos_class.attr,
SET_CXL_REGION_ATTR(create_pmem_region)
SET_CXL_REGION_ATTR(create_ram_region)
+ SET_CXL_REGION_ATTR(create_dc_region)
SET_CXL_REGION_ATTR(delete_region)
NULL,
};
@@ -496,6 +496,11 @@ static ssize_t interleave_ways_store(struct device *dev,
if (rc)
return rc;
+ if (cxlr->mode == CXL_REGION_DC && val != 1) {
+ dev_err(dev, "Interleaving and DCD not supported\n");
+ return -EINVAL;
+ }
+
rc = ways_to_eiw(val, &iw);
if (rc)
return rc;
@@ -2176,6 +2181,7 @@ static size_t store_targetN(struct cxl_region *cxlr, const char *buf, int pos,
if (sysfs_streq(buf, "\n"))
rc = detach_target(cxlr, pos);
else {
+ struct cxl_endpoint_decoder *cxled;
struct device *dev;
dev = bus_find_device_by_name(&cxl_bus_type, NULL, buf);
@@ -2187,8 +2193,13 @@ static size_t store_targetN(struct cxl_region *cxlr, const char *buf, int pos,
goto out;
}
- rc = attach_target(cxlr, to_cxl_endpoint_decoder(dev), pos,
- TASK_INTERRUPTIBLE);
+ cxled = to_cxl_endpoint_decoder(dev);
+ if (cxlr->mode == CXL_REGION_DC &&
+ !cxl_dcd_supported(cxled_to_mds(cxled))) {
+ dev_dbg(dev, "DCD unsupported\n");
+ /*
+ * @dev was looked up with bus_find_device_by_name(); leave
+ * through 'out' so put_device() drops that reference instead
+ * of returning directly and leaking it.
+ */
+ rc = -EINVAL;
+ goto out;
+ }
+ rc = attach_target(cxlr, cxled, pos, TASK_INTERRUPTIBLE);
out:
put_device(dev);
}
@@ -2533,6 +2544,7 @@ static struct cxl_region *__create_region(struct cxl_root_decoder *cxlrd,
switch (mode) {
case CXL_REGION_RAM:
case CXL_REGION_PMEM:
+ case CXL_REGION_DC:
break;
default:
dev_err(&cxlrd->cxlsd.cxld.dev, "unsupported mode %s\n",
@@ -2586,6 +2598,20 @@ static ssize_t create_ram_region_store(struct device *dev,
}
DEVICE_ATTR_RW(create_ram_region);
+/*
+ * Read side of the create_dc_region attribute: resolve the root decoder
+ * behind @dev and pass it, together with @buf, to __create_region_show().
+ */
+static ssize_t create_dc_region_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev);
+
+ return __create_region_show(cxlrd, buf);
+}
+
+/*
+ * Write side of the create_dc_region attribute: forward the written string
+ * to create_region_store() with the region mode fixed to CXL_REGION_DC.
+ */
+static ssize_t create_dc_region_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t len)
+{
+ return create_region_store(dev, buf, len, CXL_REGION_DC);
+}
+DEVICE_ATTR_RW(create_dc_region);
+
static ssize_t region_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
@@ -3168,6 +3194,11 @@ static int devm_cxl_add_dax_region(struct cxl_region *cxlr)
struct device *dev;
int rc;
+ if (cxlr->mode == CXL_REGION_DC && cxlr->params.interleave_ways != 1) {
+ dev_err(&cxlr->dev, "Interleaving DC not supported\n");
+ return -EINVAL;
+ }
+
cxlr_dax = cxl_dax_region_alloc(cxlr);
if (IS_ERR(cxlr_dax))
return PTR_ERR(cxlr_dax);
@@ -3260,6 +3291,16 @@ static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd,
return ERR_PTR(-EINVAL);
mode = cxl_decoder_to_region_mode(cxled->mode);
+ if (mode == CXL_REGION_DC) {
+ if (!cxl_dcd_supported(cxled_to_mds(cxled))) {
+ dev_err(&cxled->cxld.dev, "DCD unsupported\n");
+ return ERR_PTR(-EINVAL);
+ }
+ if (cxled->cxld.interleave_ways != 1) {
+ dev_err(&cxled->cxld.dev, "Interleaving and DCD not supported\n");
+ return ERR_PTR(-EINVAL);
+ }
+ }
do {
cxlr = __create_region(cxlrd, mode,
atomic_read(&cxlrd->region_id));
@@ -3467,6 +3508,7 @@ static int cxl_region_probe(struct device *dev)
case CXL_REGION_PMEM:
return devm_cxl_add_pmem_region(cxlr);
case CXL_REGION_RAM:
+ case CXL_REGION_DC:
/*
* The region can not be manged by CXL if any portion of
* it is already online as 'System RAM'
@@ -178,6 +178,11 @@ static bool is_static(struct dax_region *dax_region)
return (dax_region->res.flags & IORESOURCE_DAX_STATIC) != 0;
}
+/* Report whether @dax_region's resource flags carry IORESOURCE_DAX_SPARSE_CAP. */
+static bool is_sparse(struct dax_region *dax_region)
+{
+ return !!(dax_region->res.flags & IORESOURCE_DAX_SPARSE_CAP);
+}
+
bool static_dev_dax(struct dev_dax *dev_dax)
{
return is_static(dev_dax->region);
@@ -301,6 +306,9 @@ static unsigned long long dax_region_avail_size(struct dax_region *dax_region)
lockdep_assert_held(&dax_region_rwsem);
+ if (is_sparse(dax_region))
+ return 0;
+
for_each_dax_region_resource(dax_region, res)
size -= resource_size(res);
return size;
@@ -1373,6 +1381,8 @@ static umode_t dev_dax_visible(struct kobject *kobj, struct attribute *a, int n)
return 0;
if (a == &dev_attr_mapping.attr && is_static(dax_region))
return 0;
+ if (a == &dev_attr_mapping.attr && is_sparse(dax_region))
+ return 0;
if ((a == &dev_attr_align.attr ||
a == &dev_attr_size.attr) && is_static(dax_region))
return 0444;
@@ -13,6 +13,7 @@ struct dax_region;
/* dax bus specific ioresource flags */
#define IORESOURCE_DAX_STATIC BIT(0)
#define IORESOURCE_DAX_KMEM BIT(1)
+#define IORESOURCE_DAX_SPARSE_CAP BIT(2)
struct dax_region *alloc_dax_region(struct device *parent, int region_id,
struct range *range, int target_node, unsigned int align,
@@ -13,19 +13,31 @@ static int cxl_dax_region_probe(struct device *dev)
struct cxl_region *cxlr = cxlr_dax->cxlr;
struct dax_region *dax_region;
struct dev_dax_data data;
+ resource_size_t dev_size;
+ unsigned long flags;
if (nid == NUMA_NO_NODE)
nid = memory_add_physaddr_to_nid(cxlr_dax->hpa_range.start);
+ flags = IORESOURCE_DAX_KMEM;
+ if (cxlr->mode == CXL_REGION_DC)
+ flags |= IORESOURCE_DAX_SPARSE_CAP;
+
dax_region = alloc_dax_region(dev, cxlr->id, &cxlr_dax->hpa_range, nid,
- PMD_SIZE, IORESOURCE_DAX_KMEM);
+ PMD_SIZE, flags);
if (!dax_region)
return -ENOMEM;
+ if (cxlr->mode == CXL_REGION_DC)
+ /* Add empty seed dax device */
+ dev_size = 0;
+ else
+ dev_size = range_len(&cxlr_dax->hpa_range);
+
data = (struct dev_dax_data) {
.dax_region = dax_region,
.id = -1,
- .size = range_len(&cxlr_dax->hpa_range),
+ .size = dev_size,
.memmap_on_memory = true,
};