@@ -223,6 +223,25 @@ void cxl_dpa_debug(struct seq_file *file, struct cxl_dev_state *cxlds)
}
EXPORT_SYMBOL_NS_GPL(cxl_dpa_debug, CXL);
+/*
+ * Drop every skipped-DPA reservation recorded in @cxled->skip_res.
+ *
+ * Each tracked resource is released from the memdev's dpa_res tree and its
+ * entry is erased from the xarray, so the xarray is empty on return.
+ *
+ * NOTE(review): __cxl_dpa_release() calls this with cxl_dpa_rwsem held for
+ * write; confirm every other caller does as well.
+ */
+static void cxl_skip_release(struct cxl_endpoint_decoder *cxled)
+{
+	struct resource *dpa_root = &cxled_to_memdev(cxled)->cxlds->dpa_res;
+	struct cxl_port *port = cxled_to_port(cxled);
+	struct resource *skip;
+	unsigned long slot;
+
+	xa_for_each(&cxled->skip_res, slot, skip) {
+		/* Log first: @skip is not referenced after the release below */
+		dev_dbg(&port->dev, "decoder%d.%d: releasing skipped space; %pr\n",
+			port->id, cxled->cxld.id, skip);
+		__release_region(dpa_root, skip->start, resource_size(skip));
+		xa_erase(&cxled->skip_res, slot);
+	}
+}
+
/*
* Must be called in a context that synchronizes against this decoder's
* port ->remove() callback (like an endpoint decoder sysfs attribute)
@@ -233,15 +252,11 @@ static void __cxl_dpa_release(struct cxl_endpoint_decoder *cxled)
struct cxl_port *port = cxled_to_port(cxled);
struct cxl_dev_state *cxlds = cxlmd->cxlds;
struct resource *res = cxled->dpa_res;
- resource_size_t skip_start;
lockdep_assert_held_write(&cxl_dpa_rwsem);
- /* save @skip_start, before @res is released */
- skip_start = res->start - cxled->skip;
__release_region(&cxlds->dpa_res, res->start, resource_size(res));
- if (cxled->skip)
- __release_region(&cxlds->dpa_res, skip_start, cxled->skip);
+ cxl_skip_release(cxled);
cxled->skip = 0;
cxled->dpa_res = NULL;
put_device(&cxled->cxld.dev);
@@ -268,6 +283,105 @@ static void devm_cxl_dpa_release(struct cxl_endpoint_decoder *cxled)
__cxl_dpa_release(cxled);
}
+/*
+ * Convert a decoder mode to an offset relative to CXL_DECODER_DC0.
+ *
+ * No range checking is performed: the result is simply @mode minus
+ * CXL_DECODER_DC0, so callers must validate it before using it as an index.
+ */
+static int dc_mode_to_region_index(enum cxl_decoder_mode mode)
+{
+	int region_index = mode - CXL_DECODER_DC0;
+
+	return region_index;
+}
+
+/*
+ * Reserve one span of skipped DPA on behalf of @cxled and remember it.
+ *
+ * @cxled: endpoint decoder that owns the skip
+ * @skip_base: first DPA of the span to reserve
+ * @skip_len: length of the span in bytes
+ *
+ * The span is requested from the memdev's dpa_res tree and the resulting
+ * resource is stored in @cxled->skip_res, keyed by @skip_base, so that
+ * cxl_skip_release() can find it later.
+ *
+ * Return: 0 on success, -EBUSY if the span cannot be reserved, or the
+ * xa_insert() error code, in which case the reservation is rolled back.
+ */
+static int cxl_request_skip(struct cxl_endpoint_decoder *cxled,
+			    resource_size_t skip_base, resource_size_t skip_len)
+{
+	struct resource *dpa_root = &cxled_to_memdev(cxled)->cxlds->dpa_res;
+	struct cxl_port *port = cxled_to_port(cxled);
+	struct resource *skip;
+	int rc;
+
+	skip = __request_region(dpa_root, skip_base, skip_len,
+				dev_name(&cxled->cxld.dev), 0);
+	if (!skip)
+		return -EBUSY;
+
+	rc = xa_insert(&cxled->skip_res, skip_base, skip, GFP_KERNEL);
+	if (!rc) {
+		dev_dbg(&port->dev, "decoder%d.%d: skipped space; %pr\n",
+			port->id, cxled->cxld.id, skip);
+		return 0;
+	}
+
+	/* Tracking failed: undo the reservation so nothing is left untracked */
+	__release_region(dpa_root, skip_base, skip_len);
+	return rc;
+}
+
+/*
+ * Reserve the DPA span [@base - @skipped, @base) that @cxled skips over.
+ *
+ * @cxled: endpoint decoder whose allocation starts at @base
+ * @base: first DPA of the decoder's own allocation
+ * @skipped: number of bytes skipped immediately below @base
+ *
+ * The span is reserved piecewise with cxl_request_skip(): first up to the
+ * end of the ram partition, then up to the end of the pmem partition, then
+ * for each DC region up to the one selected by @cxled->mode, the gap in
+ * front of the region followed by the remainder of the region itself.
+ *
+ * On any failure every skip reservation held by @cxled is dropped through
+ * cxl_skip_release() before returning, so a partially reserved skip is never
+ * left behind in dpa_res / skip_res.
+ * NOTE(review): this assumes @cxled->skip_res is empty on entry - confirm
+ * the caller rejects decoders that already hold a DPA reservation.
+ *
+ * Return: 0 on success, the error from cxl_request_skip() on a failed
+ * reservation, or -ENXIO if the skip cursor has passed @base inside a DC
+ * region.
+ */
+static int cxl_reserve_dpa_skip(struct cxl_endpoint_decoder *cxled,
+				resource_size_t base, resource_size_t skipped)
+{
+	struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
+	struct cxl_port *port = cxled_to_port(cxled);
+	struct cxl_dev_state *cxlds = cxlmd->cxlds;
+	resource_size_t skip_base = base - skipped;
+	struct device *dev = &port->dev;
+	resource_size_t skip_len = 0;
+	int rc, index;
+
+	if (resource_size(&cxlds->ram_res) && skip_base <= cxlds->ram_res.end) {
+		skip_len = cxlds->ram_res.end - skip_base + 1;
+		rc = cxl_request_skip(cxled, skip_base, skip_len);
+		if (rc)
+			goto err_release;
+		skip_base += skip_len;
+	}
+
+	if (skip_base == base) {
+		dev_dbg(dev, "skip done ram!\n");
+		return 0;
+	}
+
+	if (resource_size(&cxlds->pmem_res) &&
+	    skip_base <= cxlds->pmem_res.end) {
+		skip_len = cxlds->pmem_res.end - skip_base + 1;
+		rc = cxl_request_skip(cxled, skip_base, skip_len);
+		if (rc)
+			goto err_release;
+		skip_base += skip_len;
+	}
+
+	/* A negative index (mode below CXL_DECODER_DC0) skips the loop */
+	index = dc_mode_to_region_index(cxled->mode);
+	for (int i = 0; i <= index; i++) {
+		struct resource *dcr = &cxlds->dc_res[i];
+
+		/* Gap between the current cursor and the start of region i */
+		if (skip_base < dcr->start) {
+			skip_len = dcr->start - skip_base;
+			rc = cxl_request_skip(cxled, skip_base, skip_len);
+			if (rc)
+				goto err_release;
+			skip_base += skip_len;
+		}
+
+		if (skip_base == base) {
+			dev_dbg(dev, "skip done DC region %d!\n", i);
+			break;
+		}
+
+		/* Remainder of region i itself */
+		if (resource_size(dcr) && skip_base <= dcr->end) {
+			if (skip_base > base) {
+				dev_err(dev, "Skip error DC region %d; skip_base %pa; base %pa\n",
+					i, &skip_base, &base);
+				rc = -ENXIO;
+				goto err_release;
+			}
+
+			skip_len = dcr->end - skip_base + 1;
+			rc = cxl_request_skip(cxled, skip_base, skip_len);
+			if (rc)
+				goto err_release;
+			skip_base += skip_len;
+		}
+	}
+
+	return 0;
+
+err_release:
+	/* Unwind the pieces reserved so far; the caller does not do it */
+	cxl_skip_release(cxled);
+	return rc;
+}
+
static int __cxl_dpa_reserve(struct cxl_endpoint_decoder *cxled,
resource_size_t base, resource_size_t len,
resource_size_t skipped)
@@ -305,13 +419,12 @@ static int __cxl_dpa_reserve(struct cxl_endpoint_decoder *cxled,
}
if (skipped) {
- res = __request_region(&cxlds->dpa_res, base - skipped, skipped,
- dev_name(&cxled->cxld.dev), 0);
- if (!res) {
- dev_dbg(dev,
- "decoder%d.%d: failed to reserve skipped space\n",
- port->id, cxled->cxld.id);
- return -EBUSY;
+ int rc = cxl_reserve_dpa_skip(cxled, base, skipped);
+
+ if (rc) {
+ dev_dbg(dev, "decoder%d.%d: failed to reserve skipped space; %pa - %pa\n",
+ port->id, cxled->cxld.id, &base, &skipped);
+ return rc;
}
}
res = __request_region(&cxlds->dpa_res, base, len,
@@ -319,14 +432,20 @@ static int __cxl_dpa_reserve(struct cxl_endpoint_decoder *cxled,
if (!res) {
dev_dbg(dev, "decoder%d.%d: failed to reserve allocation\n",
port->id, cxled->cxld.id);
- if (skipped)
- __release_region(&cxlds->dpa_res, base - skipped,
- skipped);
+ cxl_skip_release(cxled);
return -EBUSY;
}
cxled->dpa_res = res;
cxled->skip = skipped;
+ for (int mode = CXL_DECODER_DC0; mode <= CXL_DECODER_DC7; mode++) {
+ int index = dc_mode_to_region_index(mode);
+
+ if (resource_contains(&cxlds->dc_res[index], res)) {
+ cxled->mode = mode;
+ goto success;
+ }
+ }
if (resource_contains(&cxlds->pmem_res, res))
cxled->mode = CXL_DECODER_PMEM;
else if (resource_contains(&cxlds->ram_res, res))
@@ -337,6 +456,9 @@ static int __cxl_dpa_reserve(struct cxl_endpoint_decoder *cxled,
cxled->mode = CXL_DECODER_MIXED;
}
+success:
+ dev_dbg(dev, "decoder%d.%d: %pr mode: %d\n", port->id, cxled->cxld.id,
+ cxled->dpa_res, cxled->mode);
port->hdm_end++;
get_device(&cxled->cxld.dev);
return 0;
@@ -466,8 +588,8 @@ int cxl_dpa_set_mode(struct cxl_endpoint_decoder *cxled,
int cxl_dpa_alloc(struct cxl_endpoint_decoder *cxled, unsigned long long size)
{
- struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
resource_size_t free_ram_start, free_pmem_start;
+ struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
struct cxl_port *port = cxled_to_port(cxled);
struct cxl_dev_state *cxlds = cxlmd->cxlds;
struct device *dev = &cxled->cxld.dev;
@@ -524,12 +646,54 @@ int cxl_dpa_alloc(struct cxl_endpoint_decoder *cxled, unsigned long long size)
else
skip_end = start - 1;
skip = skip_end - skip_start + 1;
+ } else if (cxl_decoder_mode_is_dc(cxled->mode)) {
+ int dc_index = dc_mode_to_region_index(cxled->mode);
+
+ for (p = cxlds->dc_res[dc_index].child, last = NULL; p; p = p->sibling)
+ last = p;
+
+ if (last) {
+ /*
+ * Some capacity in this DC partition is already allocated,
+ * that allocation already handled the skip.
+ */
+ start = last->end + 1;
+ skip = 0;
+ } else {
+ /* Calculate skip */
+ resource_size_t skip_start, skip_end;
+
+ start = cxlds->dc_res[dc_index].start;
+
+ if ((resource_size(&cxlds->pmem_res) == 0) || !cxlds->pmem_res.child)
+ skip_start = free_ram_start;
+ else
+ skip_start = free_pmem_start;
+ /*
+ * If any dc region is already mapped, then that allocation
+ * already handled the RAM and PMEM skip. Check for DC region
+ * skip.
+ */
+ for (int i = dc_index - 1; i >= 0 ; i--) {
+ if (cxlds->dc_res[i].child) {
+ skip_start = cxlds->dc_res[i].child->end + 1;
+ break;
+ }
+ }
+
+ skip_end = start - 1;
+ skip = skip_end - skip_start + 1;
+ }
+ avail = cxlds->dc_res[dc_index].end - start + 1;
} else {
dev_dbg(dev, "mode not set\n");
rc = -EINVAL;
goto out;
}
+ dev_dbg(dev, "DPA Allocation start: %pa len: %#llx Skip: %pa\n",
+ &start, size, &skip);
+
if (size > avail) {
dev_dbg(dev, "%pa exceeds available %s capacity: %pa\n", &size,
cxl_decoder_mode_name(cxled->mode), &avail);
@@ -419,6 +419,7 @@ static void cxl_endpoint_decoder_release(struct device *dev)
struct cxl_endpoint_decoder *cxled = to_cxl_endpoint_decoder(dev);
__cxl_decoder_release(&cxled->cxld);
+ xa_destroy(&cxled->skip_res);
kfree(cxled);
}
@@ -1899,6 +1900,7 @@ struct cxl_endpoint_decoder *cxl_endpoint_decoder_alloc(struct cxl_port *port)
return ERR_PTR(-ENOMEM);
cxled->pos = -1;
+ xa_init(&cxled->skip_res);
cxld = &cxled->cxld;
rc = cxl_decoder_init(port, cxld);
if (rc) {
@@ -446,6 +446,7 @@ enum cxl_decoder_state {
* @cxld: base cxl_decoder_object
* @dpa_res: actively claimed DPA span of this decoder
* @skip: offset into @dpa_res where @cxld.hpa_range maps
+ * @skip_res: xarray of skipped DPA resources (keyed by start DPA) since the previous decoder's end
* @mode: which memory type / access-mode-partition this decoder targets
* @state: autodiscovery state
* @pos: interleave position in @cxld.region
@@ -454,6 +455,7 @@ struct cxl_endpoint_decoder {
struct cxl_decoder cxld;
struct resource *dpa_res;
resource_size_t skip;
+ struct xarray skip_res;
enum cxl_decoder_mode mode;
enum cxl_decoder_state state;
int pos;