diff mbox

[08/14] libnvdimm, region: update nd_region_available_dpa() for multi-pmem support

Message ID 147585836356.22349.6803275913917776138.stgit@dwillia2-desk3.amr.corp.intel.com (mailing list archive)
State Accepted
Commit a1f3e4d6a0c3
Headers show

Commit Message

Dan Williams Oct. 7, 2016, 4:39 p.m. UTC
The free dpa (dimm-physical-address) space calculation reports how much
free space is available with consideration for aliased BLK + PMEM
regions.  Recall that BLK capacity is allocated from high addresses and
PMEM is allocated from low addresses in their respective regions.

nd_region_available_dpa() accounts for the fact that the largest
encroachment (lowest starting address) into PMEM capacity by a BLK
allocation limits the available capacity to that point, regardless if
there is BLK allocation hole at a higher address.  Similarly, for the
multi-pmem case we need to track the largest encroachment (highest
 ending address) of a PMEM allocation in BLK capacity regardless of
whether there is an allocation hole that a BLK allocation could fill at
a lower address.

Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/nvdimm/dimm_devs.c   |  174 +++++++++++++++++++++++++++++++++---------
 drivers/nvdimm/nd-core.h     |    2 
 drivers/nvdimm/region_devs.c |    5 -
 3 files changed, 139 insertions(+), 42 deletions(-)
diff mbox

Patch

diff --git a/drivers/nvdimm/dimm_devs.c b/drivers/nvdimm/dimm_devs.c
index cf36470e94c0..4b0296ccb375 100644
--- a/drivers/nvdimm/dimm_devs.c
+++ b/drivers/nvdimm/dimm_devs.c
@@ -386,40 +386,148 @@  struct nvdimm *nvdimm_create(struct nvdimm_bus *nvdimm_bus, void *provider_data,
 }
 EXPORT_SYMBOL_GPL(nvdimm_create);
 
+struct blk_alloc_info {
+	struct nd_mapping *nd_mapping;
+	resource_size_t available, busy;
+	struct resource *res;
+};
+
+static int alias_dpa_busy(struct device *dev, void *data)
+{
+	resource_size_t map_end, blk_start, new, busy;
+	struct blk_alloc_info *info = data;
+	struct nd_mapping *nd_mapping;
+	struct nd_region *nd_region;
+	struct nvdimm_drvdata *ndd;
+	struct resource *res;
+	int i;
+
+	if (!is_nd_pmem(dev))
+		return 0;
+
+	nd_region = to_nd_region(dev);
+	for (i = 0; i < nd_region->ndr_mappings; i++) {
+		nd_mapping  = &nd_region->mapping[i];
+		if (nd_mapping->nvdimm == info->nd_mapping->nvdimm)
+			break;
+	}
+
+	if (i >= nd_region->ndr_mappings)
+		return 0;
+
+	ndd = to_ndd(nd_mapping);
+	map_end = nd_mapping->start + nd_mapping->size - 1;
+	blk_start = nd_mapping->start;
+ retry:
+	/*
+	 * Find the free dpa from the end of the last pmem allocation to
+	 * the end of the interleave-set mapping that is not already
+	 * covered by a blk allocation.
+	 */
+	busy = 0;
+	for_each_dpa_resource(ndd, res) {
+		if ((res->start >= blk_start && res->start < map_end)
+				|| (res->end >= blk_start
+					&& res->end <= map_end)) {
+			if (strncmp(res->name, "pmem", 4) == 0) {
+				new = max(blk_start, min(map_end + 1,
+							res->end + 1));
+				if (new != blk_start) {
+					blk_start = new;
+					goto retry;
+				}
+			} else
+				busy += min(map_end, res->end)
+					- max(nd_mapping->start, res->start) + 1;
+		} else if (nd_mapping->start > res->start
+				&& map_end < res->end) {
+			/* total eclipse of the PMEM region mapping */
+			busy += nd_mapping->size;
+			break;
+		}
+	}
+
+	info->available -= blk_start - nd_mapping->start + busy;
+	return 0;
+}
+
+static int blk_dpa_busy(struct device *dev, void *data)
+{
+	struct blk_alloc_info *info = data;
+	struct nd_mapping *nd_mapping;
+	struct nd_region *nd_region;
+	resource_size_t map_end;
+	int i;
+
+	if (!is_nd_pmem(dev))
+		return 0;
+
+	nd_region = to_nd_region(dev);
+	for (i = 0; i < nd_region->ndr_mappings; i++) {
+		nd_mapping  = &nd_region->mapping[i];
+		if (nd_mapping->nvdimm == info->nd_mapping->nvdimm)
+			break;
+	}
+
+	if (i >= nd_region->ndr_mappings)
+		return 0;
+
+	map_end = nd_mapping->start + nd_mapping->size - 1;
+	if (info->res->start >= nd_mapping->start
+			&& info->res->start < map_end) {
+		if (info->res->end <= map_end) {
+			info->busy = 0;
+			return 1;
+		} else {
+			info->busy -= info->res->end - map_end;
+			return 0;
+		}
+	} else if (info->res->end >= nd_mapping->start
+			&& info->res->end <= map_end) {
+		info->busy -= nd_mapping->start - info->res->start;
+		return 0;
+	} else {
+		info->busy -= nd_mapping->size;
+		return 0;
+	}
+}
+
 /**
  * nd_blk_available_dpa - account the unused dpa of BLK region
  * @nd_mapping: container of dpa-resource-root + labels
  *
- * Unlike PMEM, BLK namespaces can occupy discontiguous DPA ranges.
+ * Unlike PMEM, BLK namespaces can occupy discontiguous DPA ranges, but
+ * we arrange for them to never start at an lower dpa than the last
+ * PMEM allocation in an aliased region.
  */
-resource_size_t nd_blk_available_dpa(struct nd_mapping *nd_mapping)
+resource_size_t nd_blk_available_dpa(struct nd_region *nd_region)
 {
+	struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(&nd_region->dev);
+	struct nd_mapping *nd_mapping = &nd_region->mapping[0];
 	struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
-	resource_size_t map_end, busy = 0, available;
+	struct blk_alloc_info info = {
+		.nd_mapping = nd_mapping,
+		.available = nd_mapping->size,
+	};
 	struct resource *res;
 
 	if (!ndd)
 		return 0;
 
-	map_end = nd_mapping->start + nd_mapping->size - 1;
-	for_each_dpa_resource(ndd, res)
-		if (res->start >= nd_mapping->start && res->start < map_end) {
-			resource_size_t end = min(map_end, res->end);
+	device_for_each_child(&nvdimm_bus->dev, &info, alias_dpa_busy);
 
-			busy += end - res->start + 1;
-		} else if (res->end >= nd_mapping->start
-				&& res->end <= map_end) {
-			busy += res->end - nd_mapping->start;
-		} else if (nd_mapping->start > res->start
-				&& nd_mapping->start < res->end) {
-			/* total eclipse of the BLK region mapping */
-			busy += nd_mapping->size;
-		}
+	/* now account for busy blk allocations in unaliased dpa */
+	for_each_dpa_resource(ndd, res) {
+		if (strncmp(res->name, "blk", 3) != 0)
+			continue;
 
-	available = map_end - nd_mapping->start + 1;
-	if (busy < available)
-		return available - busy;
-	return 0;
+		info.res = res;
+		info.busy = resource_size(res);
+		device_for_each_child(&nvdimm_bus->dev, &info, blk_dpa_busy);
+		info.available -= info.busy;
+	}
+
+	return info.available;
 }
 
 /**
@@ -451,21 +559,16 @@  resource_size_t nd_pmem_available_dpa(struct nd_region *nd_region,
 	map_start = nd_mapping->start;
 	map_end = map_start + nd_mapping->size - 1;
 	blk_start = max(map_start, map_end + 1 - *overlap);
-	for_each_dpa_resource(ndd, res)
+	for_each_dpa_resource(ndd, res) {
 		if (res->start >= map_start && res->start < map_end) {
 			if (strncmp(res->name, "blk", 3) == 0)
-				blk_start = min(blk_start, res->start);
-			else if (res->start != map_start) {
+				blk_start = min(blk_start,
+						max(map_start, res->start));
+			else if (res->end > map_end) {
 				reason = "misaligned to iset";
 				goto err;
-			} else {
-				if (busy) {
-					reason = "duplicate overlapping PMEM reservations?";
-					goto err;
-				}
+			} else
 				busy += resource_size(res);
-				continue;
-			}
 		} else if (res->end >= map_start && res->end <= map_end) {
 			if (strncmp(res->name, "blk", 3) == 0) {
 				/*
@@ -474,15 +577,14 @@  resource_size_t nd_pmem_available_dpa(struct nd_region *nd_region,
 				 * be used for BLK.
 				 */
 				blk_start = map_start;
-			} else {
-				reason = "misaligned to iset";
-				goto err;
-			}
+			} else
+				busy += resource_size(res);
 		} else if (map_start > res->start && map_start < res->end) {
 			/* total eclipse of the mapping */
 			busy += nd_mapping->size;
 			blk_start = map_start;
 		}
+	}
 
 	*overlap = map_end + 1 - blk_start;
 	available = blk_start - map_start;
@@ -491,10 +593,6 @@  resource_size_t nd_pmem_available_dpa(struct nd_region *nd_region,
 	return 0;
 
  err:
-	/*
-	 * Something is wrong, PMEM must align with the start of the
-	 * interleave set, and there can only be one allocation per set.
-	 */
 	nd_dbg_dpa(nd_region, ndd, res, "%s\n", reason);
 	return 0;
 }
diff --git a/drivers/nvdimm/nd-core.h b/drivers/nvdimm/nd-core.h
index fb3ade0d4a83..7c2196a1d56f 100644
--- a/drivers/nvdimm/nd-core.h
+++ b/drivers/nvdimm/nd-core.h
@@ -76,7 +76,7 @@  struct nd_mapping;
 void nd_mapping_free_labels(struct nd_mapping *nd_mapping);
 resource_size_t nd_pmem_available_dpa(struct nd_region *nd_region,
 		struct nd_mapping *nd_mapping, resource_size_t *overlap);
-resource_size_t nd_blk_available_dpa(struct nd_mapping *nd_mapping);
+resource_size_t nd_blk_available_dpa(struct nd_region *nd_region);
 resource_size_t nd_region_available_dpa(struct nd_region *nd_region);
 resource_size_t nvdimm_allocated_dpa(struct nvdimm_drvdata *ndd,
 		struct nd_label_id *label_id);
diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c
index 19bcd68c4141..3ac534aec60c 100644
--- a/drivers/nvdimm/region_devs.c
+++ b/drivers/nvdimm/region_devs.c
@@ -294,9 +294,8 @@  resource_size_t nd_region_available_dpa(struct nd_region *nd_region)
 				blk_max_overlap = overlap;
 				goto retry;
 			}
-		} else if (is_nd_blk(&nd_region->dev)) {
-			available += nd_blk_available_dpa(nd_mapping);
-		}
+		} else if (is_nd_blk(&nd_region->dev))
+			available += nd_blk_available_dpa(nd_region);
 	}
 
 	return available;