diff mbox series

[v4,4/9] hugetlb: region_chg provides only cache entry

Message ID 20190910233146.206080-5-almasrymina@google.com (mailing list archive)
State New
Headers show
Series hugetlb_cgroup: Add hugetlb_cgroup reservation limits | expand

Commit Message

Mina Almasry Sept. 10, 2019, 11:31 p.m. UTC
Current behavior is that region_chg provides both a cache entry in
resv->region_cache, AND a placeholder entry in resv->regions. region_add
first tries to use the placeholder, and if it finds that the placeholder
has been deleted by a racing region_del call, it uses the cache entry.

This behavior is completely unnecessary and is removed in this patch for
a couple of reasons:

1. region_add needs to either find a cached file_region entry in
   resv->region_cache, or find an entry in resv->regions to expand. It
   does not need both.
2. region_chg adding a placeholder entry in resv->regions opens up
   a possible race with region_del, where region_chg adds a placeholder
   region in resv->regions, and this region is deleted by a racing call
   to region_del during region_chg execution or before region_add is
   called. Removing the race makes the code easier to reason about and
   maintain.

In addition, a follow up patch in this series disables region
coalescing, which would be further complicated if the race with
region_del exists.

Signed-off-by: Mina Almasry <almasrymina@google.com>
---
 mm/hugetlb.c | 63 +++++++++-------------------------------------------
 1 file changed, 11 insertions(+), 52 deletions(-)

--
2.23.0.162.g0b9fbb3734-goog

Comments

Mike Kravetz Sept. 16, 2019, 10:17 p.m. UTC | #1
On 9/10/19 4:31 PM, Mina Almasry wrote:
> Current behavior is that region_chg provides both a cache entry in
> resv->region_cache, AND a placeholder entry in resv->regions. region_add
> first tries to use the placeholder, and if it finds that the placeholder
> has been deleted by a racing region_del call, it uses the cache entry.
> 
> This behavior is completely unnecessary and is removed in this patch for
> a couple of reasons:
> 
> 1. region_add needs to either find a cached file_region entry in
>    resv->region_cache, or find an entry in resv->regions to expand. It
>    does not need both.
> 2. region_chg adding a placeholder entry in resv->regions opens up
>    a possible race with region_del, where region_chg adds a placeholder
>    region in resv->regions, and this region is deleted by a racing call
>    to region_del during region_chg execution or before region_add is
>    called. Removing the race makes the code easier to reason about and
>    maintain.
> 
> In addition, a follow up patch in this series disables region
> coalescing, which would be further complicated if the race with
> region_del exists.
> 
> Signed-off-by: Mina Almasry <almasrymina@google.com>

Thanks.  I like this modification as it does simplify the code and could
be added as a general cleanup independent of the other changes.

Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
diff mbox series

Patch

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index fbd7c52e17348..bea51ae422f63 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -246,14 +246,10 @@  struct file_region {

 /*
  * Add the huge page range represented by [f, t) to the reserve
- * map.  In the normal case, existing regions will be expanded
- * to accommodate the specified range.  Sufficient regions should
- * exist for expansion due to the previous call to region_chg
- * with the same range.  However, it is possible that region_del
- * could have been called after region_chg and modifed the map
- * in such a way that no region exists to be expanded.  In this
- * case, pull a region descriptor from the cache associated with
- * the map and use that for the new range.
+ * map.  Existing regions will be expanded to accommodate the specified
+ * range, or a region will be taken from the cache.  Sufficient regions
+ * must exist in the cache due to the previous call to region_chg with
+ * the same range.
  *
  * Return the number of new huge pages added to the map.  This
  * number is greater than or equal to zero.
@@ -272,9 +268,8 @@  static long region_add(struct resv_map *resv, long f, long t)

 	/*
 	 * If no region exists which can be expanded to include the
-	 * specified range, the list must have been modified by an
-	 * interleving call to region_del().  Pull a region descriptor
-	 * from the cache and use it for this range.
+	 * specified range, pull a region descriptor from the cache
+	 * and use it for this range.
 	 */
 	if (&rg->link == head || t < rg->from) {
 		VM_BUG_ON(resv->region_cache_count <= 0);
@@ -339,15 +334,9 @@  static long region_add(struct resv_map *resv, long f, long t)
  * call to region_add that will actually modify the reserve
  * map to add the specified range [f, t).  region_chg does
  * not change the number of huge pages represented by the
- * map.  However, if the existing regions in the map can not
- * be expanded to represent the new range, a new file_region
- * structure is added to the map as a placeholder.  This is
- * so that the subsequent region_add call will have all the
- * regions it needs and will not fail.
- *
- * Upon entry, region_chg will also examine the cache of region descriptors
- * associated with the map.  If there are not enough descriptors cached, one
- * will be allocated for the in progress add operation.
+ * map.  A new file_region structure is added to the cache
+ * as a placeholder, so that the subsequent region_add
+ * call will have all the regions it needs and will not fail.
  *
  * Returns the number of huge pages that need to be added to the existing
  * reservation map for the range [f, t).  This number is greater or equal to
@@ -357,10 +346,9 @@  static long region_add(struct resv_map *resv, long f, long t)
 static long region_chg(struct resv_map *resv, long f, long t)
 {
 	struct list_head *head = &resv->regions;
-	struct file_region *rg, *nrg = NULL;
+	struct file_region *rg;
 	long chg = 0;

-retry:
 	spin_lock(&resv->lock);
 retry_locked:
 	resv->adds_in_progress++;
@@ -378,10 +366,8 @@  static long region_chg(struct resv_map *resv, long f, long t)
 		spin_unlock(&resv->lock);

 		trg = kmalloc(sizeof(*trg), GFP_KERNEL);
-		if (!trg) {
-			kfree(nrg);
+		if (!trg)
 			return -ENOMEM;
-		}

 		spin_lock(&resv->lock);
 		list_add(&trg->link, &resv->region_cache);
@@ -394,28 +380,6 @@  static long region_chg(struct resv_map *resv, long f, long t)
 		if (f <= rg->to)
 			break;

-	/* If we are below the current region then a new region is required.
-	 * Subtle, allocate a new region at the position but make it zero
-	 * size such that we can guarantee to record the reservation. */
-	if (&rg->link == head || t < rg->from) {
-		if (!nrg) {
-			resv->adds_in_progress--;
-			spin_unlock(&resv->lock);
-			nrg = kmalloc(sizeof(*nrg), GFP_KERNEL);
-			if (!nrg)
-				return -ENOMEM;
-
-			nrg->from = f;
-			nrg->to   = f;
-			INIT_LIST_HEAD(&nrg->link);
-			goto retry;
-		}
-
-		list_add(&nrg->link, rg->link.prev);
-		chg = t - f;
-		goto out_nrg;
-	}
-
 	/* Round our left edge to the current segment if it encloses us. */
 	if (f > rg->from)
 		f = rg->from;
@@ -439,11 +403,6 @@  static long region_chg(struct resv_map *resv, long f, long t)
 	}

 out:
-	spin_unlock(&resv->lock);
-	/*  We already know we raced and no longer need the new region */
-	kfree(nrg);
-	return chg;
-out_nrg:
 	spin_unlock(&resv->lock);
 	return chg;
 }