
[v4,5/8] hugetlb: call update_and_free_page without hugetlb_lock

Message ID 20210405230043.182734-6-mike.kravetz@oracle.com (mailing list archive)
State New, archived
Series make hugetlb put_page safe for all calling contexts

Commit Message

Mike Kravetz April 5, 2021, 11 p.m. UTC
With the introduction of remove_hugetlb_page(), there is no need for
update_and_free_page to hold the hugetlb lock.  Change all callers to
drop the lock before calling.

With additional code modifications, this will allow loops which decrease
the huge page pool to drop the hugetlb_lock with each page to reduce
long hold times.

The ugly unlock/lock cycle in free_pool_huge_page will be removed in
a subsequent patch which restructures free_pool_huge_page.

Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com>
---
 mm/hugetlb.c | 43 +++++++++++++++++++++++++++++++++----------
 1 file changed, 33 insertions(+), 10 deletions(-)
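
For readers skimming the series, here is a minimal, illustrative sketch of the
pattern this patch applies (not the actual kernel code; remove_hugetlb_page()
and update_and_free_page() are the helpers from this series, while the
surrounding function is hypothetical): all hstate accounting is adjusted while
hugetlb_lock is held, and the potentially slow freeing work runs after the
lock is dropped.

	static void free_one_huge_page(struct hstate *h, struct page *page)
	{
		spin_lock(&hugetlb_lock);
		/* Adjust hstate counters while the lock is still held. */
		remove_hugetlb_page(h, page, false);
		spin_unlock(&hugetlb_lock);

		/*
		 * The page is no longer visible to hugetlb accounting, so
		 * the freeing work can proceed without holding the lock.
		 */
		update_and_free_page(h, page);
	}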

Comments

Michal Hocko April 6, 2021, 9:57 a.m. UTC | #1
On Mon 05-04-21 16:00:40, Mike Kravetz wrote:
> With the introduction of remove_hugetlb_page(), there is no need for
> update_and_free_page to hold the hugetlb lock.  Change all callers to
> drop the lock before calling.
> 
> With additional code modifications, this will allow loops which decrease
> the huge page pool to drop the hugetlb_lock with each page to reduce
> long hold times.
> 
> The ugly unlock/lock cycle in free_pool_huge_page will be removed in
> a subsequent patch which restructures free_pool_huge_page.
> 
> Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com>

Still looks good.

Acked-by: Michal Hocko <mhocko@suse.com>

> ---
>  mm/hugetlb.c | 43 +++++++++++++++++++++++++++++++++----------
>  1 file changed, 33 insertions(+), 10 deletions(-)
> 
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index df2a3d1f632b..be6031a8e2a9 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -1446,16 +1446,18 @@ static void __free_huge_page(struct page *page)
>  
>  	if (HPageTemporary(page)) {
>  		remove_hugetlb_page(h, page, false);
> +		spin_unlock(&hugetlb_lock);
>  		update_and_free_page(h, page);
>  	} else if (h->surplus_huge_pages_node[nid]) {
>  		/* remove the page from active list */
>  		remove_hugetlb_page(h, page, true);
> +		spin_unlock(&hugetlb_lock);
>  		update_and_free_page(h, page);
>  	} else {
>  		arch_clear_hugepage_flags(page);
>  		enqueue_huge_page(h, page);
> +		spin_unlock(&hugetlb_lock);
>  	}
> -	spin_unlock(&hugetlb_lock);
>  }
>  
>  /*
> @@ -1736,7 +1738,13 @@ static int free_pool_huge_page(struct hstate *h, nodemask_t *nodes_allowed,
>  				list_entry(h->hugepage_freelists[node].next,
>  					  struct page, lru);
>  			remove_hugetlb_page(h, page, acct_surplus);
> +			/*
> +			 * unlock/lock around update_and_free_page is temporary
> +			 * and will be removed with subsequent patch.
> +			 */
> +			spin_unlock(&hugetlb_lock);
>  			update_and_free_page(h, page);
> +			spin_lock(&hugetlb_lock);
>  			ret = 1;
>  			break;
>  		}
> @@ -1805,8 +1813,9 @@ int dissolve_free_huge_page(struct page *page)
>  		}
>  		remove_hugetlb_page(h, page, false);
>  		h->max_huge_pages--;
> +		spin_unlock(&hugetlb_lock);
>  		update_and_free_page(h, head);
> -		rc = 0;
> +		return 0;
>  	}
>  out:
>  	spin_unlock(&hugetlb_lock);
> @@ -2291,6 +2300,7 @@ static int alloc_and_dissolve_huge_page(struct hstate *h, struct page *old_page,
>  	gfp_t gfp_mask = htlb_alloc_mask(h) | __GFP_THISNODE;
>  	int nid = page_to_nid(old_page);
>  	struct page *new_page;
> +	struct page *page_to_free;
>  	int ret = 0;
>  
>  	/*
> @@ -2313,16 +2323,16 @@ static int alloc_and_dissolve_huge_page(struct hstate *h, struct page *old_page,
>  		 * Freed from under us. Drop new_page too.
>  		 */
>  		remove_hugetlb_page(h, new_page, false);
> -		update_and_free_page(h, new_page);
> -		goto unlock;
> +		page_to_free = new_page;
> +		goto unlock_free;
>  	} else if (page_count(old_page)) {
>  		/*
>  		 * Someone has grabbed the page, try to isolate it here.
>  		 * Fail with -EBUSY if not possible.
>  		 */
>  		remove_hugetlb_page(h, new_page, false);
> -		update_and_free_page(h, new_page);
>  		spin_unlock(&hugetlb_lock);
> +		update_and_free_page(h, new_page);
>  		if (!isolate_huge_page(old_page, list))
>  			ret = -EBUSY;
>  		return ret;
> @@ -2344,11 +2354,12 @@ static int alloc_and_dissolve_huge_page(struct hstate *h, struct page *old_page,
>  		 * enqueue_huge_page for new page.  Net result is no change.
>  		 */
>  		remove_hugetlb_page(h, old_page, false);
> -		update_and_free_page(h, old_page);
>  		enqueue_huge_page(h, new_page);
> +		page_to_free = old_page;
>  	}
> -unlock:
> +unlock_free:
>  	spin_unlock(&hugetlb_lock);
> +	update_and_free_page(h, page_to_free);
>  
>  	return ret;
>  }
> @@ -2671,22 +2682,34 @@ static void try_to_free_low(struct hstate *h, unsigned long count,
>  						nodemask_t *nodes_allowed)
>  {
>  	int i;
> +	struct page *page, *next;
> +	LIST_HEAD(page_list);
>  
>  	if (hstate_is_gigantic(h))
>  		return;
>  
> +	/*
> +	 * Collect pages to be freed on a list, and free after dropping lock
> +	 */
>  	for_each_node_mask(i, *nodes_allowed) {
> -		struct page *page, *next;
>  		struct list_head *freel = &h->hugepage_freelists[i];
>  		list_for_each_entry_safe(page, next, freel, lru) {
>  			if (count >= h->nr_huge_pages)
> -				return;
> +				goto out;
>  			if (PageHighMem(page))
>  				continue;
>  			remove_hugetlb_page(h, page, false);
> -			update_and_free_page(h, page);
> +			list_add(&page->lru, &page_list);
>  		}
>  	}
> +
> +out:
> +	spin_unlock(&hugetlb_lock);
> +	list_for_each_entry_safe(page, next, &page_list, lru) {
> +		update_and_free_page(h, page);
> +		cond_resched();
> +	}
> +	spin_lock(&hugetlb_lock);
>  }
>  #else
>  static inline void try_to_free_low(struct hstate *h, unsigned long count,
> -- 
> 2.30.2
>
Oscar Salvador April 7, 2021, 8:27 a.m. UTC | #2
On Mon, Apr 05, 2021 at 04:00:40PM -0700, Mike Kravetz wrote:
> With the introduction of remove_hugetlb_page(), there is no need for
> update_and_free_page to hold the hugetlb lock.  Change all callers to
> drop the lock before calling.
> 
> With additional code modifications, this will allow loops which decrease
> the huge page pool to drop the hugetlb_lock with each page to reduce
> long hold times.
> 
> The ugly unlock/lock cycle in free_pool_huge_page will be removed in
> a subsequent patch which restructures free_pool_huge_page.
> 
> Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com>

Without looking too closely at the changes made to alloc_and_dissolve_huge_page():

Reviewed-by: Oscar Salvador <osalvador@suse.de>

One question below:

> @@ -2671,22 +2682,34 @@ static void try_to_free_low(struct hstate *h, unsigned long count,
>  						nodemask_t *nodes_allowed)
>  {
>  	int i;
> +	struct page *page, *next;
> +	LIST_HEAD(page_list);
>  
>  	if (hstate_is_gigantic(h))
>  		return;
>  
> +	/*
> +	 * Collect pages to be freed on a list, and free after dropping lock
> +	 */
>  	for_each_node_mask(i, *nodes_allowed) {
> -		struct page *page, *next;
>  		struct list_head *freel = &h->hugepage_freelists[i];
>  		list_for_each_entry_safe(page, next, freel, lru) {
>  			if (count >= h->nr_huge_pages)
> -				return;
> +				goto out;
>  			if (PageHighMem(page))
>  				continue;
>  			remove_hugetlb_page(h, page, false);
> -			update_and_free_page(h, page);
> +			list_add(&page->lru, &page_list);
>  		}
>  	}
> +
> +out:
> +	spin_unlock(&hugetlb_lock);
> +	list_for_each_entry_safe(page, next, &page_list, lru) {
> +		update_and_free_page(h, page);
> +		cond_resched();
> +	}
> +	spin_lock(&hugetlb_lock);

Can we get here with an empty list? Maybe if someone raced with us manipulating
nr_huge_pages? AFAICS, this gets called under the lock, and the adjustment in
remove_hugetlb_page() is also done under the lock, so I guess this cannot
happen.
The reason I am asking is whether we want to check whether the list is empty
before we do the unlock/lock dance.
Michal Hocko April 7, 2021, 9:28 a.m. UTC | #3
On Wed 07-04-21 10:27:49, Oscar Salvador wrote:
> On Mon, Apr 05, 2021 at 04:00:40PM -0700, Mike Kravetz wrote:
[...]
> > @@ -2671,22 +2682,34 @@ static void try_to_free_low(struct hstate *h, unsigned long count,
> >  						nodemask_t *nodes_allowed)
> >  {
> >  	int i;
> > +	struct page *page, *next;
> > +	LIST_HEAD(page_list);
> >  
> >  	if (hstate_is_gigantic(h))
> >  		return;
> >  
> > +	/*
> > +	 * Collect pages to be freed on a list, and free after dropping lock
> > +	 */
> >  	for_each_node_mask(i, *nodes_allowed) {
> > -		struct page *page, *next;
> >  		struct list_head *freel = &h->hugepage_freelists[i];
> >  		list_for_each_entry_safe(page, next, freel, lru) {
> >  			if (count >= h->nr_huge_pages)
> > -				return;
> > +				goto out;
> >  			if (PageHighMem(page))
> >  				continue;
> >  			remove_hugetlb_page(h, page, false);
> > -			update_and_free_page(h, page);
> > +			list_add(&page->lru, &page_list);
> >  		}
> >  	}
> > +
> > +out:
> > +	spin_unlock(&hugetlb_lock);
> > +	list_for_each_entry_safe(page, next, &page_list, lru) {
> > +		update_and_free_page(h, page);
> > +		cond_resched();
> > +	}
> > +	spin_lock(&hugetlb_lock);
> 
> Can we get here with an empty list?

An empty page_list? If yes, then sure, this can happen, but
list_for_each_entry_safe will simply not iterate. Or what do you mean?
Oscar Salvador April 7, 2021, 9:37 a.m. UTC | #4
On Wed, Apr 07, 2021 at 11:28:51AM +0200, Michal Hocko wrote:
> An empty page_list? If yes, then sure, this can happen, but
> list_for_each_entry_safe will simply not iterate. Or what do you mean?

Yes, I meant page_list.
Yeah, I figured list_for_each_entry_safe() would simply not iterate, but I
wondered whether we still want the spin_unlock()/spin_lock() in that case.

But it is probably not worth adding more code, so it is fine.

Thanks
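
For completeness, the alternative Oscar was hinting at would look roughly like
the sketch below (hypothetical, not part of this series; the series keeps the
simpler form since list_for_each_entry_safe() is a no-op on an empty list):

	out:
		/* Nothing was collected; keep the lock held and return. */
		if (list_empty(&page_list))
			return;

		spin_unlock(&hugetlb_lock);
		list_for_each_entry_safe(page, next, &page_list, lru) {
			update_and_free_page(h, page);
			cond_resched();
		}
		spin_lock(&hugetlb_lock);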

Patch

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index df2a3d1f632b..be6031a8e2a9 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1446,16 +1446,18 @@  static void __free_huge_page(struct page *page)
 
 	if (HPageTemporary(page)) {
 		remove_hugetlb_page(h, page, false);
+		spin_unlock(&hugetlb_lock);
 		update_and_free_page(h, page);
 	} else if (h->surplus_huge_pages_node[nid]) {
 		/* remove the page from active list */
 		remove_hugetlb_page(h, page, true);
+		spin_unlock(&hugetlb_lock);
 		update_and_free_page(h, page);
 	} else {
 		arch_clear_hugepage_flags(page);
 		enqueue_huge_page(h, page);
+		spin_unlock(&hugetlb_lock);
 	}
-	spin_unlock(&hugetlb_lock);
 }
 
 /*
@@ -1736,7 +1738,13 @@  static int free_pool_huge_page(struct hstate *h, nodemask_t *nodes_allowed,
 				list_entry(h->hugepage_freelists[node].next,
 					  struct page, lru);
 			remove_hugetlb_page(h, page, acct_surplus);
+			/*
+			 * unlock/lock around update_and_free_page is temporary
+			 * and will be removed with subsequent patch.
+			 */
+			spin_unlock(&hugetlb_lock);
 			update_and_free_page(h, page);
+			spin_lock(&hugetlb_lock);
 			ret = 1;
 			break;
 		}
@@ -1805,8 +1813,9 @@  int dissolve_free_huge_page(struct page *page)
 		}
 		remove_hugetlb_page(h, page, false);
 		h->max_huge_pages--;
+		spin_unlock(&hugetlb_lock);
 		update_and_free_page(h, head);
-		rc = 0;
+		return 0;
 	}
 out:
 	spin_unlock(&hugetlb_lock);
@@ -2291,6 +2300,7 @@  static int alloc_and_dissolve_huge_page(struct hstate *h, struct page *old_page,
 	gfp_t gfp_mask = htlb_alloc_mask(h) | __GFP_THISNODE;
 	int nid = page_to_nid(old_page);
 	struct page *new_page;
+	struct page *page_to_free;
 	int ret = 0;
 
 	/*
@@ -2313,16 +2323,16 @@  static int alloc_and_dissolve_huge_page(struct hstate *h, struct page *old_page,
 		 * Freed from under us. Drop new_page too.
 		 */
 		remove_hugetlb_page(h, new_page, false);
-		update_and_free_page(h, new_page);
-		goto unlock;
+		page_to_free = new_page;
+		goto unlock_free;
 	} else if (page_count(old_page)) {
 		/*
 		 * Someone has grabbed the page, try to isolate it here.
 		 * Fail with -EBUSY if not possible.
 		 */
 		remove_hugetlb_page(h, new_page, false);
-		update_and_free_page(h, new_page);
 		spin_unlock(&hugetlb_lock);
+		update_and_free_page(h, new_page);
 		if (!isolate_huge_page(old_page, list))
 			ret = -EBUSY;
 		return ret;
@@ -2344,11 +2354,12 @@  static int alloc_and_dissolve_huge_page(struct hstate *h, struct page *old_page,
 		 * enqueue_huge_page for new page.  Net result is no change.
 		 */
 		remove_hugetlb_page(h, old_page, false);
-		update_and_free_page(h, old_page);
 		enqueue_huge_page(h, new_page);
+		page_to_free = old_page;
 	}
-unlock:
+unlock_free:
 	spin_unlock(&hugetlb_lock);
+	update_and_free_page(h, page_to_free);
 
 	return ret;
 }
@@ -2671,22 +2682,34 @@  static void try_to_free_low(struct hstate *h, unsigned long count,
 						nodemask_t *nodes_allowed)
 {
 	int i;
+	struct page *page, *next;
+	LIST_HEAD(page_list);
 
 	if (hstate_is_gigantic(h))
 		return;
 
+	/*
+	 * Collect pages to be freed on a list, and free after dropping lock
+	 */
 	for_each_node_mask(i, *nodes_allowed) {
-		struct page *page, *next;
 		struct list_head *freel = &h->hugepage_freelists[i];
 		list_for_each_entry_safe(page, next, freel, lru) {
 			if (count >= h->nr_huge_pages)
-				return;
+				goto out;
 			if (PageHighMem(page))
 				continue;
 			remove_hugetlb_page(h, page, false);
-			update_and_free_page(h, page);
+			list_add(&page->lru, &page_list);
 		}
 	}
+
+out:
+	spin_unlock(&hugetlb_lock);
+	list_for_each_entry_safe(page, next, &page_list, lru) {
+		update_and_free_page(h, page);
+		cond_resched();
+	}
+	spin_lock(&hugetlb_lock);
 }
 #else
 static inline void try_to_free_low(struct hstate *h, unsigned long count,