
[5/8] hugetlb: call update_and_free_page without hugetlb_lock

Message ID 20210325002835.216118-6-mike.kravetz@oracle.com (mailing list archive)
State New, archived
Series: make hugetlb put_page safe for all calling contexts

Commit Message

Mike Kravetz March 25, 2021, 12:28 a.m. UTC
With the introduction of remove_hugetlb_page(), there is no need for
update_and_free_page to hold the hugetlb lock.  Change all callers to
drop the lock before calling.

With additional code modifications, this will allow loops which decrease
the huge page pool to drop the hugetlb_lock with each page to reduce
long hold times.

The ugly unlock/lock cycle in free_pool_huge_page will be removed in
a subsequent patch which restructures free_pool_huge_page.

Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com>
---
 mm/hugetlb.c | 34 +++++++++++++++++++++++++++++-----
 1 file changed, 29 insertions(+), 5 deletions(-)

Comments

Michal Hocko March 25, 2021, 10:55 a.m. UTC | #1
On Wed 24-03-21 17:28:32, Mike Kravetz wrote:
> With the introduction of remove_hugetlb_page(), there is no need for
> update_and_free_page to hold the hugetlb lock.  Change all callers to
> drop the lock before calling.
> 
> With additional code modifications, this will allow loops which decrease
> the huge page pool to drop the hugetlb_lock with each page to reduce
> long hold times.
> 
> The ugly unlock/lock cycle in free_pool_huge_page will be removed in
> a subsequent patch which restructures free_pool_huge_page.
> 
> Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com>

Acked-by: Michal Hocko <mhocko@suse.com>

One minor thing below

[...]
> @@ -2563,22 +2572,37 @@ static void try_to_free_low(struct hstate *h, unsigned long count,
>  						nodemask_t *nodes_allowed)
>  {
>  	int i;
> +	struct list_head page_list;
> +	struct page *page, *next;
>  
>  	if (hstate_is_gigantic(h))
>  		return;
>  
> +	/*
> +	 * Collect pages to be freed on a list, and free after dropping lock
> +	 */
> +	INIT_LIST_HEAD(&page_list);
>  	for_each_node_mask(i, *nodes_allowed) {
> -		struct page *page, *next;
>  		struct list_head *freel = &h->hugepage_freelists[i];
>  		list_for_each_entry_safe(page, next, freel, lru) {
>  			if (count >= h->nr_huge_pages)
> -				return;
> +				goto out;
>  			if (PageHighMem(page))
>  				continue;
>  			remove_hugetlb_page(h, page, false);
> -			update_and_free_page(h, page);
> +			INIT_LIST_HEAD(&page->lru);

What is the point of this INIT_LIST_HEAD? Page has been removed from the
list by remove_hugetlb_page so it can be added to a new one without any
reinitialization.

> +			list_add(&page->lru, &page_list);
>  		}
>  	}
> +
> +out:
> +	spin_unlock(&hugetlb_lock);
> +	list_for_each_entry_safe(page, next, &page_list, lru) {
> +		list_del(&page->lru);
> +		update_and_free_page(h, page);
> +		cond_resched();
> +	}
> +	spin_lock(&hugetlb_lock);
>  }
>  #else
>  static inline void try_to_free_low(struct hstate *h, unsigned long count,
> -- 
> 2.30.2
>
Mike Kravetz March 25, 2021, 5:12 p.m. UTC | #2
On 3/25/21 3:55 AM, Michal Hocko wrote:
> On Wed 24-03-21 17:28:32, Mike Kravetz wrote:
>> With the introduction of remove_hugetlb_page(), there is no need for
>> update_and_free_page to hold the hugetlb lock.  Change all callers to
>> drop the lock before calling.
>>
>> With additional code modifications, this will allow loops which decrease
>> the huge page pool to drop the hugetlb_lock with each page to reduce
>> long hold times.
>>
>> The ugly unlock/lock cycle in free_pool_huge_page will be removed in
>> a subsequent patch which restructures free_pool_huge_page.
>>
>> Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com>
> 
> Acked-by: Michal Hocko <mhocko@suse.com>
> 
> One minor thing below
> 
> [...]
>> @@ -2563,22 +2572,37 @@ static void try_to_free_low(struct hstate *h, unsigned long count,
>>  						nodemask_t *nodes_allowed)
>>  {
>>  	int i;
>> +	struct list_head page_list;
>> +	struct page *page, *next;
>>  
>>  	if (hstate_is_gigantic(h))
>>  		return;
>>  
>> +	/*
>> +	 * Collect pages to be freed on a list, and free after dropping lock
>> +	 */
>> +	INIT_LIST_HEAD(&page_list);
>>  	for_each_node_mask(i, *nodes_allowed) {
>> -		struct page *page, *next;
>>  		struct list_head *freel = &h->hugepage_freelists[i];
>>  		list_for_each_entry_safe(page, next, freel, lru) {
>>  			if (count >= h->nr_huge_pages)
>> -				return;
>> +				goto out;
>>  			if (PageHighMem(page))
>>  				continue;
>>  			remove_hugetlb_page(h, page, false);
>> -			update_and_free_page(h, page);
>> +			INIT_LIST_HEAD(&page->lru);
> 
> What is the point of this INIT_LIST_HEAD? Page has been removed from the
> list by remove_hugetlb_page so it can be added to a new one without any
> reinitialization.

remove_hugetlb_page just does a list_del.  list_del will poison the
pointers in page->lru.  The following list_add will then complain about
list corruption.

I could replace the list_del in remove_hugetlb_page with list_del_init.
However, not all callers of remove_hugetlb_page will be adding the page
to a list.  If we just call update_and_free_page, there is no need to
reinitialize the list pointers.

Might be better to just use list_del_init in remove_hugetlb_page to
avoid any questions like this.
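
For reference, the two helpers differ only in what they leave behind in
the removed entry (roughly, from include/linux/list.h):

	static inline void list_del(struct list_head *entry)
	{
		__list_del_entry(entry);
		entry->next = LIST_POISON1;	/* deliberately poisoned */
		entry->prev = LIST_POISON2;
	}

	static inline void list_del_init(struct list_head *entry)
	{
		__list_del_entry(entry);
		INIT_LIST_HEAD(entry);		/* entry is an empty list again */
	}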
Michal Hocko March 25, 2021, 7:39 p.m. UTC | #3
On Thu 25-03-21 10:12:05, Mike Kravetz wrote:
> On 3/25/21 3:55 AM, Michal Hocko wrote:
> > On Wed 24-03-21 17:28:32, Mike Kravetz wrote:
> >> With the introduction of remove_hugetlb_page(), there is no need for
> >> update_and_free_page to hold the hugetlb lock.  Change all callers to
> >> drop the lock before calling.
> >>
> >> With additional code modifications, this will allow loops which decrease
> >> the huge page pool to drop the hugetlb_lock with each page to reduce
> >> long hold times.
> >>
> >> The ugly unlock/lock cycle in free_pool_huge_page will be removed in
> >> a subsequent patch which restructures free_pool_huge_page.
> >>
> >> Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com>
> > 
> > Acked-by: Michal Hocko <mhocko@suse.com>
> > 
> > One minor thing below
> > 
> > [...]
> >> @@ -2563,22 +2572,37 @@ static void try_to_free_low(struct hstate *h, unsigned long count,
> >>  						nodemask_t *nodes_allowed)
> >>  {
> >>  	int i;
> >> +	struct list_head page_list;
> >> +	struct page *page, *next;
> >>  
> >>  	if (hstate_is_gigantic(h))
> >>  		return;
> >>  
> >> +	/*
> >> +	 * Collect pages to be freed on a list, and free after dropping lock
> >> +	 */
> >> +	INIT_LIST_HEAD(&page_list);
> >>  	for_each_node_mask(i, *nodes_allowed) {
> >> -		struct page *page, *next;
> >>  		struct list_head *freel = &h->hugepage_freelists[i];
> >>  		list_for_each_entry_safe(page, next, freel, lru) {
> >>  			if (count >= h->nr_huge_pages)
> >> -				return;
> >> +				goto out;
> >>  			if (PageHighMem(page))
> >>  				continue;
> >>  			remove_hugetlb_page(h, page, false);
> >> -			update_and_free_page(h, page);
> >> +			INIT_LIST_HEAD(&page->lru);
> > 
> > What is the point of this INIT_LIST_HEAD? Page has been removed from the
> > list by remove_hugetlb_page so it can be added to a new one without any
> > reinitialization.
> 
> remove_hugetlb_page just does a list_del.  list_del will poison the
> pointers in page->lru.  The following list_add will then complain about
> list corruption.

Are you sure? list_del followed by list_add is a normal API usage
pattern AFAIK. INIT_LIST_HEAD is to do the first initialization before
first use.
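
A minimal sketch of that pattern (illustration only, not taken from the
patch):

	LIST_HEAD(tmp);

	list_del(&page->lru);		/* lru.next/prev now hold poison values */
	list_add(&page->lru, &tmp);	/* both pointers are rewritten unconditionally */

list_add() never reads the entry's own next/prev pointers, so the poison
left behind by list_del() is simply overwritten.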
Mike Kravetz March 25, 2021, 8:33 p.m. UTC | #4
On 3/25/21 12:39 PM, Michal Hocko wrote:
> On Thu 25-03-21 10:12:05, Mike Kravetz wrote:
>> On 3/25/21 3:55 AM, Michal Hocko wrote:
>>> On Wed 24-03-21 17:28:32, Mike Kravetz wrote:
>>>> With the introduction of remove_hugetlb_page(), there is no need for
>>>> update_and_free_page to hold the hugetlb lock.  Change all callers to
>>>> drop the lock before calling.
>>>>
>>>> With additional code modifications, this will allow loops which decrease
>>>> the huge page pool to drop the hugetlb_lock with each page to reduce
>>>> long hold times.
>>>>
>>>> The ugly unlock/lock cycle in free_pool_huge_page will be removed in
>>>> a subsequent patch which restructures free_pool_huge_page.
>>>>
>>>> Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com>
>>>
>>> Acked-by: Michal Hocko <mhocko@suse.com>
>>>
>>> One minor thing below
>>>
>>> [...]
>>>> @@ -2563,22 +2572,37 @@ static void try_to_free_low(struct hstate *h, unsigned long count,
>>>>  						nodemask_t *nodes_allowed)
>>>>  {
>>>>  	int i;
>>>> +	struct list_head page_list;
>>>> +	struct page *page, *next;
>>>>  
>>>>  	if (hstate_is_gigantic(h))
>>>>  		return;
>>>>  
>>>> +	/*
>>>> +	 * Collect pages to be freed on a list, and free after dropping lock
>>>> +	 */
>>>> +	INIT_LIST_HEAD(&page_list);
>>>>  	for_each_node_mask(i, *nodes_allowed) {
>>>> -		struct page *page, *next;
>>>>  		struct list_head *freel = &h->hugepage_freelists[i];
>>>>  		list_for_each_entry_safe(page, next, freel, lru) {
>>>>  			if (count >= h->nr_huge_pages)
>>>> -				return;
>>>> +				goto out;
>>>>  			if (PageHighMem(page))
>>>>  				continue;
>>>>  			remove_hugetlb_page(h, page, false);
>>>> -			update_and_free_page(h, page);
>>>> +			INIT_LIST_HEAD(&page->lru);
>>>
>>> What is the point of this INIT_LIST_HEAD? Page has been removed from the
>>> list by remove_hugetlb_page so it can be added to a new one without any
>>> reinitialization.
>>
>> remove_hugetlb_page just does a list_del.  list_del will poison the
>> pointers in page->lru.  The following list_add will then complain about
>> list corruption.
> 
> Are you sure? list_del followed by list_add is a normal API usage
> pattern AFAIK. INIT_LIST_HEAD is to do the first initialization before
> first use.

Sorry for the noise.  The INIT_LIST_HEAD is indeed unnecessary.

I must have got confused while looking at a corrupt list splat in
earlier code development.
Muchun Song March 27, 2021, 6:54 a.m. UTC | #5
On Thu, Mar 25, 2021 at 8:29 AM Mike Kravetz <mike.kravetz@oracle.com> wrote:
>
> With the introduction of remove_hugetlb_page(), there is no need for
> update_and_free_page to hold the hugetlb lock.  Change all callers to
> drop the lock before calling.
>
> With additional code modifications, this will allow loops which decrease
> the huge page pool to drop the hugetlb_lock with each page to reduce
> long hold times.
>
> The ugly unlock/lock cycle in free_pool_huge_page will be removed in
> a subsequent patch which restructures free_pool_huge_page.
>
> Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com>

Reviewed-by: Muchun Song <songmuchun@bytedance.com>

Some nits below.

> ---
>  mm/hugetlb.c | 34 +++++++++++++++++++++++++++++-----
>  1 file changed, 29 insertions(+), 5 deletions(-)
>
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index 3938ec086b5c..fae7f034d1eb 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -1450,16 +1450,18 @@ static void __free_huge_page(struct page *page)
>
>         if (HPageTemporary(page)) {
>                 remove_hugetlb_page(h, page, false);
> +               spin_unlock(&hugetlb_lock);
>                 update_and_free_page(h, page);
>         } else if (h->surplus_huge_pages_node[nid]) {
>                 /* remove the page from active list */
>                 remove_hugetlb_page(h, page, true);
> +               spin_unlock(&hugetlb_lock);
>                 update_and_free_page(h, page);
>         } else {
>                 arch_clear_hugepage_flags(page);
>                 enqueue_huge_page(h, page);
> +               spin_unlock(&hugetlb_lock);
>         }
> -       spin_unlock(&hugetlb_lock);
>  }
>
>  /*
> @@ -1740,7 +1742,13 @@ static int free_pool_huge_page(struct hstate *h, nodemask_t *nodes_allowed,
>                                 list_entry(h->hugepage_freelists[node].next,
>                                           struct page, lru);
>                         remove_hugetlb_page(h, page, acct_surplus);
> +                       /*
> +                        * unlock/lock around update_and_free_page is temporary
> +                        * and will be removed with subsequent patch.
> +                        */
> +                       spin_unlock(&hugetlb_lock);
>                         update_and_free_page(h, page);
> +                       spin_lock(&hugetlb_lock);
>                         ret = 1;
>                         break;
>                 }
> @@ -1809,8 +1817,9 @@ int dissolve_free_huge_page(struct page *page)
>                 }
>                 remove_hugetlb_page(h, page, false);
>                 h->max_huge_pages--;
> +               spin_unlock(&hugetlb_lock);
>                 update_and_free_page(h, head);
> -               rc = 0;
> +               return 0;
>         }
>  out:
>         spin_unlock(&hugetlb_lock);
> @@ -2563,22 +2572,37 @@ static void try_to_free_low(struct hstate *h, unsigned long count,
>                                                 nodemask_t *nodes_allowed)
>  {
>         int i;
> +       struct list_head page_list;

I prefer to use LIST_HEAD(page_list) directly.
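
That is, something like (sketch):

	LIST_HEAD(page_list);	/* declares and initializes in one step */

which would replace both the separate declaration and the
INIT_LIST_HEAD() call below.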

> +       struct page *page, *next;
>
>         if (hstate_is_gigantic(h))
>                 return;
>
> +       /*
> +        * Collect pages to be freed on a list, and free after dropping lock
> +        */
> +       INIT_LIST_HEAD(&page_list);
>         for_each_node_mask(i, *nodes_allowed) {
> -               struct page *page, *next;
>                 struct list_head *freel = &h->hugepage_freelists[i];
>                 list_for_each_entry_safe(page, next, freel, lru) {
>                         if (count >= h->nr_huge_pages)
> -                               return;
> +                               goto out;
>                         if (PageHighMem(page))
>                                 continue;
>                         remove_hugetlb_page(h, page, false);
> -                       update_and_free_page(h, page);
> +                       INIT_LIST_HEAD(&page->lru);

As Michal pointed out that this is superfluous.

> +                       list_add(&page->lru, &page_list);
>                 }
>         }
> +
> +out:
> +       spin_unlock(&hugetlb_lock);
> +       list_for_each_entry_safe(page, next, &page_list, lru) {
> +               list_del(&page->lru);

It looks like list_del() is also superfluous. Should we remove it?
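
That is, the free loop could presumably be reduced to (untested sketch):

	spin_unlock(&hugetlb_lock);
	list_for_each_entry_safe(page, next, &page_list, lru) {
		/* page_list is local and about to go away, no list_del needed */
		update_and_free_page(h, page);
		cond_resched();
	}
	spin_lock(&hugetlb_lock);

since the _safe iterator caches the next entry before the page is freed.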

Thanks.

> +               update_and_free_page(h, page);
> +               cond_resched();
> +       }
> +       spin_lock(&hugetlb_lock);
>  }
>  #else
>  static inline void try_to_free_low(struct hstate *h, unsigned long count,
> --
> 2.30.2
>
Mike Kravetz March 28, 2021, 9:40 p.m. UTC | #6
On 3/26/21 11:54 PM, Muchun Song wrote:
> On Thu, Mar 25, 2021 at 8:29 AM Mike Kravetz <mike.kravetz@oracle.com> wrote:
>>
>> With the introduction of remove_hugetlb_page(), there is no need for
>> update_and_free_page to hold the hugetlb lock.  Change all callers to
>> drop the lock before calling.
>>
>> With additional code modifications, this will allow loops which decrease
>> the huge page pool to drop the hugetlb_lock with each page to reduce
>> long hold times.
>>
>> The ugly unlock/lock cycle in free_pool_huge_page will be removed in
>> a subsequent patch which restructures free_pool_huge_page.
>>
>> Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com>
> 
> Reviewed-by: Muchun Song <songmuchun@bytedance.com>
> 
> Some nits below.

Thanks Muchun,

I agree with all your suggestions below, and will include modifications
in the next version.
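
With the LIST_HEAD, INIT_LIST_HEAD and list_del suggestions folded in,
the try_to_free_low() hunk would then look roughly like this (untested
sketch for the next version):

	static void try_to_free_low(struct hstate *h, unsigned long count,
						nodemask_t *nodes_allowed)
	{
		int i;
		LIST_HEAD(page_list);
		struct page *page, *next;

		if (hstate_is_gigantic(h))
			return;

		/*
		 * Collect pages to be freed on a list, and free after
		 * dropping the lock.
		 */
		for_each_node_mask(i, *nodes_allowed) {
			struct list_head *freel = &h->hugepage_freelists[i];
			list_for_each_entry_safe(page, next, freel, lru) {
				if (count >= h->nr_huge_pages)
					goto out;
				if (PageHighMem(page))
					continue;
				remove_hugetlb_page(h, page, false);
				list_add(&page->lru, &page_list);
			}
		}

	out:
		spin_unlock(&hugetlb_lock);
		list_for_each_entry_safe(page, next, &page_list, lru) {
			update_and_free_page(h, page);
			cond_resched();
		}
		spin_lock(&hugetlb_lock);
	}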

Patch

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 3938ec086b5c..fae7f034d1eb 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1450,16 +1450,18 @@  static void __free_huge_page(struct page *page)
 
 	if (HPageTemporary(page)) {
 		remove_hugetlb_page(h, page, false);
+		spin_unlock(&hugetlb_lock);
 		update_and_free_page(h, page);
 	} else if (h->surplus_huge_pages_node[nid]) {
 		/* remove the page from active list */
 		remove_hugetlb_page(h, page, true);
+		spin_unlock(&hugetlb_lock);
 		update_and_free_page(h, page);
 	} else {
 		arch_clear_hugepage_flags(page);
 		enqueue_huge_page(h, page);
+		spin_unlock(&hugetlb_lock);
 	}
-	spin_unlock(&hugetlb_lock);
 }
 
 /*
@@ -1740,7 +1742,13 @@  static int free_pool_huge_page(struct hstate *h, nodemask_t *nodes_allowed,
 				list_entry(h->hugepage_freelists[node].next,
 					  struct page, lru);
 			remove_hugetlb_page(h, page, acct_surplus);
+			/*
+			 * unlock/lock around update_and_free_page is temporary
+			 * and will be removed with subsequent patch.
+			 */
+			spin_unlock(&hugetlb_lock);
 			update_and_free_page(h, page);
+			spin_lock(&hugetlb_lock);
 			ret = 1;
 			break;
 		}
@@ -1809,8 +1817,9 @@  int dissolve_free_huge_page(struct page *page)
 		}
 		remove_hugetlb_page(h, page, false);
 		h->max_huge_pages--;
+		spin_unlock(&hugetlb_lock);
 		update_and_free_page(h, head);
-		rc = 0;
+		return 0;
 	}
 out:
 	spin_unlock(&hugetlb_lock);
@@ -2563,22 +2572,37 @@  static void try_to_free_low(struct hstate *h, unsigned long count,
 						nodemask_t *nodes_allowed)
 {
 	int i;
+	struct list_head page_list;
+	struct page *page, *next;
 
 	if (hstate_is_gigantic(h))
 		return;
 
+	/*
+	 * Collect pages to be freed on a list, and free after dropping lock
+	 */
+	INIT_LIST_HEAD(&page_list);
 	for_each_node_mask(i, *nodes_allowed) {
-		struct page *page, *next;
 		struct list_head *freel = &h->hugepage_freelists[i];
 		list_for_each_entry_safe(page, next, freel, lru) {
 			if (count >= h->nr_huge_pages)
-				return;
+				goto out;
 			if (PageHighMem(page))
 				continue;
 			remove_hugetlb_page(h, page, false);
-			update_and_free_page(h, page);
+			INIT_LIST_HEAD(&page->lru);
+			list_add(&page->lru, &page_list);
 		}
 	}
+
+out:
+	spin_unlock(&hugetlb_lock);
+	list_for_each_entry_safe(page, next, &page_list, lru) {
+		list_del(&page->lru);
+		update_and_free_page(h, page);
+		cond_resched();
+	}
+	spin_lock(&hugetlb_lock);
 }
 #else
 static inline void try_to_free_low(struct hstate *h, unsigned long count,