
mm: hugetlb_vmemmap: use bulk allocator in alloc_vmemmap_page_list()

Message ID 20230905071016.2818810-1-wangkefeng.wang@huawei.com (mailing list archive)
State New
Series mm: hugetlb_vmemmap: use bulk allocator in alloc_vmemmap_page_list()

Commit Message

Kefeng Wang Sept. 5, 2023, 7:10 a.m. UTC
alloc_vmemmap_page_list() needs to allocate 4095 pages (1G hugepage) or
7 pages (2M hugepage) in one go, so let's add a bulk allocator variant,
alloc_pages_bulk_list_node(), and switch alloc_vmemmap_page_list() to
use it to accelerate page allocation.

A simple test on arm64 under qemu with 1G HugeTLB shows 870,842ns with
the bulk allocator vs 3,845,252ns without; despite the fluctuations, it
is still a nice improvement.

Tested-by: Yuan Can <yuancan@huawei.com>
Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
---
 include/linux/gfp.h  | 9 +++++++++
 mm/hugetlb_vmemmap.c | 7 ++++++-
 2 files changed, 15 insertions(+), 1 deletion(-)
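
For context, a minimal, hypothetical caller of the new helper (not part of
the patch; "nid" and "nr" here are placeholders): bulk-allocate nr order-0
pages onto a list near node nid, keeping in mind that the helper may return
fewer pages than requested, so the caller has to cope with a partial result,
e.g. by freeing what it got:

	LIST_HEAD(pages);
	struct page *page, *next;
	unsigned long got;

	/* May return fewer than nr; 0 means not even one page was allocated. */
	got = alloc_pages_bulk_list_node(GFP_KERNEL, nid, nr, &pages);
	if (got < nr) {
		/* Give back the partial allocation (a caller could also retry). */
		list_for_each_entry_safe(page, next, &pages, lru)
			__free_pages(page, 0);
		return -ENOMEM;
	}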

Comments

Kefeng Wang Sept. 5, 2023, 7:32 a.m. UTC | #1
On 2023/9/5 15:10, Kefeng Wang wrote:
> alloc_vmemmap_page_list() needs to allocate 4095 pages (1G hugepage) or
> 7 pages (2M hugepage) in one go, so let's add a bulk allocator variant,
> alloc_pages_bulk_list_node(), and switch alloc_vmemmap_page_list() to
> use it to accelerate page allocation.
> 
> A simple test on arm64 under qemu with 1G HugeTLB shows 870,842ns with
> the bulk allocator vs 3,845,252ns without; despite the fluctuations, it
> is still a nice improvement.
> 
> Tested-by: Yuan Can <yuancan@huawei.com>
> Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
> ---
>   include/linux/gfp.h  | 9 +++++++++
>   mm/hugetlb_vmemmap.c | 7 ++++++-
>   2 files changed, 15 insertions(+), 1 deletion(-)
> 
> diff --git a/include/linux/gfp.h b/include/linux/gfp.h
> index 665f06675c83..d6e82f15b61f 100644
> --- a/include/linux/gfp.h
> +++ b/include/linux/gfp.h
> @@ -195,6 +195,15 @@ alloc_pages_bulk_list(gfp_t gfp, unsigned long nr_pages, struct list_head *list)
>   	return __alloc_pages_bulk(gfp, numa_mem_id(), NULL, nr_pages, list, NULL);
>   }
>   
> +static inline unsigned long
> +alloc_pages_bulk_list_node(gfp_t gfp, int nid, unsigned long nr_pages, struct list_head *list)
> +{
> +	if (nid == NUMA_NO_NODE)
> +		nid = numa_mem_id();
> +
> +	return __alloc_pages_bulk(gfp, nid, NULL, nr_pages, list, NULL);
> +}
> +
>   static inline unsigned long
>   alloc_pages_bulk_array(gfp_t gfp, unsigned long nr_pages, struct page **page_array)
>   {
> diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c
> index 4b9734777f69..699c4fea6b9f 100644
> --- a/mm/hugetlb_vmemmap.c
> +++ b/mm/hugetlb_vmemmap.c
> @@ -384,8 +384,13 @@ static int alloc_vmemmap_page_list(unsigned long start, unsigned long end,
>   	unsigned long nr_pages = (end - start) >> PAGE_SHIFT;
>   	int nid = page_to_nid((struct page *)start);
>   	struct page *page, *next;
> +	unsigned long nr_alloced;
>   
> -	while (nr_pages--) {
> +	nr_alloced = alloc_pages_bulk_list_node(gfp_mask, nid, nr_pages, list);
> +	if (!nr_alloced)
> +		return -ENOMEM;
> +

Eh, I forgot to increment nr_allocated in the fallback path, will resend.
> +	while (nr_alloced < nr_pages) {
>   		page = alloc_pages_node(nid, gfp_mask, 0);
>   		if (!page)
>   			goto out;
Muchun Song Sept. 5, 2023, 9:23 a.m. UTC | #2
> On Sep 5, 2023, at 15:32, Kefeng Wang <wangkefeng.wang@huawei.com> wrote:
> On 2023/9/5 15:10, Kefeng Wang wrote:
>> alloc_vmemmap_page_list() needs to allocate 4095 pages (1G hugepage) or
>> 7 pages (2M hugepage) in one go, so let's add a bulk allocator variant,
>> alloc_pages_bulk_list_node(), and switch alloc_vmemmap_page_list() to
>> use it to accelerate page allocation.
>> A simple test on arm64 under qemu with 1G HugeTLB shows 870,842ns with
>> the bulk allocator vs 3,845,252ns without; despite the fluctuations, it
>> is still a nice improvement.
>> Tested-by: Yuan Can <yuancan@huawei.com>
>> Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
>> ---
>>  include/linux/gfp.h  | 9 +++++++++
>>  mm/hugetlb_vmemmap.c | 7 ++++++-
>>  2 files changed, 15 insertions(+), 1 deletion(-)
>> diff --git a/include/linux/gfp.h b/include/linux/gfp.h
>> index 665f06675c83..d6e82f15b61f 100644
>> --- a/include/linux/gfp.h
>> +++ b/include/linux/gfp.h
>> @@ -195,6 +195,15 @@ alloc_pages_bulk_list(gfp_t gfp, unsigned long nr_pages, struct list_head *list)
>>   return __alloc_pages_bulk(gfp, numa_mem_id(), NULL, nr_pages, list, NULL);
>>  }
>>  +static inline unsigned long
>> +alloc_pages_bulk_list_node(gfp_t gfp, int nid, unsigned long nr_pages, struct list_head *list)
>> +{
>> + if (nid == NUMA_NO_NODE)
>> + nid = numa_mem_id();
>> +
>> + return __alloc_pages_bulk(gfp, nid, NULL, nr_pages, list, NULL);
>> +}
>> +
>>  static inline unsigned long
>>  alloc_pages_bulk_array(gfp_t gfp, unsigned long nr_pages, struct page **page_array)
>>  {
>> diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c
>> index 4b9734777f69..699c4fea6b9f 100644
>> --- a/mm/hugetlb_vmemmap.c
>> +++ b/mm/hugetlb_vmemmap.c
>> @@ -384,8 +384,13 @@ static int alloc_vmemmap_page_list(unsigned long start, unsigned long end,
>>   unsigned long nr_pages = (end - start) >> PAGE_SHIFT;
>>   int nid = page_to_nid((struct page *)start);
>>   struct page *page, *next;
>> + unsigned long nr_alloced;
>>  - while (nr_pages--) {
>> + nr_alloced = alloc_pages_bulk_list_node(gfp_mask, nid, nr_pages, list);
>> + if (!nr_alloced)
>> + return -ENOMEM;
>> +
> 
> Eh, I forgot to increment nr_allocated in the fallback path, will resend.

Do not change the loop condition; "nr_pages -= nr_alloced;" is enough
and simple.

>> + while (nr_alloced < nr_pages) {
>>   page = alloc_pages_node(nid, gfp_mask, 0);
>>   if (!page)
>>   goto out;
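
A sketch of what this suggestion amounts to, assuming the original
"while (nr_pages--)" loop is kept as the single-page fallback (gfp_mask,
nid and the error handling as in the existing function):

	nr_alloced = alloc_pages_bulk_list_node(gfp_mask, nid, nr_pages, list);
	if (!nr_alloced)
		return -ENOMEM;

	/* Only the remainder still needs the single-page fallback. */
	nr_pages -= nr_alloced;

	while (nr_pages--) {
		page = alloc_pages_node(nid, gfp_mask, 0);
		if (!page)
			goto out;
		list_add_tail(&page->lru, list);
	}
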
Kefeng Wang Sept. 5, 2023, 9:34 a.m. UTC | #3
On 2023/9/5 17:23, Muchun Song wrote:
> 
> 
>> On Sep 5, 2023, at 15:32, Kefeng Wang <wangkefeng.wang@huawei.com> wrote:
>> On 2023/9/5 15:10, Kefeng Wang wrote:
>>> alloc_vmemmap_page_list() needs to allocate 4095 pages (1G hugepage) or
>>> 7 pages (2M hugepage) in one go, so let's add a bulk allocator variant,
>>> alloc_pages_bulk_list_node(), and switch alloc_vmemmap_page_list() to
>>> use it to accelerate page allocation.
>>> A simple test on arm64 under qemu with 1G HugeTLB shows 870,842ns with
>>> the bulk allocator vs 3,845,252ns without; despite the fluctuations, it
>>> is still a nice improvement.
>>> Tested-by: Yuan Can <yuancan@huawei.com>
>>> Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
>>> ---
>>>   include/linux/gfp.h  | 9 +++++++++
>>>   mm/hugetlb_vmemmap.c | 7 ++++++-
>>>   2 files changed, 15 insertions(+), 1 deletion(-)
>>> diff --git a/include/linux/gfp.h b/include/linux/gfp.h
>>> index 665f06675c83..d6e82f15b61f 100644
>>> --- a/include/linux/gfp.h
>>> +++ b/include/linux/gfp.h
>>> @@ -195,6 +195,15 @@ alloc_pages_bulk_list(gfp_t gfp, unsigned long nr_pages, struct list_head *list)
>>>    return __alloc_pages_bulk(gfp, numa_mem_id(), NULL, nr_pages, list, NULL);
>>>   }
>>>   +static inline unsigned long
>>> +alloc_pages_bulk_list_node(gfp_t gfp, int nid, unsigned long nr_pages, struct list_head *list)
>>> +{
>>> + if (nid == NUMA_NO_NODE)
>>> + nid = numa_mem_id();
>>> +
>>> + return __alloc_pages_bulk(gfp, nid, NULL, nr_pages, list, NULL);
>>> +}
>>> +
>>>   static inline unsigned long
>>>   alloc_pages_bulk_array(gfp_t gfp, unsigned long nr_pages, struct page **page_array)
>>>   {
>>> diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c
>>> index 4b9734777f69..699c4fea6b9f 100644
>>> --- a/mm/hugetlb_vmemmap.c
>>> +++ b/mm/hugetlb_vmemmap.c
>>> @@ -384,8 +384,13 @@ static int alloc_vmemmap_page_list(unsigned long start, unsigned long end,
>>>    unsigned long nr_pages = (end - start) >> PAGE_SHIFT;
>>>    int nid = page_to_nid((struct page *)start);
>>>    struct page *page, *next;
>>> + unsigned long nr_alloced;
>>>   - while (nr_pages--) {
>>> + nr_alloced = alloc_pages_bulk_list_node(gfp_mask, nid, nr_pages, list);
>>> + if (!nr_alloced)
>>> + return -ENOMEM;
>>> +
>>
>> Eh, I forgot to increment nr_allocated in the fallback path, will resend.
> 
> Do not change the loop condition; "nr_pages -= nr_alloced;" is enough
> and simple.

sure, thanks.

> 
>>> + while (nr_alloced < nr_pages) {
>>>    page = alloc_pages_node(nid, gfp_mask, 0);
>>>    if (!page)
>>>    goto out;
> 
>
Kefeng Wang Sept. 5, 2023, 10:12 a.m. UTC | #4
On 2023/9/5 17:23, Muchun Song wrote:
> 
> 
>> On Sep 5, 2023, at 15:32, Kefeng Wang <wangkefeng.wang@huawei.com> wrote:
>> On 2023/9/5 15:10, Kefeng Wang wrote:
>>> alloc_vmemmap_page_list() needs to allocate 4095 pages (1G hugepage) or
>>> 7 pages (2M hugepage) in one go, so let's add a bulk allocator variant,
>>> alloc_pages_bulk_list_node(), and switch alloc_vmemmap_page_list() to
>>> use it to accelerate page allocation.
>>> A simple test on arm64 under qemu with 1G HugeTLB shows 870,842ns with
>>> the bulk allocator vs 3,845,252ns without; despite the fluctuations, it
>>> is still a nice improvement.
>>> Tested-by: Yuan Can <yuancan@huawei.com>
>>> Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
>>> ---
>>>   include/linux/gfp.h  | 9 +++++++++
>>>   mm/hugetlb_vmemmap.c | 7 ++++++-
>>>   2 files changed, 15 insertions(+), 1 deletion(-)
>>> diff --git a/include/linux/gfp.h b/include/linux/gfp.h
>>> index 665f06675c83..d6e82f15b61f 100644
>>> --- a/include/linux/gfp.h
>>> +++ b/include/linux/gfp.h
>>> @@ -195,6 +195,15 @@ alloc_pages_bulk_list(gfp_t gfp, unsigned long nr_pages, struct list_head *list)
>>>    return __alloc_pages_bulk(gfp, numa_mem_id(), NULL, nr_pages, list, NULL);
>>>   }
>>>   +static inline unsigned long
>>> +alloc_pages_bulk_list_node(gfp_t gfp, int nid, unsigned long nr_pages, struct list_head *list)
>>> +{
>>> + if (nid == NUMA_NO_NODE)
>>> + nid = numa_mem_id();
>>> +
>>> + return __alloc_pages_bulk(gfp, nid, NULL, nr_pages, list, NULL);
>>> +}
>>> +
>>>   static inline unsigned long
>>>   alloc_pages_bulk_array(gfp_t gfp, unsigned long nr_pages, struct page **page_array)
>>>   {
>>> diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c
>>> index 4b9734777f69..699c4fea6b9f 100644
>>> --- a/mm/hugetlb_vmemmap.c
>>> +++ b/mm/hugetlb_vmemmap.c
>>> @@ -384,8 +384,13 @@ static int alloc_vmemmap_page_list(unsigned long start, unsigned long end,
>>>    unsigned long nr_pages = (end - start) >> PAGE_SHIFT;
>>>    int nid = page_to_nid((struct page *)start);
>>>    struct page *page, *next;
>>> + unsigned long nr_alloced;
>>>   - while (nr_pages--) {
>>> + nr_alloced = alloc_pages_bulk_list_node(gfp_mask, nid, nr_pages, list);
>>> + if (!nr_alloced)
>>> + return -ENOMEM;
>>> +
>>
>> Eh, I forgot to increment nr_allocated in the fallback path, will resend.
> 
> Do not change the loop condition; "nr_pages -= nr_alloced;" is enough
> and simple.

With nr_pages = 7 and nr_alloced = 4, the new nr_pages would become 3, so
the "while (nr_alloced < nr_pages)" fallback loop would never run (4 < 3 is
false) unless nr_alloced is also cleared. I will instead keep the check and
increment nr_allocated only when a page is allocated successfully:

         while (nr_allocated < nr_pages) {
                 page = alloc_pages_node(nid, gfp_mask, 0);
                 if (!page)
                         goto out;
                 list_add_tail(&page->lru, list);
+               nr_allocated++;
         }

> 
>>> + while (nr_alloced < nr_pages) {
>>>    page = alloc_pages_node(nid, gfp_mask, 0);
>>>    if (!page)
>>>    goto out;
> 
>

Patch

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 665f06675c83..d6e82f15b61f 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -195,6 +195,15 @@  alloc_pages_bulk_list(gfp_t gfp, unsigned long nr_pages, struct list_head *list)
 	return __alloc_pages_bulk(gfp, numa_mem_id(), NULL, nr_pages, list, NULL);
 }
 
+static inline unsigned long
+alloc_pages_bulk_list_node(gfp_t gfp, int nid, unsigned long nr_pages, struct list_head *list)
+{
+	if (nid == NUMA_NO_NODE)
+		nid = numa_mem_id();
+
+	return __alloc_pages_bulk(gfp, nid, NULL, nr_pages, list, NULL);
+}
+
 static inline unsigned long
 alloc_pages_bulk_array(gfp_t gfp, unsigned long nr_pages, struct page **page_array)
 {
diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c
index 4b9734777f69..699c4fea6b9f 100644
--- a/mm/hugetlb_vmemmap.c
+++ b/mm/hugetlb_vmemmap.c
@@ -384,8 +384,13 @@  static int alloc_vmemmap_page_list(unsigned long start, unsigned long end,
 	unsigned long nr_pages = (end - start) >> PAGE_SHIFT;
 	int nid = page_to_nid((struct page *)start);
 	struct page *page, *next;
+	unsigned long nr_alloced;
 
-	while (nr_pages--) {
+	nr_alloced = alloc_pages_bulk_list_node(gfp_mask, nid, nr_pages, list);
+	if (!nr_alloced)
+		return -ENOMEM;
+
+	while (nr_alloced < nr_pages) {
 		page = alloc_pages_node(nid, gfp_mask, 0);
 		if (!page)
 			goto out;
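
For reference, a sketch of how the changed function would read with the
fallback accounting fix from the thread folded in. The signature, gfp_mask
handling and the out: error path are assumed to match the existing
alloc_vmemmap_page_list() and are shown only for context:

static int alloc_vmemmap_page_list(unsigned long start, unsigned long end,
				   gfp_t gfp_mask, struct list_head *list)
{
	unsigned long nr_pages = (end - start) >> PAGE_SHIFT;
	int nid = page_to_nid((struct page *)start);
	unsigned long nr_allocated;
	struct page *page, *next;

	/* Grab as many pages as possible with a single bulk call... */
	nr_allocated = alloc_pages_bulk_list_node(gfp_mask, nid, nr_pages, list);
	if (!nr_allocated)
		return -ENOMEM;

	/* ...then fall back to single-page allocation for whatever is left. */
	while (nr_allocated < nr_pages) {
		page = alloc_pages_node(nid, gfp_mask, 0);
		if (!page)
			goto out;
		list_add_tail(&page->lru, list);
		nr_allocated++;
	}

	return 0;
out:
	list_for_each_entry_safe(page, next, list, lru)
		__free_pages(page, 0);
	return -ENOMEM;
}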