Message ID | 20230905071016.2818810-1-wangkefeng.wang@huawei.com (mailing list archive)
---|---
State | New
Series | mm: hugetlb_vmemmap: use bulk allocator in alloc_vmemmap_page_list()
On 2023/9/5 15:10, Kefeng Wang wrote:
> 4095 pages (1G) or 7 pages (2M) need to be allocated at once in
> alloc_vmemmap_page_list(), so add a bulk allocator variant,
> alloc_pages_bulk_list_node(), and switch alloc_vmemmap_page_list()
> to use it to accelerate page allocation.
>
> A simple test on arm64 QEMU with a 1G hugetlb page shows 870,842ns vs
> 3,845,252ns; despite the fluctuations, it is still a nice improvement.
>
> Tested-by: Yuan Can <yuancan@huawei.com>
> Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
> ---
>  include/linux/gfp.h  | 9 +++++++++
>  mm/hugetlb_vmemmap.c | 7 ++++++-
>  2 files changed, 15 insertions(+), 1 deletion(-)
>
> diff --git a/include/linux/gfp.h b/include/linux/gfp.h
> index 665f06675c83..d6e82f15b61f 100644
> --- a/include/linux/gfp.h
> +++ b/include/linux/gfp.h
> @@ -195,6 +195,15 @@ alloc_pages_bulk_list(gfp_t gfp, unsigned long nr_pages, struct list_head *list)
>  	return __alloc_pages_bulk(gfp, numa_mem_id(), NULL, nr_pages, list, NULL);
>  }
>
> +static inline unsigned long
> +alloc_pages_bulk_list_node(gfp_t gfp, int nid, unsigned long nr_pages, struct list_head *list)
> +{
> +	if (nid == NUMA_NO_NODE)
> +		nid = numa_mem_id();
> +
> +	return __alloc_pages_bulk(gfp, nid, NULL, nr_pages, list, NULL);
> +}
> +
>  static inline unsigned long
>  alloc_pages_bulk_array(gfp_t gfp, unsigned long nr_pages, struct page **page_array)
>  {
> diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c
> index 4b9734777f69..699c4fea6b9f 100644
> --- a/mm/hugetlb_vmemmap.c
> +++ b/mm/hugetlb_vmemmap.c
> @@ -384,8 +384,13 @@ static int alloc_vmemmap_page_list(unsigned long start, unsigned long end,
>  	unsigned long nr_pages = (end - start) >> PAGE_SHIFT;
>  	int nid = page_to_nid((struct page *)start);
>  	struct page *page, *next;
> +	unsigned long nr_alloced;
>
> -	while (nr_pages--) {
> +	nr_alloced = alloc_pages_bulk_list_node(gfp_mask, nid, nr_pages, list);
> +	if (!nr_alloced)
> +		return -ENOMEM;
> +
> +	while (nr_alloced < nr_pages) {
>  		page = alloc_pages_node(nid, gfp_mask, 0);
>  		if (!page)
>  			goto out;

Eh, I forgot to increment nr_allocated in the fallback path, will resend.
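The 870,842ns vs 3,845,252ns figures are quoted without the measurement code; a timing of this kind could plausibly be collected with a wrapper along the lines of the sketch below. The wrapper name and pr_info format are invented for illustration, and the third parameter of alloc_vmemmap_page_list() is assumed to be the struct list_head * that its body uses.

	/*
	 * Hypothetical timing wrapper, not from the thread: bracket the call
	 * with ktime_get_ns() so the bulk and per-page variants can be
	 * compared. Intended to live next to alloc_vmemmap_page_list() in
	 * mm/hugetlb_vmemmap.c, since that function is static.
	 */
	static int timed_alloc_vmemmap_page_list(unsigned long start,
						 unsigned long end,
						 struct list_head *list)
	{
		u64 t0 = ktime_get_ns();
		int ret = alloc_vmemmap_page_list(start, end, list);

		pr_info("alloc_vmemmap_page_list: ret=%d, %llu ns\n",
			ret, ktime_get_ns() - t0);
		return ret;
	}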
> On Sep 5, 2023, at 15:32, Kefeng Wang <wangkefeng.wang@huawei.com> wrote:
>
> On 2023/9/5 15:10, Kefeng Wang wrote:
>> [...]
>> diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c
>> index 4b9734777f69..699c4fea6b9f 100644
>> --- a/mm/hugetlb_vmemmap.c
>> +++ b/mm/hugetlb_vmemmap.c
>> @@ -384,8 +384,13 @@ static int alloc_vmemmap_page_list(unsigned long start, unsigned long end,
>>  	unsigned long nr_pages = (end - start) >> PAGE_SHIFT;
>>  	int nid = page_to_nid((struct page *)start);
>>  	struct page *page, *next;
>> +	unsigned long nr_alloced;
>>
>> -	while (nr_pages--) {
>> +	nr_alloced = alloc_pages_bulk_list_node(gfp_mask, nid, nr_pages, list);
>> +	if (!nr_alloced)
>> +		return -ENOMEM;
>> +
>
> Eh, I forgot to increment nr_allocated in the fallback path, will resend.

Do not change the judgement, "nr_pages -= nr_alloced;" is enough
and simple.

>> +	while (nr_alloced < nr_pages) {
>>  		page = alloc_pages_node(nid, gfp_mask, 0);
>>  		if (!page)
>>  			goto out;
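For context, Muchun's suggestion appears to amount to keeping the original `while (nr_pages--)` loop untouched and only subtracting the bulk-allocated count before falling back. A rough sketch of that reading (not code from the thread):

	/* Try the bulk allocator first; bail out only if it got nothing. */
	nr_alloced = alloc_pages_bulk_list_node(gfp_mask, nid, nr_pages, list);
	if (!nr_alloced)
		return -ENOMEM;

	/* Only what the bulk call did not deliver still needs the fallback. */
	nr_pages -= nr_alloced;

	while (nr_pages--) {
		page = alloc_pages_node(nid, gfp_mask, 0);
		if (!page)
			goto out;
		list_add_tail(&page->lru, list);
	}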
On 2023/9/5 17:23, Muchun Song wrote:
>
>> On Sep 5, 2023, at 15:32, Kefeng Wang <wangkefeng.wang@huawei.com> wrote:
>> On 2023/9/5 15:10, Kefeng Wang wrote:
>>> [...]
>>> -	while (nr_pages--) {
>>> +	nr_alloced = alloc_pages_bulk_list_node(gfp_mask, nid, nr_pages, list);
>>> +	if (!nr_alloced)
>>> +		return -ENOMEM;
>>> +
>>
>> Eh, I forgot to increment nr_allocated in the fallback path, will resend.
>
> Do not change the judgement, "nr_pages -= nr_alloced;" is enough
> and simple.

Sure, thanks.
On 2023/9/5 17:23, Muchun Song wrote:
>
>> On Sep 5, 2023, at 15:32, Kefeng Wang <wangkefeng.wang@huawei.com> wrote:
>> On 2023/9/5 15:10, Kefeng Wang wrote:
>>> [...]
>>> -	while (nr_pages--) {
>>> +	nr_alloced = alloc_pages_bulk_list_node(gfp_mask, nid, nr_pages, list);
>>> +	if (!nr_alloced)
>>> +		return -ENOMEM;
>>> +
>>> +	while (nr_alloced < nr_pages) {
>>>  		page = alloc_pages_node(nid, gfp_mask, 0);
>>>  		if (!page)
>>>  			goto out;
>>
>> Eh, I forgot to increment nr_allocated in the fallback path, will resend.
>
> Do not change the judgement, "nr_pages -= nr_alloced;" is enough
> and simple.

Take nr_pages = 7 and nr_allocated = 4: the new nr_pages would be 3, so the
fallback loop would not execute unless nr_allocated is also cleared. Instead,
I will increment nr_allocated only when a page is allocated successfully:

 	while (nr_allocated < nr_pages) {
 		page = alloc_pages_node(nid, gfp_mask, 0);
 		if (!page)
 			goto out;
 		list_add_tail(&page->lru, list);
+		nr_allocated++;
 	}
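Combining the posted hunk with the increment fix above, the resulting function would presumably look roughly like the sketch below. The gfp_mask value, the list parameter name, and the error path are reconstructed from context rather than shown in the thread, and the actual resend may differ.

	static int alloc_vmemmap_page_list(unsigned long start, unsigned long end,
					   struct list_head *list)
	{
		/* Assumed mask; the real one is not visible in the quoted hunk. */
		gfp_t gfp_mask = GFP_KERNEL | __GFP_RETRY_MAYFAIL;
		unsigned long nr_pages = (end - start) >> PAGE_SHIFT;
		int nid = page_to_nid((struct page *)start);
		struct page *page, *next;
		unsigned long nr_allocated;

		/* Grab as many node-local pages as possible in one bulk call. */
		nr_allocated = alloc_pages_bulk_list_node(gfp_mask, nid, nr_pages, list);
		if (!nr_allocated)
			return -ENOMEM;

		/* Allocate whatever the bulk call missed, one page at a time. */
		while (nr_allocated < nr_pages) {
			page = alloc_pages_node(nid, gfp_mask, 0);
			if (!page)
				goto out;
			list_add_tail(&page->lru, list);
			nr_allocated++;
		}

		return 0;
	out:
		list_for_each_entry_safe(page, next, list, lru)
			__free_page(page);
		return -ENOMEM;
	}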
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 665f06675c83..d6e82f15b61f 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -195,6 +195,15 @@ alloc_pages_bulk_list(gfp_t gfp, unsigned long nr_pages, struct list_head *list)
 	return __alloc_pages_bulk(gfp, numa_mem_id(), NULL, nr_pages, list, NULL);
 }
 
+static inline unsigned long
+alloc_pages_bulk_list_node(gfp_t gfp, int nid, unsigned long nr_pages, struct list_head *list)
+{
+	if (nid == NUMA_NO_NODE)
+		nid = numa_mem_id();
+
+	return __alloc_pages_bulk(gfp, nid, NULL, nr_pages, list, NULL);
+}
+
 static inline unsigned long
 alloc_pages_bulk_array(gfp_t gfp, unsigned long nr_pages, struct page **page_array)
 {
diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c
index 4b9734777f69..699c4fea6b9f 100644
--- a/mm/hugetlb_vmemmap.c
+++ b/mm/hugetlb_vmemmap.c
@@ -384,8 +384,13 @@ static int alloc_vmemmap_page_list(unsigned long start, unsigned long end,
 	unsigned long nr_pages = (end - start) >> PAGE_SHIFT;
 	int nid = page_to_nid((struct page *)start);
 	struct page *page, *next;
+	unsigned long nr_alloced;
 
-	while (nr_pages--) {
+	nr_alloced = alloc_pages_bulk_list_node(gfp_mask, nid, nr_pages, list);
+	if (!nr_alloced)
+		return -ENOMEM;
+
+	while (nr_alloced < nr_pages) {
 		page = alloc_pages_node(nid, gfp_mask, 0);
 		if (!page)
 			goto out;
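Assuming the alloc_pages_bulk_list_node() helper lands as posted above, a minimal self-contained caller might look like the hypothetical sketch below; the function name and the page count of 16 are made up for illustration.

	#include <linux/gfp.h>
	#include <linux/list.h>
	#include <linux/mm.h>

	/* Hypothetical example: bulk-allocate 16 pages near @nid onto a list. */
	static int example_bulk_alloc(int nid)
	{
		LIST_HEAD(pages);
		struct page *page, *next;
		unsigned long got;

		/* May return fewer than 16 pages; zero means total failure. */
		got = alloc_pages_bulk_list_node(GFP_KERNEL, nid, 16, &pages);
		if (!got)
			return -ENOMEM;

		/* ... use the pages linked through page->lru ... */

		list_for_each_entry_safe(page, next, &pages, lru)
			__free_page(page);
		return 0;
	}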