Message ID | 20210714193542.21857-14-joao.m.martins@oracle.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | mm, sparse-vmemmap: Introduce compound pagemaps | expand |
On Wed, Jul 14, 2021 at 12:36 PM Joao Martins <joao.m.martins@oracle.com> wrote: > > Use try_grab_compound_head() for device-dax GUP when configured with a > compound pagemap. > > Rather than incrementing the refcount for each page, do one atomic > addition for all the pages to be pinned. > > Performance measured by gup_benchmark improves considerably > get_user_pages_fast() and pin_user_pages_fast() with NVDIMMs: > > $ gup_test -f /dev/dax1.0 -m 16384 -r 10 -S [-u,-a] -n 512 -w > (get_user_pages_fast 2M pages) ~59 ms -> ~6.1 ms > (pin_user_pages_fast 2M pages) ~87 ms -> ~6.2 ms > [altmap] > (get_user_pages_fast 2M pages) ~494 ms -> ~9 ms > (pin_user_pages_fast 2M pages) ~494 ms -> ~10 ms > > $ gup_test -f /dev/dax1.0 -m 129022 -r 10 -S [-u,-a] -n 512 -w > (get_user_pages_fast 2M pages) ~492 ms -> ~49 ms > (pin_user_pages_fast 2M pages) ~493 ms -> ~50 ms > [altmap with -m 127004] > (get_user_pages_fast 2M pages) ~3.91 sec -> ~70 ms > (pin_user_pages_fast 2M pages) ~3.97 sec -> ~74 ms > > Signed-off-by: Joao Martins <joao.m.martins@oracle.com> > --- > mm/gup.c | 53 +++++++++++++++++++++++++++++++++-------------------- > 1 file changed, 33 insertions(+), 20 deletions(-) > > diff --git a/mm/gup.c b/mm/gup.c > index 42b8b1fa6521..9baaa1c0b7f3 100644 > --- a/mm/gup.c > +++ b/mm/gup.c > @@ -2234,31 +2234,55 @@ static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end, > } > #endif /* CONFIG_ARCH_HAS_PTE_SPECIAL */ > > + > +static int record_subpages(struct page *page, unsigned long addr, > + unsigned long end, struct page **pages) > +{ > + int nr; > + > + for (nr = 0; addr != end; addr += PAGE_SIZE) > + pages[nr++] = page++; > + > + return nr; > +} > + > #if defined(CONFIG_ARCH_HAS_PTE_DEVMAP) && defined(CONFIG_TRANSPARENT_HUGEPAGE) > static int __gup_device_huge(unsigned long pfn, unsigned long addr, > unsigned long end, unsigned int flags, > struct page **pages, int *nr) > { > - int nr_start = *nr; > + int refs, nr_start = *nr; > struct dev_pagemap 
*pgmap = NULL; > > do { > - struct page *page = pfn_to_page(pfn); > + struct page *pinned_head, *head, *page = pfn_to_page(pfn); > + unsigned long next; > > pgmap = get_dev_pagemap(pfn, pgmap); > if (unlikely(!pgmap)) { > undo_dev_pagemap(nr, nr_start, flags, pages); > return 0; > } > - SetPageReferenced(page); > - pages[*nr] = page; > - if (unlikely(!try_grab_page(page, flags))) { > - undo_dev_pagemap(nr, nr_start, flags, pages); > + > + head = compound_head(page); > + /* @end is assumed to be limited at most one compound page */ > + next = PageCompound(head) ? end : addr + PAGE_SIZE; Please no ternary operator for this check, but otherwise this patch looks good to me. Reviewed-by: Dan Williams <dan.j.williams@intel.com> > + refs = record_subpages(page, addr, next, pages + *nr); > + > + SetPageReferenced(head); > + pinned_head = try_grab_compound_head(head, refs, flags); > + if (!pinned_head) { > + if (PageCompound(head)) { > + ClearPageReferenced(head); > + put_dev_pagemap(pgmap); > + } else { > + undo_dev_pagemap(nr, nr_start, flags, pages); > + } > return 0; > } > - (*nr)++; > - pfn++; > - } while (addr += PAGE_SIZE, addr != end); > + *nr += refs; > + pfn += refs; > + } while (addr += (refs << PAGE_SHIFT), addr != end); > > if (pgmap) > put_dev_pagemap(pgmap); > @@ -2318,17 +2342,6 @@ static int __gup_device_huge_pud(pud_t pud, pud_t *pudp, unsigned long addr, > } > #endif > > -static int record_subpages(struct page *page, unsigned long addr, > - unsigned long end, struct page **pages) > -{ > - int nr; > - > - for (nr = 0; addr != end; addr += PAGE_SIZE) > - pages[nr++] = page++; > - > - return nr; > -} > - > #ifdef CONFIG_ARCH_HAS_HUGEPD > static unsigned long hugepte_addr_end(unsigned long addr, unsigned long end, > unsigned long sz) > -- > 2.17.1 >
On 7/28/21 8:55 PM, Dan Williams wrote: > On Wed, Jul 14, 2021 at 12:36 PM Joao Martins <joao.m.martins@oracle.com> wrote: >> >> Use try_grab_compound_head() for device-dax GUP when configured with a >> compound pagemap. >> >> Rather than incrementing the refcount for each page, do one atomic >> addition for all the pages to be pinned. >> >> Performance measured by gup_benchmark improves considerably >> get_user_pages_fast() and pin_user_pages_fast() with NVDIMMs: >> >> $ gup_test -f /dev/dax1.0 -m 16384 -r 10 -S [-u,-a] -n 512 -w >> (get_user_pages_fast 2M pages) ~59 ms -> ~6.1 ms >> (pin_user_pages_fast 2M pages) ~87 ms -> ~6.2 ms >> [altmap] >> (get_user_pages_fast 2M pages) ~494 ms -> ~9 ms >> (pin_user_pages_fast 2M pages) ~494 ms -> ~10 ms >> >> $ gup_test -f /dev/dax1.0 -m 129022 -r 10 -S [-u,-a] -n 512 -w >> (get_user_pages_fast 2M pages) ~492 ms -> ~49 ms >> (pin_user_pages_fast 2M pages) ~493 ms -> ~50 ms >> [altmap with -m 127004] >> (get_user_pages_fast 2M pages) ~3.91 sec -> ~70 ms >> (pin_user_pages_fast 2M pages) ~3.97 sec -> ~74 ms >> >> Signed-off-by: Joao Martins <joao.m.martins@oracle.com> >> --- >> mm/gup.c | 53 +++++++++++++++++++++++++++++++++-------------------- >> 1 file changed, 33 insertions(+), 20 deletions(-) >> >> diff --git a/mm/gup.c b/mm/gup.c >> index 42b8b1fa6521..9baaa1c0b7f3 100644 >> --- a/mm/gup.c >> +++ b/mm/gup.c >> @@ -2234,31 +2234,55 @@ static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end, >> } >> #endif /* CONFIG_ARCH_HAS_PTE_SPECIAL */ >> >> + >> +static int record_subpages(struct page *page, unsigned long addr, >> + unsigned long end, struct page **pages) >> +{ >> + int nr; >> + >> + for (nr = 0; addr != end; addr += PAGE_SIZE) >> + pages[nr++] = page++; >> + >> + return nr; >> +} >> + >> #if defined(CONFIG_ARCH_HAS_PTE_DEVMAP) && defined(CONFIG_TRANSPARENT_HUGEPAGE) >> static int __gup_device_huge(unsigned long pfn, unsigned long addr, >> unsigned long end, unsigned int flags, >> struct page 
**pages, int *nr) >> { >> - int nr_start = *nr; >> + int refs, nr_start = *nr; >> struct dev_pagemap *pgmap = NULL; >> >> do { >> - struct page *page = pfn_to_page(pfn); >> + struct page *pinned_head, *head, *page = pfn_to_page(pfn); >> + unsigned long next; >> >> pgmap = get_dev_pagemap(pfn, pgmap); >> if (unlikely(!pgmap)) { >> undo_dev_pagemap(nr, nr_start, flags, pages); >> return 0; >> } >> - SetPageReferenced(page); >> - pages[*nr] = page; >> - if (unlikely(!try_grab_page(page, flags))) { >> - undo_dev_pagemap(nr, nr_start, flags, pages); >> + >> + head = compound_head(page); >> + /* @end is assumed to be limited at most one compound page */ >> + next = PageCompound(head) ? end : addr + PAGE_SIZE; > > Please no ternary operator for this check, but otherwise this patch > looks good to me. > OK. I take that you prefer this instead: unsigned long next = addr + PAGE_SIZE; [...] /* @end is assumed to be limited at most one compound page */ if (PageCompound(head)) next = end; > Reviewed-by: Dan Williams <dan.j.williams@intel.com> > Thanks!
On Wed, Jul 28, 2021 at 1:08 PM Joao Martins <joao.m.martins@oracle.com> wrote: > > > > On 7/28/21 8:55 PM, Dan Williams wrote: > > On Wed, Jul 14, 2021 at 12:36 PM Joao Martins <joao.m.martins@oracle.com> wrote: > >> > >> Use try_grab_compound_head() for device-dax GUP when configured with a > >> compound pagemap. > >> > >> Rather than incrementing the refcount for each page, do one atomic > >> addition for all the pages to be pinned. > >> > >> Performance measured by gup_benchmark improves considerably > >> get_user_pages_fast() and pin_user_pages_fast() with NVDIMMs: > >> > >> $ gup_test -f /dev/dax1.0 -m 16384 -r 10 -S [-u,-a] -n 512 -w > >> (get_user_pages_fast 2M pages) ~59 ms -> ~6.1 ms > >> (pin_user_pages_fast 2M pages) ~87 ms -> ~6.2 ms > >> [altmap] > >> (get_user_pages_fast 2M pages) ~494 ms -> ~9 ms > >> (pin_user_pages_fast 2M pages) ~494 ms -> ~10 ms > >> > >> $ gup_test -f /dev/dax1.0 -m 129022 -r 10 -S [-u,-a] -n 512 -w > >> (get_user_pages_fast 2M pages) ~492 ms -> ~49 ms > >> (pin_user_pages_fast 2M pages) ~493 ms -> ~50 ms > >> [altmap with -m 127004] > >> (get_user_pages_fast 2M pages) ~3.91 sec -> ~70 ms > >> (pin_user_pages_fast 2M pages) ~3.97 sec -> ~74 ms > >> > >> Signed-off-by: Joao Martins <joao.m.martins@oracle.com> > >> --- > >> mm/gup.c | 53 +++++++++++++++++++++++++++++++++-------------------- > >> 1 file changed, 33 insertions(+), 20 deletions(-) > >> > >> diff --git a/mm/gup.c b/mm/gup.c > >> index 42b8b1fa6521..9baaa1c0b7f3 100644 > >> --- a/mm/gup.c > >> +++ b/mm/gup.c > >> @@ -2234,31 +2234,55 @@ static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end, > >> } > >> #endif /* CONFIG_ARCH_HAS_PTE_SPECIAL */ > >> > >> + > >> +static int record_subpages(struct page *page, unsigned long addr, > >> + unsigned long end, struct page **pages) > >> +{ > >> + int nr; > >> + > >> + for (nr = 0; addr != end; addr += PAGE_SIZE) > >> + pages[nr++] = page++; > >> + > >> + return nr; > >> +} > >> + > >> #if 
defined(CONFIG_ARCH_HAS_PTE_DEVMAP) && defined(CONFIG_TRANSPARENT_HUGEPAGE) > >> static int __gup_device_huge(unsigned long pfn, unsigned long addr, > >> unsigned long end, unsigned int flags, > >> struct page **pages, int *nr) > >> { > >> - int nr_start = *nr; > >> + int refs, nr_start = *nr; > >> struct dev_pagemap *pgmap = NULL; > >> > >> do { > >> - struct page *page = pfn_to_page(pfn); > >> + struct page *pinned_head, *head, *page = pfn_to_page(pfn); > >> + unsigned long next; > >> > >> pgmap = get_dev_pagemap(pfn, pgmap); > >> if (unlikely(!pgmap)) { > >> undo_dev_pagemap(nr, nr_start, flags, pages); > >> return 0; > >> } > >> - SetPageReferenced(page); > >> - pages[*nr] = page; > >> - if (unlikely(!try_grab_page(page, flags))) { > >> - undo_dev_pagemap(nr, nr_start, flags, pages); > >> + > >> + head = compound_head(page); > >> + /* @end is assumed to be limited at most one compound page */ > >> + next = PageCompound(head) ? end : addr + PAGE_SIZE; > > > > Please no ternary operator for this check, but otherwise this patch > > looks good to me. > > > OK. I take that you prefer this instead: > > unsigned long next = addr + PAGE_SIZE; > > [...] > > /* @end is assumed to be limited at most one compound page */ > if (PageCompound(head)) > next = end; Yup.
On 7/28/21 9:23 PM, Dan Williams wrote: > On Wed, Jul 28, 2021 at 1:08 PM Joao Martins <joao.m.martins@oracle.com> wrote: >> On 7/28/21 8:55 PM, Dan Williams wrote: >>> On Wed, Jul 14, 2021 at 12:36 PM Joao Martins <joao.m.martins@oracle.com> wrote: >>>> >>>> Use try_grab_compound_head() for device-dax GUP when configured with a >>>> compound pagemap. >>>> >>>> Rather than incrementing the refcount for each page, do one atomic >>>> addition for all the pages to be pinned. >>>> >>>> Performance measured by gup_benchmark improves considerably >>>> get_user_pages_fast() and pin_user_pages_fast() with NVDIMMs: >>>> >>>> $ gup_test -f /dev/dax1.0 -m 16384 -r 10 -S [-u,-a] -n 512 -w >>>> (get_user_pages_fast 2M pages) ~59 ms -> ~6.1 ms >>>> (pin_user_pages_fast 2M pages) ~87 ms -> ~6.2 ms >>>> [altmap] >>>> (get_user_pages_fast 2M pages) ~494 ms -> ~9 ms >>>> (pin_user_pages_fast 2M pages) ~494 ms -> ~10 ms >>>> >>>> $ gup_test -f /dev/dax1.0 -m 129022 -r 10 -S [-u,-a] -n 512 -w >>>> (get_user_pages_fast 2M pages) ~492 ms -> ~49 ms >>>> (pin_user_pages_fast 2M pages) ~493 ms -> ~50 ms >>>> [altmap with -m 127004] >>>> (get_user_pages_fast 2M pages) ~3.91 sec -> ~70 ms >>>> (pin_user_pages_fast 2M pages) ~3.97 sec -> ~74 ms >>>> >>>> Signed-off-by: Joao Martins <joao.m.martins@oracle.com> >>>> --- >>>> mm/gup.c | 53 +++++++++++++++++++++++++++++++++-------------------- >>>> 1 file changed, 33 insertions(+), 20 deletions(-) >>>> >>>> diff --git a/mm/gup.c b/mm/gup.c >>>> index 42b8b1fa6521..9baaa1c0b7f3 100644 >>>> --- a/mm/gup.c >>>> +++ b/mm/gup.c >>>> @@ -2234,31 +2234,55 @@ static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end, >>>> } >>>> #endif /* CONFIG_ARCH_HAS_PTE_SPECIAL */ >>>> >>>> + >>>> +static int record_subpages(struct page *page, unsigned long addr, >>>> + unsigned long end, struct page **pages) >>>> +{ >>>> + int nr; >>>> + >>>> + for (nr = 0; addr != end; addr += PAGE_SIZE) >>>> + pages[nr++] = page++; >>>> + >>>> + return nr; >>>> +} 
>>>> + >>>> #if defined(CONFIG_ARCH_HAS_PTE_DEVMAP) && defined(CONFIG_TRANSPARENT_HUGEPAGE) >>>> static int __gup_device_huge(unsigned long pfn, unsigned long addr, >>>> unsigned long end, unsigned int flags, >>>> struct page **pages, int *nr) >>>> { >>>> - int nr_start = *nr; >>>> + int refs, nr_start = *nr; >>>> struct dev_pagemap *pgmap = NULL; >>>> >>>> do { >>>> - struct page *page = pfn_to_page(pfn); >>>> + struct page *pinned_head, *head, *page = pfn_to_page(pfn); >>>> + unsigned long next; >>>> >>>> pgmap = get_dev_pagemap(pfn, pgmap); >>>> if (unlikely(!pgmap)) { >>>> undo_dev_pagemap(nr, nr_start, flags, pages); >>>> return 0; >>>> } >>>> - SetPageReferenced(page); >>>> - pages[*nr] = page; >>>> - if (unlikely(!try_grab_page(page, flags))) { >>>> - undo_dev_pagemap(nr, nr_start, flags, pages); >>>> + >>>> + head = compound_head(page); >>>> + /* @end is assumed to be limited at most one compound page */ >>>> + next = PageCompound(head) ? end : addr + PAGE_SIZE; >>> >>> Please no ternary operator for this check, but otherwise this patch >>> looks good to me. >>> >> OK. I take that you prefer this instead: >> >> unsigned long next = addr + PAGE_SIZE; >> >> [...] >> >> /* @end is assumed to be limited at most one compound page */ >> if (PageCompound(head)) >> next = end; > > Yup. > In addition to the above, I'll remove the @pinned_head variable and retain the current style that was with try_grab_page() while retaining the unlikely that was there before. I'm assuming I can retain the Reviewed-by tag, but let me know if otherwise (diff below). 
diff --git a/mm/gup.c b/mm/gup.c index ca64bc0b339e..398bee74105a 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -2256,7 +2256,7 @@ static int __gup_device_huge(unsigned long pfn, unsigned long addr, int ret = 1; do { - struct page *pinned_head, *head, *page = pfn_to_page(pfn); + struct page *head, *page = pfn_to_page(pfn); unsigned long next = addr + PAGE_SIZE; pgmap = get_dev_pagemap(pfn, pgmap); @@ -2273,8 +2273,7 @@ static int __gup_device_huge(unsigned long pfn, unsigned long addr, refs = record_subpages(page, addr, next, pages + *nr); SetPageReferenced(head); - pinned_head = try_grab_compound_head(head, refs, flags); - if (unlikely(!pinned_head)) { + if (unlikely(!try_grab_compound_head(head, refs, flags))) { if (PageCompound(head)) ClearPageReferenced(head); else
On Wed, Aug 25, 2021 at 08:10:39PM +0100, Joao Martins wrote: > @@ -2273,8 +2273,7 @@ static int __gup_device_huge(unsigned long pfn, unsigned long addr, > refs = record_subpages(page, addr, next, pages + *nr); > > SetPageReferenced(head); > - pinned_head = try_grab_compound_head(head, refs, flags); > - if (unlikely(!pinned_head)) { > + if (unlikely(!try_grab_compound_head(head, refs, flags))) { > if (PageCompound(head)) BTW, you can just check PageHead(head). We know it can't be PageTail ...
On 8/25/21 8:15 PM, Matthew Wilcox wrote: > On Wed, Aug 25, 2021 at 08:10:39PM +0100, Joao Martins wrote: >> @@ -2273,8 +2273,7 @@ static int __gup_device_huge(unsigned long pfn, unsigned long addr, >> refs = record_subpages(page, addr, next, pages + *nr); >> >> SetPageReferenced(head); >> - pinned_head = try_grab_compound_head(head, refs, flags); >> - if (unlikely(!pinned_head)) { >> + if (unlikely(!try_grab_compound_head(head, refs, flags))) { >> if (PageCompound(head)) > > BTW, you can just check PageHead(head). We know it can't be PageTail ... > Ugh, yes. Your comment is also applicable to the other PageCompound() added before, as it's done on the compound head. I've fixed it on both, thanks!
diff --git a/mm/gup.c b/mm/gup.c index 42b8b1fa6521..9baaa1c0b7f3 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -2234,31 +2234,55 @@ static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end, } #endif /* CONFIG_ARCH_HAS_PTE_SPECIAL */ + +static int record_subpages(struct page *page, unsigned long addr, + unsigned long end, struct page **pages) +{ + int nr; + + for (nr = 0; addr != end; addr += PAGE_SIZE) + pages[nr++] = page++; + + return nr; +} + #if defined(CONFIG_ARCH_HAS_PTE_DEVMAP) && defined(CONFIG_TRANSPARENT_HUGEPAGE) static int __gup_device_huge(unsigned long pfn, unsigned long addr, unsigned long end, unsigned int flags, struct page **pages, int *nr) { - int nr_start = *nr; + int refs, nr_start = *nr; struct dev_pagemap *pgmap = NULL; do { - struct page *page = pfn_to_page(pfn); + struct page *pinned_head, *head, *page = pfn_to_page(pfn); + unsigned long next; pgmap = get_dev_pagemap(pfn, pgmap); if (unlikely(!pgmap)) { undo_dev_pagemap(nr, nr_start, flags, pages); return 0; } - SetPageReferenced(page); - pages[*nr] = page; - if (unlikely(!try_grab_page(page, flags))) { - undo_dev_pagemap(nr, nr_start, flags, pages); + + head = compound_head(page); + /* @end is assumed to be limited at most one compound page */ + next = PageCompound(head) ? 
end : addr + PAGE_SIZE; + refs = record_subpages(page, addr, next, pages + *nr); + + SetPageReferenced(head); + pinned_head = try_grab_compound_head(head, refs, flags); + if (!pinned_head) { + if (PageCompound(head)) { + ClearPageReferenced(head); + put_dev_pagemap(pgmap); + } else { + undo_dev_pagemap(nr, nr_start, flags, pages); + } return 0; } - (*nr)++; - pfn++; - } while (addr += PAGE_SIZE, addr != end); + *nr += refs; + pfn += refs; + } while (addr += (refs << PAGE_SHIFT), addr != end); if (pgmap) put_dev_pagemap(pgmap); @@ -2318,17 +2342,6 @@ static int __gup_device_huge_pud(pud_t pud, pud_t *pudp, unsigned long addr, } #endif -static int record_subpages(struct page *page, unsigned long addr, - unsigned long end, struct page **pages) -{ - int nr; - - for (nr = 0; addr != end; addr += PAGE_SIZE) - pages[nr++] = page++; - - return nr; -} - #ifdef CONFIG_ARCH_HAS_HUGEPD static unsigned long hugepte_addr_end(unsigned long addr, unsigned long end, unsigned long sz)
Use try_grab_compound_head() for device-dax GUP when configured with a compound pagemap. Rather than incrementing the refcount for each page, do one atomic addition for all the pages to be pinned. Performance measured by gup_benchmark improves considerably get_user_pages_fast() and pin_user_pages_fast() with NVDIMMs: $ gup_test -f /dev/dax1.0 -m 16384 -r 10 -S [-u,-a] -n 512 -w (get_user_pages_fast 2M pages) ~59 ms -> ~6.1 ms (pin_user_pages_fast 2M pages) ~87 ms -> ~6.2 ms [altmap] (get_user_pages_fast 2M pages) ~494 ms -> ~9 ms (pin_user_pages_fast 2M pages) ~494 ms -> ~10 ms $ gup_test -f /dev/dax1.0 -m 129022 -r 10 -S [-u,-a] -n 512 -w (get_user_pages_fast 2M pages) ~492 ms -> ~49 ms (pin_user_pages_fast 2M pages) ~493 ms -> ~50 ms [altmap with -m 127004] (get_user_pages_fast 2M pages) ~3.91 sec -> ~70 ms (pin_user_pages_fast 2M pages) ~3.97 sec -> ~74 ms Signed-off-by: Joao Martins <joao.m.martins@oracle.com> --- mm/gup.c | 53 +++++++++++++++++++++++++++++++++-------------------- 1 file changed, 33 insertions(+), 20 deletions(-)