Message ID | 20231204142146.91437-24-david@redhat.com (mailing list archive)
---|---
State | New
Series | mm/rmap: interface overhaul
On 04/12/2023 14:21, David Hildenbrand wrote:
> Let's mimic what we did with folio_add_file_rmap_*() and
> folio_add_anon_rmap_*() so we can similarly replace page_remove_rmap()
> next.
>
> Make the compiler always special-case on the granularity by using
> __always_inline.
>
> We're adding folio_remove_rmap_ptes() handling right away, as we want to
> use that soon for batching rmap operations when unmapping PTE-mapped
> large folios.
>
> Signed-off-by: David Hildenbrand <david@redhat.com>
> ---
>  include/linux/rmap.h |  6 ++++
>  mm/rmap.c            | 76 ++++++++++++++++++++++++++++++++++++--------
>  2 files changed, 68 insertions(+), 14 deletions(-)
>
> diff --git a/include/linux/rmap.h b/include/linux/rmap.h
> index 017b216915f19..dd4ffb1d8ae04 100644
> --- a/include/linux/rmap.h
> +++ b/include/linux/rmap.h
> @@ -241,6 +241,12 @@ void folio_add_file_rmap_pmd(struct folio *, struct page *,
>  		struct vm_area_struct *);
>  void page_remove_rmap(struct page *, struct vm_area_struct *,
>  		bool compound);
> +void folio_remove_rmap_ptes(struct folio *, struct page *, unsigned int nr,
> +		struct vm_area_struct *);
> +#define folio_remove_rmap_pte(folio, page, vma) \
> +	folio_remove_rmap_ptes(folio, page, 1, vma)
> +void folio_remove_rmap_pmd(struct folio *, struct page *,
> +		struct vm_area_struct *);
>
>  void hugetlb_add_anon_rmap(struct folio *, struct vm_area_struct *,
>  		unsigned long address, rmap_t flags);
> diff --git a/mm/rmap.c b/mm/rmap.c
> index 3587225055c5e..50b6909157ac1 100644
> --- a/mm/rmap.c
> +++ b/mm/rmap.c
> @@ -1463,25 +1463,36 @@ void page_remove_rmap(struct page *page, struct vm_area_struct *vma,
>  		bool compound)
>  {
>  	struct folio *folio = page_folio(page);
> +
> +	if (likely(!compound))
> +		folio_remove_rmap_pte(folio, page, vma);
> +	else
> +		folio_remove_rmap_pmd(folio, page, vma);
> +}
> +
> +static __always_inline void __folio_remove_rmap(struct folio *folio,
> +		struct page *page, unsigned int nr_pages,
> +		struct vm_area_struct *vma, enum rmap_mode mode)
> +{
>  	atomic_t *mapped = &folio->_nr_pages_mapped;
> -	int nr = 0, nr_pmdmapped = 0;
> -	bool last;
> +	int last, nr = 0, nr_pmdmapped = 0;

nit: you're being inconsistent across the functions with signed vs unsigned for
page counts (e.g. nr, nr_pmdmapped) - see __folio_add_rmap(),
__folio_add_file_rmap(), __folio_add_anon_rmap().

I suggest pick one and stick to it. Personally I'd go with signed int (since
that's what all the counters in struct folio that we are manipulating are,
underneath the atomic_t) then check that nr_pages > 0 in
__folio_rmap_sanity_checks().

>  	enum node_stat_item idx;
>
> -	VM_WARN_ON_FOLIO(folio_test_hugetlb(folio), folio);
> -	VM_BUG_ON_PAGE(compound && !PageHead(page), page);
> +	__folio_rmap_sanity_checks(folio, page, nr_pages, mode);
>
>  	/* Is page being unmapped by PTE? Is this its last map to be removed? */
> -	if (likely(!compound)) {
> -		last = atomic_add_negative(-1, &page->_mapcount);
> -		nr = last;
> -		if (last && folio_test_large(folio)) {
> -			nr = atomic_dec_return_relaxed(mapped);
> -			nr = (nr < COMPOUND_MAPPED);
> -		}
> -	} else if (folio_test_pmd_mappable(folio)) {
> -		/* That test is redundant: it's for safety or to optimize out */
> +	if (likely(mode == RMAP_MODE_PTE)) {
> +		do {
> +			last = atomic_add_negative(-1, &page->_mapcount);
> +			if (last && folio_test_large(folio)) {
> +				last = atomic_dec_return_relaxed(mapped);
> +				last = (last < COMPOUND_MAPPED);
> +			}
>
> +			if (last)
> +				nr++;
> +		} while (page++, --nr_pages > 0);
> +	} else if (mode == RMAP_MODE_PMD) {
>  		last = atomic_add_negative(-1, &folio->_entire_mapcount);
>  		if (last) {
>  			nr = atomic_sub_return_relaxed(COMPOUND_MAPPED, mapped);
> @@ -1517,7 +1528,7 @@ void page_remove_rmap(struct page *page, struct vm_area_struct *vma,
>  	 * is still mapped.
>  	 */
>  	if (folio_test_pmd_mappable(folio) && folio_test_anon(folio))

folio_test_pmd_mappable() -> folio_test_large()

Since you're converting this to support batch PTE removal, it might as well also
support smaller-than-pmd too?

I currently have a patch to do this same change in the multi-size THP series.

> -		if (!compound || nr < nr_pmdmapped)
> +		if (mode == RMAP_MODE_PTE || nr < nr_pmdmapped)
>  			deferred_split_folio(folio);
>  }
>
> @@ -1532,6 +1543,43 @@ void page_remove_rmap(struct page *page, struct vm_area_struct *vma,
>  	munlock_vma_folio(folio, vma);
>  }
>
> +/**
> + * folio_remove_rmap_ptes - remove PTE mappings from a page range of a folio
> + * @folio: The folio to remove the mappings from
> + * @page: The first page to remove
> + * @nr_pages: The number of pages that will be removed from the mapping
> + * @vma: The vm area from which the mappings are removed
> + *
> + * The page range of the folio is defined by [page, page + nr_pages)
> + *
> + * The caller needs to hold the page table lock.
> + */
> +void folio_remove_rmap_ptes(struct folio *folio, struct page *page,
> +		unsigned int nr_pages, struct vm_area_struct *vma)
> +{
> +	__folio_remove_rmap(folio, page, nr_pages, vma, RMAP_MODE_PTE);
> +}
> +
> +/**
> + * folio_remove_rmap_pmd - remove a PMD mapping from a page range of a folio
> + * @folio: The folio to remove the mapping from
> + * @page: The first page to remove
> + * @vma: The vm area from which the mapping is removed
> + *
> + * The page range of the folio is defined by [page, page + HPAGE_PMD_NR)
> + *
> + * The caller needs to hold the page table lock.
> + */
> +void folio_remove_rmap_pmd(struct folio *folio, struct page *page,
> +		struct vm_area_struct *vma)
> +{
> +#ifdef CONFIG_TRANSPARENT_HUGEPAGE
> +	__folio_remove_rmap(folio, page, HPAGE_PMD_NR, vma, RMAP_MODE_PMD);
> +#else
> +	WARN_ON_ONCE(true);
> +#endif
> +}
> +
>  /*
>   * @arg: enum ttu_flags will be passed to this argument
>   */
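For concreteness, a minimal sketch of the suggested nr_pages > 0 check. It
assumes the __folio_rmap_sanity_checks() helper and enum rmap_mode introduced
earlier in this series; the other checks shown here are only illustrative, not
the helper's actual body:

static inline void __folio_rmap_sanity_checks(struct folio *folio,
		struct page *page, int nr_pages, enum rmap_mode mode)
{
	/* hugetlb folios are handled by the separate hugetlb rmap interface. */
	VM_WARN_ON_FOLIO(folio_test_hugetlb(folio), folio);

	/* With a signed count, a zero or negative batch is clearly a bug. */
	VM_WARN_ON_FOLIO(nr_pages <= 0, folio);

	/* The batched page range must stay within the folio. */
	VM_WARN_ON_FOLIO(page_folio(page) != folio, folio);
	VM_WARN_ON_FOLIO(page_folio(page + nr_pages - 1) != folio, folio);
}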
>> +static __always_inline void __folio_remove_rmap(struct folio *folio,
>> +		struct page *page, unsigned int nr_pages,
>> +		struct vm_area_struct *vma, enum rmap_mode mode)
>> +{
>>  	atomic_t *mapped = &folio->_nr_pages_mapped;
>> -	int nr = 0, nr_pmdmapped = 0;
>> -	bool last;
>> +	int last, nr = 0, nr_pmdmapped = 0;
>
> nit: you're being inconsistent across the functions with signed vs unsigned for
> page counts (e.g. nr, nr_pmdmapped) - see __folio_add_rmap(),
> __folio_add_file_rmap(), __folio_add_anon_rmap().
>

Definitely.

> I suggest pick one and stick to it. Personally I'd go with signed int (since
> that's what all the counters in struct folio that we are manipulating are,
> underneath the atomic_t) then check that nr_pages > 0 in
> __folio_rmap_sanity_checks().

Can do, but note that the counters are signed to detect underflows. It doesn't
make sense here to pass a negative number.

>
>>  	enum node_stat_item idx;
>>
>> -	VM_WARN_ON_FOLIO(folio_test_hugetlb(folio), folio);
>> -	VM_BUG_ON_PAGE(compound && !PageHead(page), page);
>> +	__folio_rmap_sanity_checks(folio, page, nr_pages, mode);
>>
>>  	/* Is page being unmapped by PTE? Is this its last map to be removed? */
>> -	if (likely(!compound)) {
>> -		last = atomic_add_negative(-1, &page->_mapcount);
>> -		nr = last;
>> -		if (last && folio_test_large(folio)) {
>> -			nr = atomic_dec_return_relaxed(mapped);
>> -			nr = (nr < COMPOUND_MAPPED);
>> -		}
>> -	} else if (folio_test_pmd_mappable(folio)) {
>> -		/* That test is redundant: it's for safety or to optimize out */
>> +	if (likely(mode == RMAP_MODE_PTE)) {
>> +		do {
>> +			last = atomic_add_negative(-1, &page->_mapcount);
>> +			if (last && folio_test_large(folio)) {
>> +				last = atomic_dec_return_relaxed(mapped);
>> +				last = (last < COMPOUND_MAPPED);
>> +			}
>>
>> +			if (last)
>> +				nr++;
>> +		} while (page++, --nr_pages > 0);
>> +	} else if (mode == RMAP_MODE_PMD) {
>>  		last = atomic_add_negative(-1, &folio->_entire_mapcount);
>>  		if (last) {
>>  			nr = atomic_sub_return_relaxed(COMPOUND_MAPPED, mapped);
>> @@ -1517,7 +1528,7 @@ void page_remove_rmap(struct page *page, struct vm_area_struct *vma,
>>  	 * is still mapped.
>>  	 */
>>  	if (folio_test_pmd_mappable(folio) && folio_test_anon(folio))
>
> folio_test_pmd_mappable() -> folio_test_large()
>
> Since you're converting this to support batch PTE removal, it might as well also
> support smaller-than-pmd too?

I remember that you have a patch for that, right? :)

>
> I currently have a patch to do this same change in the multi-size THP series.
>

Ah, yes, and that should go in first.
On 05/12/2023 13:09, David Hildenbrand wrote:
>>> +static __always_inline void __folio_remove_rmap(struct folio *folio,
>>> +		struct page *page, unsigned int nr_pages,
>>> +		struct vm_area_struct *vma, enum rmap_mode mode)
>>> +{
>>>  	atomic_t *mapped = &folio->_nr_pages_mapped;
>>> -	int nr = 0, nr_pmdmapped = 0;
>>> -	bool last;
>>> +	int last, nr = 0, nr_pmdmapped = 0;
>>
>> nit: you're being inconsistent across the functions with signed vs unsigned for
>> page counts (e.g. nr, nr_pmdmapped) - see __folio_add_rmap(),
>> __folio_add_file_rmap(), __folio_add_anon_rmap().
>>
>
> Definitely.
>
>> I suggest pick one and stick to it. Personally I'd go with signed int (since
>> that's what all the counters in struct folio that we are manipulating are,
>> underneath the atomic_t) then check that nr_pages > 0 in
>> __folio_rmap_sanity_checks().
>
> Can do, but note that the counters are signed to detect underflows. It doesn't
> make sense here to pass a negative number.

I agree it doesn't make sense to pass negative - hence the check.

These 2 functions are inconsistent on size, but agree on signed:

long folio_nr_pages(struct folio *folio)
int folio_nr_pages_mapped(struct folio *folio)

I don't have a strong opinion.

>
>>
>>>  	enum node_stat_item idx;
>>> -	VM_WARN_ON_FOLIO(folio_test_hugetlb(folio), folio);
>>> -	VM_BUG_ON_PAGE(compound && !PageHead(page), page);
>>> +	__folio_rmap_sanity_checks(folio, page, nr_pages, mode);
>>>  	/* Is page being unmapped by PTE? Is this its last map to be removed? */
>>> -	if (likely(!compound)) {
>>> -		last = atomic_add_negative(-1, &page->_mapcount);
>>> -		nr = last;
>>> -		if (last && folio_test_large(folio)) {
>>> -			nr = atomic_dec_return_relaxed(mapped);
>>> -			nr = (nr < COMPOUND_MAPPED);
>>> -		}
>>> -	} else if (folio_test_pmd_mappable(folio)) {
>>> -		/* That test is redundant: it's for safety or to optimize out */
>>> +	if (likely(mode == RMAP_MODE_PTE)) {
>>> +		do {
>>> +			last = atomic_add_negative(-1, &page->_mapcount);
>>> +			if (last && folio_test_large(folio)) {
>>> +				last = atomic_dec_return_relaxed(mapped);
>>> +				last = (last < COMPOUND_MAPPED);
>>> +			}
>>> +			if (last)
>>> +				nr++;
>>> +		} while (page++, --nr_pages > 0);
>>> +	} else if (mode == RMAP_MODE_PMD) {
>>>  		last = atomic_add_negative(-1, &folio->_entire_mapcount);
>>>  		if (last) {
>>>  			nr = atomic_sub_return_relaxed(COMPOUND_MAPPED, mapped);
>>> @@ -1517,7 +1528,7 @@ void page_remove_rmap(struct page *page, struct vm_area_struct *vma,
>>>  	 * is still mapped.
>>>  	 */
>>>  	if (folio_test_pmd_mappable(folio) && folio_test_anon(folio))
>>
>> folio_test_pmd_mappable() -> folio_test_large()
>>
>> Since you're converting this to support batch PTE removal, it might as well also
>> support smaller-than-pmd too?
>
> I remember that you have a patch for that, right? :)
>
>>
>> I currently have a patch to do this same change in the multi-size THP series.
>>
>
> Ah, yes, and that should go in first.
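For reference, the direction agreed on above would make the deferred-split
check cover any partially unmapped large anon folio, not only PMD-mappable
ones. A sketch of the resulting fragment inside __folio_remove_rmap(), with
the surrounding context elided (the actual change is expected to land via the
multi-size THP series first):

	/*
	 * Sketch only: queue any partially unmapped large anon folio for
	 * deferred splitting, rather than only PMD-mappable ones.
	 */
	if (folio_test_large(folio) && folio_test_anon(folio))
		if (mode == RMAP_MODE_PTE || nr < nr_pmdmapped)
			deferred_split_folio(folio);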
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 017b216915f19..dd4ffb1d8ae04 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -241,6 +241,12 @@ void folio_add_file_rmap_pmd(struct folio *, struct page *,
 		struct vm_area_struct *);
 void page_remove_rmap(struct page *, struct vm_area_struct *,
 		bool compound);
+void folio_remove_rmap_ptes(struct folio *, struct page *, unsigned int nr,
+		struct vm_area_struct *);
+#define folio_remove_rmap_pte(folio, page, vma) \
+	folio_remove_rmap_ptes(folio, page, 1, vma)
+void folio_remove_rmap_pmd(struct folio *, struct page *,
+		struct vm_area_struct *);
 
 void hugetlb_add_anon_rmap(struct folio *, struct vm_area_struct *,
 		unsigned long address, rmap_t flags);
diff --git a/mm/rmap.c b/mm/rmap.c
index 3587225055c5e..50b6909157ac1 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1463,25 +1463,36 @@ void page_remove_rmap(struct page *page, struct vm_area_struct *vma,
 		bool compound)
 {
 	struct folio *folio = page_folio(page);
+
+	if (likely(!compound))
+		folio_remove_rmap_pte(folio, page, vma);
+	else
+		folio_remove_rmap_pmd(folio, page, vma);
+}
+
+static __always_inline void __folio_remove_rmap(struct folio *folio,
+		struct page *page, unsigned int nr_pages,
+		struct vm_area_struct *vma, enum rmap_mode mode)
+{
 	atomic_t *mapped = &folio->_nr_pages_mapped;
-	int nr = 0, nr_pmdmapped = 0;
-	bool last;
+	int last, nr = 0, nr_pmdmapped = 0;
 	enum node_stat_item idx;
 
-	VM_WARN_ON_FOLIO(folio_test_hugetlb(folio), folio);
-	VM_BUG_ON_PAGE(compound && !PageHead(page), page);
+	__folio_rmap_sanity_checks(folio, page, nr_pages, mode);
 
 	/* Is page being unmapped by PTE? Is this its last map to be removed? */
-	if (likely(!compound)) {
-		last = atomic_add_negative(-1, &page->_mapcount);
-		nr = last;
-		if (last && folio_test_large(folio)) {
-			nr = atomic_dec_return_relaxed(mapped);
-			nr = (nr < COMPOUND_MAPPED);
-		}
-	} else if (folio_test_pmd_mappable(folio)) {
-		/* That test is redundant: it's for safety or to optimize out */
+	if (likely(mode == RMAP_MODE_PTE)) {
+		do {
+			last = atomic_add_negative(-1, &page->_mapcount);
+			if (last && folio_test_large(folio)) {
+				last = atomic_dec_return_relaxed(mapped);
+				last = (last < COMPOUND_MAPPED);
+			}
+
+			if (last)
+				nr++;
+		} while (page++, --nr_pages > 0);
+	} else if (mode == RMAP_MODE_PMD) {
 		last = atomic_add_negative(-1, &folio->_entire_mapcount);
 		if (last) {
 			nr = atomic_sub_return_relaxed(COMPOUND_MAPPED, mapped);
@@ -1517,7 +1528,7 @@ void page_remove_rmap(struct page *page, struct vm_area_struct *vma,
 	 * is still mapped.
 	 */
 	if (folio_test_pmd_mappable(folio) && folio_test_anon(folio))
-		if (!compound || nr < nr_pmdmapped)
+		if (mode == RMAP_MODE_PTE || nr < nr_pmdmapped)
 			deferred_split_folio(folio);
 }
 
@@ -1532,6 +1543,43 @@ void page_remove_rmap(struct page *page, struct vm_area_struct *vma,
 	munlock_vma_folio(folio, vma);
 }
 
+/**
+ * folio_remove_rmap_ptes - remove PTE mappings from a page range of a folio
+ * @folio: The folio to remove the mappings from
+ * @page: The first page to remove
+ * @nr_pages: The number of pages that will be removed from the mapping
+ * @vma: The vm area from which the mappings are removed
+ *
+ * The page range of the folio is defined by [page, page + nr_pages)
+ *
+ * The caller needs to hold the page table lock.
+ */
+void folio_remove_rmap_ptes(struct folio *folio, struct page *page,
+		unsigned int nr_pages, struct vm_area_struct *vma)
+{
+	__folio_remove_rmap(folio, page, nr_pages, vma, RMAP_MODE_PTE);
+}
+
+/**
+ * folio_remove_rmap_pmd - remove a PMD mapping from a page range of a folio
+ * @folio: The folio to remove the mapping from
+ * @page: The first page to remove
+ * @vma: The vm area from which the mapping is removed
+ *
+ * The page range of the folio is defined by [page, page + HPAGE_PMD_NR)
+ *
+ * The caller needs to hold the page table lock.
+ */
+void folio_remove_rmap_pmd(struct folio *folio, struct page *page,
+		struct vm_area_struct *vma)
+{
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+	__folio_remove_rmap(folio, page, HPAGE_PMD_NR, vma, RMAP_MODE_PMD);
+#else
+	WARN_ON_ONCE(true);
+#endif
+}
+
 /*
  * @arg: enum ttu_flags will be passed to this argument
  */
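To give an idea of the batching this enables (a hypothetical caller, not part
of this patch): after clearing a run of PTEs that all map pages of the same
large folio under the page table lock, the rmap references can be dropped with
one call instead of one page_remove_rmap() call per page:

/* Hypothetical helper, for illustration only. */
static void example_remove_rmap_batch(struct folio *folio, struct page *page,
		unsigned int nr, struct vm_area_struct *vma)
{
	/*
	 * The nr PTEs mapping page .. page + nr - 1 are assumed to have
	 * already been cleared under the page table lock.
	 */
	folio_remove_rmap_ptes(folio, page, nr, vma);
}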
Let's mimic what we did with folio_add_file_rmap_*() and
folio_add_anon_rmap_*() so we can similarly replace page_remove_rmap()
next.

Make the compiler always special-case on the granularity by using
__always_inline.

We're adding folio_remove_rmap_ptes() handling right away, as we want to
use that soon for batching rmap operations when unmapping PTE-mapped
large folios.

Signed-off-by: David Hildenbrand <david@redhat.com>
---
 include/linux/rmap.h |  6 ++++
 mm/rmap.c            | 76 ++++++++++++++++++++++++++++++++++++--------
 2 files changed, 68 insertions(+), 14 deletions(-)
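As a rough sketch of what the follow-up conversion of call sites could look
like once page_remove_rmap() is replaced (hypothetical call site, not taken
from this series):

	/* Before: the granularity is passed as a bool. */
	page_remove_rmap(page, vma, false);

	/* After: the granularity is explicit in the function name. */
	struct folio *folio = page_folio(page);

	folio_remove_rmap_pte(folio, page, vma);

	/* And for a PMD-mapped THP: */
	folio_remove_rmap_pmd(folio, page, vma);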