Message ID | 20240222160943.622386-1-david@redhat.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | [v1] mm: remove total_mapcount() | expand |
On Thu, Feb 22, 2024 at 05:09:43PM +0100, David Hildenbrand wrote: > We always get a head page, so we can just naturally interpret is as a folio > (similar to other code). memfd seems rather confused about how to iterate over the page cache. Perhaps we could sort that out and then delete total_mapcount as a second patch? I haven't tested this at all, but ... Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org> diff --git a/mm/memfd.c b/mm/memfd.c index d3a1ba4208c9..45e55b0e3cbe 100644 --- a/mm/memfd.c +++ b/mm/memfd.c @@ -29,28 +29,29 @@ #define MEMFD_TAG_PINNED PAGECACHE_TAG_TOWRITE #define LAST_SCAN 4 /* about 150ms max */ +static bool memfd_extra_refs(struct folio *folio) +{ + return folio_ref_count(folio) - folio_mapcount(folio) != + folio_nr_pages(folio); +} + static void memfd_tag_pins(struct xa_state *xas) { - struct page *page; + struct folio *folio; int latency = 0; - int cache_count; lru_add_drain(); xas_lock_irq(xas); - xas_for_each(xas, page, ULONG_MAX) { - cache_count = 1; - if (!xa_is_value(page) && - PageTransHuge(page) && !PageHuge(page)) - cache_count = HPAGE_PMD_NR; - - if (!xa_is_value(page) && - page_count(page) - total_mapcount(page) != cache_count) + xas_for_each(xas, folio, ULONG_MAX) { + /* Can we have shadow/swap entries in memfd? 
*/ + if (xa_is_value(folio)) + continue; + + if (memfd_extra_refs(folio)) xas_set_mark(xas, MEMFD_TAG_PINNED); - if (cache_count != 1) - xas_set(xas, page->index + cache_count); - latency += cache_count; + latency++; if (latency < XA_CHECK_SCHED) continue; latency = 0; @@ -75,7 +76,6 @@ static void memfd_tag_pins(struct xa_state *xas) static int memfd_wait_for_pins(struct address_space *mapping) { XA_STATE(xas, &mapping->i_pages, 0); - struct page *page; int error, scan; memfd_tag_pins(&xas); @@ -83,7 +83,7 @@ static int memfd_wait_for_pins(struct address_space *mapping) error = 0; for (scan = 0; scan <= LAST_SCAN; scan++) { int latency = 0; - int cache_count; + struct folio *folio; if (!xas_marked(&xas, MEMFD_TAG_PINNED)) break; @@ -95,16 +95,10 @@ static int memfd_wait_for_pins(struct address_space *mapping) xas_set(&xas, 0); xas_lock_irq(&xas); - xas_for_each_marked(&xas, page, ULONG_MAX, MEMFD_TAG_PINNED) { + xas_for_each_marked(&xas, folio, ULONG_MAX, MEMFD_TAG_PINNED) { bool clear = true; - cache_count = 1; - if (!xa_is_value(page) && - PageTransHuge(page) && !PageHuge(page)) - cache_count = HPAGE_PMD_NR; - - if (!xa_is_value(page) && cache_count != - page_count(page) - total_mapcount(page)) { + if (memfd_extra_refs(folio)) { /* * On the last scan, we clean up all those tags * we inserted; but make a note that we still @@ -118,8 +112,7 @@ static int memfd_wait_for_pins(struct address_space *mapping) if (clear) xas_clear_mark(&xas, MEMFD_TAG_PINNED); - latency += cache_count; - if (latency < XA_CHECK_SCHED) + if (++latency < XA_CHECK_SCHED) continue; latency = 0;
On 22.02.24 18:13, Matthew Wilcox wrote: > On Thu, Feb 22, 2024 at 05:09:43PM +0100, David Hildenbrand wrote: >> We always get a head page, so we can just naturally interpret is as a folio >> (similar to other code). > > memfd seems rather confused about how to iterate over the page cache. > Perhaps we could sort that out and then delete total_mapcount as a > second patch? > > I haven't tested this at all, but ... > > Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org> > > diff --git a/mm/memfd.c b/mm/memfd.c > index d3a1ba4208c9..45e55b0e3cbe 100644 > --- a/mm/memfd.c > +++ b/mm/memfd.c > @@ -29,28 +29,29 @@ > #define MEMFD_TAG_PINNED PAGECACHE_TAG_TOWRITE > #define LAST_SCAN 4 /* about 150ms max */ > > +static bool memfd_extra_refs(struct folio *folio) > +{ > + return folio_ref_count(folio) - folio_mapcount(folio) != > + folio_nr_pages(folio); > +} That is an obvious improvement I should have realized myself. Let me play with that. Thanks!
diff --git a/include/linux/mm.h b/include/linux/mm.h index 6f4825d82965..49e22a2f6ccc 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1183,7 +1183,7 @@ static inline int is_vmalloc_or_module_addr(const void *x) * How many times the entire folio is mapped as a single unit (eg by a * PMD or PUD entry). This is probably not what you want, except for * debugging purposes - it does not include PTE-mapped sub-pages; look - * at folio_mapcount() or page_mapcount() or total_mapcount() instead. + * at folio_mapcount() or page_mapcount() instead. */ static inline int folio_entire_mapcount(struct folio *folio) { @@ -1243,13 +1243,6 @@ static inline int folio_mapcount(struct folio *folio) return folio_total_mapcount(folio); } -static inline int total_mapcount(struct page *page) -{ - if (likely(!PageCompound(page))) - return atomic_read(&page->_mapcount) + 1; - return folio_total_mapcount(page_folio(page)); -} - static inline bool folio_large_is_mapped(struct folio *folio) { /* diff --git a/mm/memfd.c b/mm/memfd.c index d3a1ba4208c9..0a6c1a6ee03b 100644 --- a/mm/memfd.c +++ b/mm/memfd.c @@ -31,24 +31,25 @@ static void memfd_tag_pins(struct xa_state *xas) { - struct page *page; + struct folio *folio; int latency = 0; int cache_count; lru_add_drain(); xas_lock_irq(xas); - xas_for_each(xas, page, ULONG_MAX) { + xas_for_each(xas, folio, ULONG_MAX) { cache_count = 1; - if (!xa_is_value(page) && - PageTransHuge(page) && !PageHuge(page)) + if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && + !xa_is_value(folio) && folio_test_large(folio) && + !folio_test_hugetlb(folio)) cache_count = HPAGE_PMD_NR; - if (!xa_is_value(page) && - page_count(page) - total_mapcount(page) != cache_count) + if (!xa_is_value(folio) && cache_count != + folio_ref_count(folio) - folio_mapcount(folio)) xas_set_mark(xas, MEMFD_TAG_PINNED); if (cache_count != 1) - xas_set(xas, page->index + cache_count); + xas_set(xas, folio->index + cache_count); latency += cache_count; if (latency < XA_CHECK_SCHED) @@ 
-66,16 +67,16 @@ static void memfd_tag_pins(struct xa_state *xas) /* * Setting SEAL_WRITE requires us to verify there's no pending writer. However, * via get_user_pages(), drivers might have some pending I/O without any active - * user-space mappings (eg., direct-IO, AIO). Therefore, we look at all pages + * user-space mappings (eg., direct-IO, AIO). Therefore, we look at all folios * and see whether it has an elevated ref-count. If so, we tag them and wait for * them to be dropped. * The caller must guarantee that no new user will acquire writable references - * to those pages to avoid races. + * to those folios to avoid races. */ static int memfd_wait_for_pins(struct address_space *mapping) { XA_STATE(xas, &mapping->i_pages, 0); - struct page *page; + struct folio *folio; int error, scan; memfd_tag_pins(&xas); @@ -95,20 +96,21 @@ static int memfd_wait_for_pins(struct address_space *mapping) xas_set(&xas, 0); xas_lock_irq(&xas); - xas_for_each_marked(&xas, page, ULONG_MAX, MEMFD_TAG_PINNED) { + xas_for_each_marked(&xas, folio, ULONG_MAX, MEMFD_TAG_PINNED) { bool clear = true; cache_count = 1; - if (!xa_is_value(page) && - PageTransHuge(page) && !PageHuge(page)) + if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && + !xa_is_value(folio) && folio_test_large(folio) && + !folio_test_hugetlb(folio)) cache_count = HPAGE_PMD_NR; - if (!xa_is_value(page) && cache_count != - page_count(page) - total_mapcount(page)) { + if (!xa_is_value(folio) && cache_count != + folio_ref_count(folio) - folio_mapcount(folio)) { /* * On the last scan, we clean up all those tags * we inserted; but make a note that we still - * found pages pinned. + * found folios pinned. */ if (scan == LAST_SCAN) error = -EBUSY;
mm/memfd.c is the last remaining user of total_mapcount(). Let's convert memfd_tag_pins() and memfd_wait_for_pins() to use folios instead of pages, so we can remove total_mapcount() for good. We always get a head page, so we can just naturally interpret it as a folio (similar to other code). Cc: Andrew Morton <akpm@linux-foundation.org> Cc: Matthew Wilcox (Oracle) <willy@infradead.org> Signed-off-by: David Hildenbrand <david@redhat.com> --- Did a quick test with write-sealing a memfd backed by THP. Seems to work as it used to. --- include/linux/mm.h | 9 +-------- mm/memfd.c | 34 ++++++++++++++++++---------------- 2 files changed, 19 insertions(+), 24 deletions(-)