| Message ID | 20240226141324.278526-2-david@redhat.com |
|---|---|
| State | New |
| Series | mm: remove total_mapcount() |
On Mon, Feb 26, 2024 at 03:13:23PM +0100, David Hildenbrand wrote:
> +	xas_for_each(xas, folio, ULONG_MAX) {
> +		if (!xa_is_value(folio) && memfd_folio_has_extra_refs(folio))
>  			xas_set_mark(xas, MEMFD_TAG_PINNED);

... we decline to tag value entries here ...

> @@ -95,20 +90,15 @@ static int memfd_wait_for_pins(struct address_space *mapping)
>
>  	xas_set(&xas, 0);
>  	xas_lock_irq(&xas);
> -	xas_for_each_marked(&xas, page, ULONG_MAX, MEMFD_TAG_PINNED) {
> +	xas_for_each_marked(&xas, folio, ULONG_MAX, MEMFD_TAG_PINNED) {
>  		bool clear = true;
>
> -		cache_count = 1;
> -		if (!xa_is_value(page) &&
> -		    PageTransHuge(page) && !PageHuge(page))
> -			cache_count = HPAGE_PMD_NR;
> -
> -		if (!xa_is_value(page) && cache_count !=
> -		    page_count(page) - total_mapcount(page)) {
> +		if (!xa_is_value(folio) &&
> +		    memfd_folio_has_extra_refs(folio)) {

... so we don't need to test it here because we'll never see any value
entries.  No?
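(Background for readers following the thread: a "value entry" is how shmem represents a swapped-out page in the page cache -- a tagged integer rather than a folio pointer, which is what xa_is_value() distinguishes. A minimal sketch of the encode/decode round trip using the xarray helpers; the demo function itself is an illustration, not part of the patch:)

```c
#include <linux/bug.h>
#include <linux/xarray.h>

/* Illustration only: value entries are odd-tagged integers, never pointers. */
static void value_entry_demo(void)
{
	void *entry = xa_mk_value(0xabcd);	/* encoded as (0xabcd << 1) | 1 */

	if (xa_is_value(entry))			/* true: bottom bit is set */
		WARN_ON(xa_to_value(entry) != 0xabcd);
}
```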
On 26.02.24 17:07, Matthew Wilcox wrote:
> On Mon, Feb 26, 2024 at 03:13:23PM +0100, David Hildenbrand wrote:
>> +	xas_for_each(xas, folio, ULONG_MAX) {
>> +		if (!xa_is_value(folio) && memfd_folio_has_extra_refs(folio))
>> 			xas_set_mark(xas, MEMFD_TAG_PINNED);
>
> ... we decline to tag value entries here ...
>
>> @@ -95,20 +90,15 @@ static int memfd_wait_for_pins(struct address_space *mapping)
>>
>>  	xas_set(&xas, 0);
>>  	xas_lock_irq(&xas);
>> -	xas_for_each_marked(&xas, page, ULONG_MAX, MEMFD_TAG_PINNED) {
>> +	xas_for_each_marked(&xas, folio, ULONG_MAX, MEMFD_TAG_PINNED) {
>>  		bool clear = true;
>>
>> -		cache_count = 1;
>> -		if (!xa_is_value(page) &&
>> -		    PageTransHuge(page) && !PageHuge(page))
>> -			cache_count = HPAGE_PMD_NR;
>> -
>> -		if (!xa_is_value(page) && cache_count !=
>> -		    page_count(page) - total_mapcount(page)) {
>> +		if (!xa_is_value(folio) &&
>> +		    memfd_folio_has_extra_refs(folio)) {
>
> ... so we don't need to test it here because we'll never see any value
> entries.  No?

I was not able to convince myself that swapout code would clear the mark
when replacing the entry.

shmem_writepage()->shmem_delete_from_page_cache()->shmem_replace_entry()

will perform a xas_store() with swp_to_radix_entry(swap) under
xa_lock_irq().

Reading the doc, and staring at the code for a bit too long, I think
xas_store() would only clear tags when deleting an entry (passing NULL).

But maybe xas_store() will always clear tags?

In memfd code, I think we could see swapout between memfd_tag_pins() and
the check for tags, where we drop the xa_lock. Unless some other lock
(inode lock?) protects us.

Thanks!
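(For reference, the replacement step David points at looks roughly like this -- a paraphrase of shmem_replace_entry() in mm/shmem.c from memory, so treat it as a sketch rather than the verbatim source. The point is that the folio's slot is overwritten in place and nothing on this path touches the marks:)

```c
/* Sketch of shmem_replace_entry() (mm/shmem.c), paraphrased from memory. */
static int shmem_replace_entry(struct address_space *mapping,
		pgoff_t index, void *expected, void *replacement)
{
	XA_STATE(xas, &mapping->i_pages, index);
	void *item;

	item = xas_load(&xas);		/* caller holds xa_lock_irq */
	if (item != expected)
		return -ENOENT;
	xas_store(&xas, replacement);	/* swap entry overwrites the folio;
					 * no xas_clear_mark() anywhere here */
	return 0;
}
```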
On Mon, Feb 26, 2024 at 05:56:09PM +0100, David Hildenbrand wrote:
> > > @@ -95,20 +90,15 @@ static int memfd_wait_for_pins(struct address_space *mapping)
> > >  	xas_set(&xas, 0);
> > >  	xas_lock_irq(&xas);
> > > -	xas_for_each_marked(&xas, page, ULONG_MAX, MEMFD_TAG_PINNED) {
> > > +	xas_for_each_marked(&xas, folio, ULONG_MAX, MEMFD_TAG_PINNED) {
> > >  		bool clear = true;
> > > -		cache_count = 1;
> > > -		if (!xa_is_value(page) &&
> > > -		    PageTransHuge(page) && !PageHuge(page))
> > > -			cache_count = HPAGE_PMD_NR;
> > > -
> > > -		if (!xa_is_value(page) && cache_count !=
> > > -		    page_count(page) - total_mapcount(page)) {
> > > +		if (!xa_is_value(folio) &&
> > > +		    memfd_folio_has_extra_refs(folio)) {
> >
> > ... so we don't need to test it here because we'll never see any value
> > entries.  No?
>
> I was not able to convince myself that swapout code would clear the mark
> when replacing the entry.
>
> shmem_writepage()->shmem_delete_from_page_cache()->shmem_replace_entry()
>
> will perform a xas_store() with swp_to_radix_entry(swap) under
> xa_lock_irq().
>
> Reading the doc, and staring at the code for a bit too long, I think
> xas_store() would only clear tags when deleting an entry (passing NULL).
>
> But maybe xas_store() will always clear tags?

No, xas_store() will leave the tag alone ... this is the right thing to
do for the pagecache because we always clear the tags before removing a
folio from the cache.

> In memfd code, I think we could see swapout between memfd_tag_pins() and
> the check for tags, where we drop the xa_lock. Unless some other lock
> (inode lock?) protects us.

... and if it does happen, we see the value entry tagged and clear the
tag on it.  OK.

Reviewed-by: Matthew Wilcox (Oracle) <willy@infradead.org>
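(A self-contained sketch of the behaviour Willy describes -- my own illustration, not from the thread: overwriting a marked slot with another non-NULL entry leaves the mark set, so a tagged folio that gets swapped out leaves behind a tagged value entry, which the second loop then untags.)

```c
#include <linux/bug.h>
#include <linux/xarray.h>

static DEFINE_XARRAY(demo_xa);

static void mark_survives_store(void)
{
	XA_STATE(xas, &demo_xa, 0);

	xas_lock_irq(&xas);
	xas_store(&xas, xa_mk_value(1));		/* stand-in for the folio */
	xas_set_mark(&xas, XA_MARK_0);			/* the "pinned" tag */
	xas_store(&xas, xa_mk_value(2));		/* stand-in for swapout */
	WARN_ON(!xas_get_mark(&xas, XA_MARK_0));	/* tag is still set */
	xas_store(&xas, NULL);				/* only deletion clears marks */
	xas_unlock_irq(&xas);
}
```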
```diff
diff --git a/mm/memfd.c b/mm/memfd.c
index d3a1ba4208c90..7d8d3ab3fa378 100644
--- a/mm/memfd.c
+++ b/mm/memfd.c
@@ -29,29 +29,25 @@
 #define MEMFD_TAG_PINNED        PAGECACHE_TAG_TOWRITE
 #define LAST_SCAN               4	/* about 150ms max */
 
+static bool memfd_folio_has_extra_refs(struct folio *folio)
+{
+	return folio_ref_count(folio) - folio_mapcount(folio) !=
+	       folio_nr_pages(folio);
+}
+
 static void memfd_tag_pins(struct xa_state *xas)
 {
-	struct page *page;
+	struct folio *folio;
 	int latency = 0;
-	int cache_count;
 
 	lru_add_drain();
 
 	xas_lock_irq(xas);
-	xas_for_each(xas, page, ULONG_MAX) {
-		cache_count = 1;
-		if (!xa_is_value(page) &&
-		    PageTransHuge(page) && !PageHuge(page))
-			cache_count = HPAGE_PMD_NR;
-
-		if (!xa_is_value(page) &&
-		    page_count(page) - total_mapcount(page) != cache_count)
+	xas_for_each(xas, folio, ULONG_MAX) {
+		if (!xa_is_value(folio) && memfd_folio_has_extra_refs(folio))
 			xas_set_mark(xas, MEMFD_TAG_PINNED);
 
-		if (cache_count != 1)
-			xas_set(xas, page->index + cache_count);
-
-		latency += cache_count;
-		if (latency < XA_CHECK_SCHED)
+		if (++latency < XA_CHECK_SCHED)
 			continue;
 		latency = 0;
 
@@ -66,16 +62,16 @@ static void memfd_tag_pins(struct xa_state *xas)
 /*
  * Setting SEAL_WRITE requires us to verify there's no pending writer. However,
  * via get_user_pages(), drivers might have some pending I/O without any active
- * user-space mappings (eg., direct-IO, AIO). Therefore, we look at all pages
+ * user-space mappings (eg., direct-IO, AIO). Therefore, we look at all folios
  * and see whether it has an elevated ref-count. If so, we tag them and wait for
  * them to be dropped.
  * The caller must guarantee that no new user will acquire writable references
- * to those pages to avoid races.
+ * to those folios to avoid races.
  */
 static int memfd_wait_for_pins(struct address_space *mapping)
 {
 	XA_STATE(xas, &mapping->i_pages, 0);
-	struct page *page;
+	struct folio *folio;
 	int error, scan;
 
 	memfd_tag_pins(&xas);
@@ -83,7 +79,6 @@ static int memfd_wait_for_pins(struct address_space *mapping)
 	error = 0;
 	for (scan = 0; scan <= LAST_SCAN; scan++) {
 		int latency = 0;
-		int cache_count;
 
 		if (!xas_marked(&xas, MEMFD_TAG_PINNED))
 			break;
@@ -95,20 +90,15 @@ static int memfd_wait_for_pins(struct address_space *mapping)
 
 		xas_set(&xas, 0);
 		xas_lock_irq(&xas);
-		xas_for_each_marked(&xas, page, ULONG_MAX, MEMFD_TAG_PINNED) {
+		xas_for_each_marked(&xas, folio, ULONG_MAX, MEMFD_TAG_PINNED) {
 			bool clear = true;
 
-			cache_count = 1;
-			if (!xa_is_value(page) &&
-			    PageTransHuge(page) && !PageHuge(page))
-				cache_count = HPAGE_PMD_NR;
-
-			if (!xa_is_value(page) && cache_count !=
-			    page_count(page) - total_mapcount(page)) {
+			if (!xa_is_value(folio) &&
+			    memfd_folio_has_extra_refs(folio)) {
 				/*
 				 * On the last scan, we clean up all those tags
 				 * we inserted; but make a note that we still
-				 * found pages pinned.
+				 * found folios pinned.
 				 */
 				if (scan == LAST_SCAN)
 					error = -EBUSY;
@@ -118,8 +108,7 @@ static int memfd_wait_for_pins(struct address_space *mapping)
 			if (clear)
 				xas_clear_mark(&xas, MEMFD_TAG_PINNED);
 
-			latency += cache_count;
-			if (latency < XA_CHECK_SCHED)
+			if (++latency < XA_CHECK_SCHED)
 				continue;
 			latency = 0;
```
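(To make the arithmetic in memfd_folio_has_extra_refs() concrete, here is a worked example -- my own numbers, assuming the usual shmem accounting where the page cache holds one reference per page of the folio and each mapping holds one more:)

```c
/*
 * Worked example (illustration, not from the patch):
 * a 512-page shmem folio, PTE-mapped once per page by one process.
 *
 *   folio_nr_pages()  == 512
 *   folio_mapcount()  == 512                  (one PTE per page)
 *   folio_ref_count() == 512 + 512 == 1024    (cache + mappings)
 *
 *   1024 - 512 == 512 == folio_nr_pages()  ->  no extra refs, not tagged
 *
 * Any additional reference -- e.g. from pin_user_pages() -- raises
 * folio_ref_count() without changing the other two terms, so the helper
 * returns true and the folio is tagged MEMFD_TAG_PINNED.
 */
```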