Message ID | 20240129175423.1987-7-ryncsn@gmail.com (mailing list archive)
---|---
State | New
Series | swapin refactor for optimization and unified readahead
Hi, Minchan,

When I review the patchset from Kairui, I checked the code to skip swap
cache in do_swap_page() for swap devices with SWP_SYNCHRONOUS_IO. Is the
following race possible? Where a page is swapped out to a swap device
with SWP_SYNCHRONOUS_IO and the swap count is 1. Then 2 threads of the
process run on CPU0 and CPU1 as below. CPU0 is running do_swap_page().

CPU0                                CPU1
----                                ----
swap_cache_get_folio()
check sync io and swap count
alloc folio
swap_readpage()
folio_lock_or_retry()
                                    swap in the swap entry
                                    write page
                                    swap out to same swap entry
pte_offset_map_lock()
check pte_same()
swap_free()   <-- new content lost!
set_pte_at()  <-- stale page!
folio_unlock()
pte_unmap_unlock()

Do I miss anything?

--
Best Regards,
Huang, Ying
Hi Ying,

On Wed, Jan 31, 2024 at 10:53 AM Huang, Ying <ying.huang@intel.com> wrote:
>
> Hi, Minchan,
>
> When I review the patchset from Kairui, I checked the code to skip swap
> cache in do_swap_page() for swap devices with SWP_SYNCHRONOUS_IO. Is the
> following race possible? Where a page is swapped out to a swap device
> with SWP_SYNCHRONOUS_IO and the swap count is 1. Then 2 threads of the
> process run on CPU0 and CPU1 as below. CPU0 is running do_swap_page().

Chris raised a similar issue about the shmem path, and I was worrying
about the same issue in previous discussions about do_swap_page:
https://lore.kernel.org/linux-mm/CAMgjq7AwFiDb7cAMkWMWb3vkccie1-tocmZfT7m4WRb_UKPghg@mail.gmail.com/

"""
In the do_swap_page path, multiple processes could swap in the page at
the same time (a page mapped once can still be shared by sub threads),
and they could get different folios. The later pte lock and pte_same
check are not enough, because while one process is not holding the pte
lock, another process could read the page in, swap_free the entry, then
swap the page out again using the same entry: an ABA problem. The race
is not likely to happen in reality but is possible in theory.
"""

> CPU0                                CPU1
> ----                                ----
> swap_cache_get_folio()
> check sync io and swap count
> alloc folio
> swap_readpage()
> folio_lock_or_retry()
>                                     swap in the swap entry
>                                     write page
>                                     swap out to same swap entry
> pte_offset_map_lock()
> check pte_same()
> swap_free()   <-- new content lost!
> set_pte_at()  <-- stale page!
> folio_unlock()
> pte_unmap_unlock()

Thank you very much for highlighting this!

My concern previously was the same as yours (swapping out using the
same entry is like an ABA issue, where pte_same fails to detect the
page table change). Later, when working on V3, I mistakenly thought
that was impossible, as the entry should be pinned until swap_free on
CPU0, but I was wrong: CPU1 can also just call swap_free, the swap
count drops to 0, and the same entry can then be reused for a new
swap out.

Now I think my patch 6/7 is also affected by this potential race;
nothing seems to stop it from happening.

Actually, I was trying to make a reproducer locally; due to the swap
slot cache, the swap allocation algorithm, and the short race window,
this is very unlikely to happen though.

How about we just increase the swap count temporarily in the direct
swap in path (after allocating the folio), then drop the count after
pte_same succeeds (or after shmem_add_to_page_cache in the shmem path)?
That seems enough to prevent the entry reuse issue.
On Tue, Jan 30, 2024 at 6:53 PM Huang, Ying <ying.huang@intel.com> wrote:
>
> Hi, Minchan,
>
> When I review the patchset from Kairui, I checked the code to skip swap
> cache in do_swap_page() for swap devices with SWP_SYNCHRONOUS_IO. Is the
> following race possible? Where a page is swapped out to a swap device
> with SWP_SYNCHRONOUS_IO and the swap count is 1. Then 2 threads of the
> process run on CPU0 and CPU1 as below. CPU0 is running do_swap_page().
>
> [...]

Yes, that path looks possible, but it is hard to hit due to the
requirement of a swap in and a swap out within a short window. I had a
similar question in the previous discussion about moving the zswap rb
tree to an xarray, regarding deleting an entry where the entry might
change due to a swap in followed by a swap out.

Chris
On Tue, Jan 30, 2024 at 7:58 PM Kairui Song <ryncsn@gmail.com> wrote:
>
> Hi Ying,
>
> On Wed, Jan 31, 2024 at 10:53 AM Huang, Ying <ying.huang@intel.com> wrote:
> >
> > Hi, Minchan,
> >
> > When I review the patchset from Kairui, I checked the code to skip swap
> > cache in do_swap_page() for swap devices with SWP_SYNCHRONOUS_IO. Is the
> > following race possible? [...]
>
> Chris raised a similar issue about the shmem path, and I was worrying
> about the same issue in previous discussions about do_swap_page:
> https://lore.kernel.org/linux-mm/CAMgjq7AwFiDb7cAMkWMWb3vkccie1-tocmZfT7m4WRb_UKPghg@mail.gmail.com/

Ha, thanks for remembering that.

> [...]
>
> Thank you very much for highlighting this!
>
> [...]
>
> Actually, I was trying to make a reproducer locally; due to the swap
> slot cache, the swap allocation algorithm, and the short race window,
> this is very unlikely to happen though.

You can put some sleep at the points on CPU0 where you expect the other
side of the race to run, to help trigger it manually. Yes, it sounds
hard to trigger in real life due to reclaim driving the swap out.

> How about we just increase the swap count temporarily in the direct
> swap in path (after allocating the folio), then drop the count after
> pte_same succeeds (or after shmem_add_to_page_cache in the shmem path)?
> That seems enough to prevent the entry reuse issue.

Sounds like a good solution.

Chris
Chris Li <chrisl@kernel.org> writes:

> On Tue, Jan 30, 2024 at 7:58 PM Kairui Song <ryncsn@gmail.com> wrote:
>>
>> Hi Ying,
>>
>> [...]
>>
>> How about we just increase the swap count temporarily in the direct
>> swap in path (after allocating the folio), then drop the count after
>> pte_same succeeds (or after shmem_add_to_page_cache in the shmem path)?
>> That seems enough to prevent the entry reuse issue.
>
> Sounds like a good solution.

Yes. It seems that this can solve the race.

--
Best Regards,
Huang, Ying
diff --git a/mm/memory.c b/mm/memory.c
index 349946899f8d..51962126a79c 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3866,7 +3866,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
 	if (!folio) {
 		folio = swapin_entry(entry, GFP_HIGHUSER_MOVABLE,
-				     vmf, &swapcache, shadow);
+				     vmf, NULL, 0, &swapcache, shadow);
 		if (!folio) {
 			/*
 			 * Back out if somebody else faulted in this pte
diff --git a/mm/shmem.c b/mm/shmem.c
index 698a31bf7baa..d3722e25cb32 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1565,15 +1565,16 @@ static inline struct mempolicy *shmem_get_sbmpol(struct shmem_sb_info *sbinfo)
 static struct mempolicy *shmem_get_pgoff_policy(struct shmem_inode_info *info,
 			pgoff_t index, unsigned int order, pgoff_t *ilx);
 
-static struct folio *shmem_swapin_cluster(swp_entry_t swap, gfp_t gfp,
-			struct shmem_inode_info *info, pgoff_t index)
+static struct folio *shmem_swapin(swp_entry_t swap, gfp_t gfp,
+			struct shmem_inode_info *info, pgoff_t index,
+			struct folio **swapcache, void *shadow)
 {
 	struct mempolicy *mpol;
 	pgoff_t ilx;
 	struct folio *folio;
 
 	mpol = shmem_get_pgoff_policy(info, index, 0, &ilx);
-	folio = swap_cluster_readahead(swap, gfp, mpol, ilx);
+	folio = swapin_entry(swap, gfp, NULL, mpol, ilx, swapcache, shadow);
 	mpol_cond_put(mpol);
 
 	return folio;
@@ -1852,8 +1853,9 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index,
 {
 	struct address_space *mapping = inode->i_mapping;
 	struct shmem_inode_info *info = SHMEM_I(inode);
+	struct folio *swapcache = NULL, *folio;
 	struct swap_info_struct *si;
-	struct folio *folio = NULL;
+	void *shadow = NULL;
 	swp_entry_t swap;
 	int error;
 
@@ -1873,8 +1875,10 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index,
 	}
 
 	/* Look it up and read it in.. */
-	folio = swap_cache_get_folio(swap, NULL, 0, NULL);
-	if (!folio) {
+	folio = swap_cache_get_folio(swap, NULL, 0, &shadow);
+	if (folio) {
+		swapcache = folio;
+	} else {
 		/* Or update major stats only when swapin succeeds?? */
 		if (fault_type) {
 			*fault_type |= VM_FAULT_MAJOR;
@@ -1882,7 +1886,7 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index,
 			count_memcg_event_mm(fault_mm, PGMAJFAULT);
 		}
 		/* Here we actually start the io */
-		folio = shmem_swapin_cluster(swap, gfp, info, index);
+		folio = shmem_swapin(swap, gfp, info, index, &swapcache, shadow);
 		if (!folio) {
 			error = -ENOMEM;
 			goto failed;
@@ -1891,17 +1895,21 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index,
 
 	/* We have to do this with folio locked to prevent races */
 	folio_lock(folio);
-	if (!folio_test_swapcache(folio) ||
-	    folio->swap.val != swap.val ||
-	    !shmem_confirm_swap(mapping, index, swap)) {
+	if (swapcache) {
+		if (!folio_test_swapcache(folio) || folio->swap.val != swap.val) {
+			error = -EEXIST;
+			goto unlock;
+		}
+		if (!folio_test_uptodate(folio)) {
+			error = -EIO;
+			goto failed;
+		}
+		folio_wait_writeback(folio);
+	}
+	if (!shmem_confirm_swap(mapping, index, swap)) {
 		error = -EEXIST;
 		goto unlock;
 	}
-	if (!folio_test_uptodate(folio)) {
-		error = -EIO;
-		goto failed;
-	}
-	folio_wait_writeback(folio);
 
 	/*
 	 * Some architectures may have to restore extra metadata to the
@@ -1909,12 +1917,19 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index,
 	 */
 	arch_swap_restore(swap, folio);
 
-	if (shmem_should_replace_folio(folio, gfp)) {
+	/* If swapcache is bypassed, folio is newly allocated respects gfp flags */
+	if (swapcache && shmem_should_replace_folio(folio, gfp)) {
 		error = shmem_replace_folio(&folio, gfp, info, index);
 		if (error)
 			goto failed;
 	}
 
+	/*
+	 * The expected value checking below should be enough to ensure
+	 * only one up-to-date swapin success. swap_free() is called after
+	 * this, so the entry can't be reused. As long as the mapping still
+	 * has the old entry value, it's never swapped in or modified.
+	 */
 	error = shmem_add_to_page_cache(folio, mapping, index,
 					swp_to_radix_entry(swap), gfp);
 	if (error)
@@ -1925,7 +1940,8 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index,
 	if (sgp == SGP_WRITE)
 		folio_mark_accessed(folio);
 
-	delete_from_swap_cache(folio);
+	if (swapcache)
+		delete_from_swap_cache(folio);
 	folio_mark_dirty(folio);
 	swap_free(swap);
 	put_swap_device(si);
diff --git a/mm/swap.h b/mm/swap.h
index ca9cb472a263..597a56c7fb02 100644
--- a/mm/swap.h
+++ b/mm/swap.h
@@ -53,10 +53,9 @@ struct folio *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
 struct folio *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_flags,
 		struct mempolicy *mpol, pgoff_t ilx,
 		bool *new_page_allocated, bool skip_if_exists);
-struct folio *swap_cluster_readahead(swp_entry_t entry, gfp_t flag,
-		struct mempolicy *mpol, pgoff_t ilx);
 struct folio *swapin_entry(swp_entry_t entry, gfp_t flag, struct vm_fault *vmf,
-		struct folio **swapcached, void *shadow);
+		struct mempolicy *mpol, pgoff_t ilx,
+		struct folio **swapcache, void *shadow);
 
 static inline unsigned int folio_swap_flags(struct folio *folio)
 {
@@ -81,14 +80,9 @@ static inline void show_swap_cache_info(void)
 {
 }
 
-static inline struct folio *swap_cluster_readahead(swp_entry_t entry,
-		gfp_t gfp_mask, struct mempolicy *mpol, pgoff_t ilx)
-{
-	return NULL;
-}
-
 static inline struct folio *swapin_entry(swp_entry_t swp, gfp_t gfp_mask,
-		struct vm_fault *vmf, struct folio **swapcached, void *shadow)
+		struct vm_fault *vmf, struct mempolicy *mpol, pgoff_t ilx,
+		struct folio *swapcache, void *shadow);
 {
 	return NULL;
 }
diff --git a/mm/swap_state.c b/mm/swap_state.c
index e41a137a6123..20c206149be4 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -316,6 +316,18 @@ void free_pages_and_swap_cache(struct encoded_page **pages, int nr)
 	release_pages(pages, nr);
 }
 
+static inline bool swap_use_no_readahead(struct swap_info_struct *si, swp_entry_t entry)
+{
+	int count;
+
+	if (!data_race(si->flags & SWP_SYNCHRONOUS_IO))
+		return false;
+
+	count = __swap_count(entry);
+
+	return (count == 1 || count == SWAP_MAP_SHMEM);
+}
+
 static inline bool swap_use_vma_readahead(void)
 {
 	return READ_ONCE(enable_vma_readahead) && !atomic_read(&nr_rotate_swap);
@@ -635,8 +647,8 @@ static unsigned long swapin_nr_pages(unsigned long offset)
  * are used for every page of the readahead: neighbouring pages on swap
  * are fairly likely to have been swapped out from the same node.
  */
-struct folio *swap_cluster_readahead(swp_entry_t entry, gfp_t gfp_mask,
-		struct mempolicy *mpol, pgoff_t ilx)
+static struct folio *swap_cluster_readahead(swp_entry_t entry, gfp_t gfp_mask,
+		struct mempolicy *mpol, pgoff_t ilx)
 {
 	struct folio *folio;
 	unsigned long entry_offset = swp_offset(entry);
@@ -876,14 +888,13 @@ static struct folio *swap_vma_readahead(swp_entry_t targ_entry, gfp_t gfp_mask,
  * in.
  */
 static struct folio *swapin_direct(swp_entry_t entry, gfp_t gfp_mask,
-				   struct vm_fault *vmf, void *shadow)
+				   struct mempolicy *mpol, pgoff_t ilx,
+				   void *shadow)
 {
-	struct vm_area_struct *vma = vmf->vma;
 	struct folio *folio;
 
-	/* skip swapcache */
-	folio = vma_alloc_folio(gfp_mask, 0,
-				vma, vmf->address, false);
+	folio = (struct folio *)alloc_pages_mpol(gfp_mask, 0,
+				mpol, ilx, numa_node_id());
 	if (folio) {
 		__folio_set_locked(folio);
 		__folio_set_swapbacked(folio);
@@ -916,6 +927,10 @@ static struct folio *swapin_direct(swp_entry_t entry, gfp_t gfp_mask,
  * @gfp_mask: memory allocation flags
  * @vmf: fault information
  * @swapcache: set to the swapcache folio if swapcache is used
+ * @mpol: NUMA memory alloc policy to be applied,
+ *        not needed if vmf is not NULL
+ * @targ_ilx: NUMA interleave index, for use only when MPOL_INTERLEAVE,
+ *        not needed if vmf is not NULL
  *
  * Returns the struct page for entry and addr, after queueing swapin.
  *
@@ -924,26 +939,29 @@ static struct folio *swapin_direct(swp_entry_t entry, gfp_t gfp_mask,
 * or vma-based(ie, virtual address based on faulty address) readahead,
 * or skip the readahead(ie, ramdisk based swap device).
 */
-struct folio *swapin_entry(swp_entry_t entry, gfp_t gfp_mask,
-		struct vm_fault *vmf, struct folio **swapcache, void *shadow)
+struct folio *swapin_entry(swp_entry_t entry, gfp_t gfp_mask, struct vm_fault *vmf,
+		struct mempolicy *mpol, pgoff_t ilx,
+		struct folio **swapcache, void *shadow)
 {
-	struct mempolicy *mpol;
+	bool mpol_put = false;
 	struct folio *folio;
-	pgoff_t ilx;
 
-	if (data_race(swp_swap_info(entry)->flags & SWP_SYNCHRONOUS_IO) &&
-	    __swap_count(entry) == 1) {
-		folio = swapin_direct(entry, gfp_mask, vmf, shadow);
-	} else {
+	if (!mpol) {
 		mpol = get_vma_policy(vmf->vma, vmf->address, 0, &ilx);
-		if (swap_use_vma_readahead())
+		mpol_put = true;
+	}
+	if (swap_use_no_readahead(swp_swap_info(entry), entry)) {
+		folio = swapin_direct(entry, gfp_mask, mpol, ilx, shadow);
+	} else {
+		if (vmf && swap_use_vma_readahead())
 			folio = swap_vma_readahead(entry, gfp_mask, mpol, ilx, vmf);
 		else
 			folio = swap_cluster_readahead(entry, gfp_mask, mpol, ilx);
-		mpol_cond_put(mpol);
 		if (swapcache)
 			*swapcache = folio;
 	}
+	if (mpol_put)
+		mpol_cond_put(mpol);
 	return folio;
 }
diff --git a/mm/swapfile.c b/mm/swapfile.c
index aac26f5a6cec..7ff05aaf6925 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -1875,7 +1875,7 @@ static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 		};
 
 		folio = swapin_entry(entry, GFP_HIGHUSER_MOVABLE,
-				     &vmf, NULL, NULL);
+				     &vmf, NULL, 0, NULL, NULL);
 	}
 	if (!folio) {
 		/*