[5/7] mm: thp: split huge page to any lower order pages.

Message ID	20201119160605.1272425-6-zi.yan@sent.com (mailing list archive)
State	New, archived
Headers	show Return-Path: <SRS0=7/0P=EZ=kvack.org=owner-linux-mm@kernel.org> DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org 845BB246E4 From: Zi Yan <zi.yan@sent.com> To: linux-mm@kvack.org, Matthew Wilcox <willy@infradead.org>, "Kirill A . Shutemov" <kirill.shutemov@linux.intel.com> Cc: Roman Gushchin <guro@fb.com>, Andrew Morton <akpm@linux-foundation.org>, linux-kernel@vger.kernel.org, linux-kselftest@vger.kernel.org, Yang Shi <shy828301@gmail.com>, Michal Hocko <mhocko@kernel.org>, John Hubbard <jhubbard@nvidia.com>, Ralph Campbell <rcampbell@nvidia.com>, David Nellans <dnellans@nvidia.com>, Zi Yan <ziy@nvidia.com> Subject: [PATCH 5/7] mm: thp: split huge page to any lower order pages. Date: Thu, 19 Nov 2020 11:06:03 -0500 Message-Id: <20201119160605.1272425-6-zi.yan@sent.com> In-Reply-To: <20201119160605.1272425-1-zi.yan@sent.com> References: <20201119160605.1272425-1-zi.yan@sent.com> Reply-To: Zi Yan <ziy@nvidia.com> MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Sender: owner-linux-mm@kvack.org Precedence: bulk
Series	Split huge pages to any lower order pages and selftests. \| expand [0/7] Split huge pages to any lower order pages and selftests. [1/7] XArray: Fix splitting to non-zero orders [2/7] mm: huge_memory: add new debugfs interface to trigger split huge page on any page range. [3/7] mm: memcg: make memcg huge page split support any order split. [4/7] mm: page_owner: add support for splitting to any order in split page_owner. [5/7] mm: thp: split huge page to any lower order pages. [6/7] mm: truncate: split thp to a non-zero order if possible. [7/7] mm: huge_memory: enable debugfs to split huge pages to any order.

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 7723deda33e2..0c856f805617 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -182,6 +182,8 @@ bool is_transparent_hugepage(struct page *page); bool can_split_huge_page(struct page *page, int *pextra_pins); int split_huge_page_to_list(struct page *page, struct list_head *list); +int split_huge_page_to_list_to_order(struct page *page, struct list_head *list, + unsigned int new_order); static inline int split_huge_page(struct page *page) { return split_huge_page_to_list(page, NULL); @@ -385,6 +387,12 @@ split_huge_page_to_list(struct page *page, struct list_head *list) { return 0; } +static inline int +split_huge_page_to_list_to_order(struct page *page, struct list_head *list, + unsigned int new_order) +{ + return 0; +} static inline int split_huge_page(struct page *page) { return 0; diff --git a/mm/huge_memory.c b/mm/huge_memory.c index aae7405a0989..cc70f70862d8 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2325,12 +2325,14 @@ void vma_adjust_trans_huge(struct vm_area_struct *vma, static void unmap_page(struct page *page) { - enum ttu_flags ttu_flags = TTU_IGNORE_MLOCK | - TTU_RMAP_LOCKED | TTU_SPLIT_HUGE_PMD; + enum ttu_flags ttu_flags = TTU_IGNORE_MLOCK | TTU_RMAP_LOCKED; bool unmap_success; VM_BUG_ON_PAGE(!PageHead(page), page); + if (thp_order(page) >= HPAGE_PMD_ORDER) + ttu_flags |= TTU_SPLIT_HUGE_PMD; + if (PageAnon(page)) ttu_flags |= TTU_SPLIT_FREEZE; @@ -2338,21 +2340,23 @@ static void unmap_page(struct page *page) VM_BUG_ON_PAGE(!unmap_success, page); } -static void remap_page(struct page *page, unsigned int nr) +static void remap_page(struct page *page, unsigned int nr, unsigned int new_nr) { - int i; - if (PageTransHuge(page)) { + unsigned int i; + + if (thp_nr_pages(page) == nr) { remove_migration_ptes(page, page, true); } else { - for (i = 0; i < nr; i++) + for (i = 0; i < nr; i += new_nr) remove_migration_ptes(page + i, page + i, true); } } static void __split_huge_page_tail(struct page *head, int tail, - struct lruvec *lruvec, struct list_head *list) + struct lruvec *lruvec, struct list_head *list, unsigned int new_order) { struct page *page_tail = head + tail; + unsigned long compound_head_flag = new_order ? (1L << PG_head) : 0; VM_BUG_ON_PAGE(atomic_read(&page_tail->_mapcount) != -1, page_tail); @@ -2376,6 +2380,7 @@ static void __split_huge_page_tail(struct page *head, int tail, #ifdef CONFIG_64BIT (1L << PG_arch_2) | #endif + compound_head_flag | (1L << PG_dirty))); /* ->mapping in first tail page is compound_mapcount */ @@ -2384,7 +2389,10 @@ static void __split_huge_page_tail(struct page *head, int tail, page_tail->mapping = head->mapping; page_tail->index = head->index + tail; - /* Page flags must be visible before we make the page non-compound. */ + /* + * Page flags must be visible before we make the page non-compound or + * a compound page in new_order. + */ smp_wmb(); /* @@ -2394,10 +2402,15 @@ static void __split_huge_page_tail(struct page *head, int tail, * which needs correct compound_head(). */ clear_compound_head(page_tail); + if (new_order) { + prep_compound_page(page_tail, new_order); + thp_prep(page_tail); + } /* Finally unfreeze refcount. Additional reference from page cache. */ - page_ref_unfreeze(page_tail, 1 + (!PageAnon(head) || - PageSwapCache(head))); + page_ref_unfreeze(page_tail, 1 + ((!PageAnon(head) || + PageSwapCache(head)) ? + thp_nr_pages(page_tail) : 0)); if (page_is_young(head)) set_page_young(page_tail); @@ -2415,7 +2428,7 @@ static void __split_huge_page_tail(struct page *head, int tail, } static void __split_huge_page(struct page *page, struct list_head *list, - pgoff_t end, unsigned long flags) + pgoff_t end, unsigned long flags, unsigned int new_order) { struct page *head = compound_head(page); pg_data_t *pgdat = page_pgdat(head); @@ -2424,12 +2437,13 @@ static void __split_huge_page(struct page *page, struct list_head *list, unsigned long offset = 0; unsigned int order = thp_order(head); unsigned int nr = thp_nr_pages(head); + unsigned int new_nr = 1 << new_order; int i; lruvec = mem_cgroup_page_lruvec(head, pgdat); /* complete memcg works before add pages to LRU */ - mem_cgroup_split_huge_fixup(head, 0); + mem_cgroup_split_huge_fixup(head, new_order); if (PageAnon(head) && PageSwapCache(head)) { swp_entry_t entry = { .val = page_private(head) }; @@ -2439,46 +2453,54 @@ static void __split_huge_page(struct page *page, struct list_head *list, xa_lock(&swap_cache->i_pages); } - for (i = nr - 1; i >= 1; i--) { - __split_huge_page_tail(head, i, lruvec, list); + for (i = nr - new_nr; i >= new_nr; i -= new_nr) { + __split_huge_page_tail(head, i, lruvec, list, new_order); /* Some pages can be beyond i_size: drop them from page cache */ if (head[i].index >= end) { ClearPageDirty(head + i); __delete_from_page_cache(head + i, NULL); if (IS_ENABLED(CONFIG_SHMEM) && PageSwapBacked(head)) - shmem_uncharge(head->mapping->host, 1); + shmem_uncharge(head->mapping->host, new_nr); put_page(head + i); } else if (!PageAnon(page)) { __xa_store(&head->mapping->i_pages, head[i].index, head + i, 0); } else if (swap_cache) { + /* + * split anonymous THPs (including swapped out ones) to + * non-zero order not supported + */ + VM_BUG_ON(new_order); __xa_store(&swap_cache->i_pages, offset + i, head + i, 0); } } - ClearPageCompound(head); + if (!new_order) + ClearPageCompound(head); + else + set_compound_order(head, new_order); - split_page_owner(head, order, 0); + split_page_owner(head, order, new_order); /* See comment in __split_huge_page_tail() */ if (PageAnon(head)) { /* Additional pin to swap cache */ if (PageSwapCache(head)) { - page_ref_add(head, 2); + page_ref_add(head, 1 + new_nr); xa_unlock(&swap_cache->i_pages); } else { page_ref_inc(head); } } else { /* Additional pin to page cache */ - page_ref_add(head, 2); + page_ref_add(head, 1 + new_nr); xa_unlock(&head->mapping->i_pages); } spin_unlock_irqrestore(&pgdat->lru_lock, flags); - remap_page(head, nr); + remap_page(head, nr, new_nr); if (PageSwapCache(head)) { swp_entry_t entry = { .val = page_private(head) }; @@ -2486,7 +2508,14 @@ static void __split_huge_page(struct page *page, struct list_head *list, split_swap_cluster(entry); } - for (i = 0; i < nr; i++) { + /* + * set page to its compound_head when split to THPs, so that GUP pin and + * PG_locked are transferred to the right after-split page + */ + if (new_order) + page = compound_head(page); + + for (i = 0; i < nr; i += new_nr) { struct page *subpage = head + i; if (subpage == page) continue; @@ -2604,37 +2633,61 @@ bool can_split_huge_page(struct page *page, int *pextra_pins) * This function splits huge page into normal pages. @page can point to any * subpage of huge page to split. Split doesn't change the position of @page. * + * See split_huge_page_to_list_to_order() for more details. + * + * Returns 0 if the hugepage is split successfully. + * Returns -EBUSY if the page is pinned or if anon_vma disappeared from under + * us. + */ +int split_huge_page_to_list(struct page *page, struct list_head *list) +{ + return split_huge_page_to_list_to_order(page, list, 0); +} + +/* + * This function splits huge page into pages in @new_order. @page can point to + * any subpage of huge page to split. Split doesn't change the position of + * @page. + * * Only caller must hold pin on the @page, otherwise split fails with -EBUSY. * The huge page must be locked. * * If @list is null, tail pages will be added to LRU list, otherwise, to @list. * - * Both head page and tail pages will inherit mapping, flags, and so on from - * the hugepage. + * Pages in new_order will inherit mapping, flags, and so on from the hugepage. * - * GUP pin and PG_locked transferred to @page. Rest subpages can be freed if - * they are not mapped. + * GUP pin and PG_locked transferred to @page or the compound page @page belongs + * to. Rest subpages can be freed if they are not mapped. * * Returns 0 if the hugepage is split successfully. * Returns -EBUSY if the page is pinned or if anon_vma disappeared from under * us. */ -int split_huge_page_to_list(struct page *page, struct list_head *list) +int split_huge_page_to_list_to_order(struct page *page, struct list_head *list, + unsigned int new_order) { struct page *head = compound_head(page); struct pglist_data *pgdata = NODE_DATA(page_to_nid(head)); struct deferred_split *ds_queue = get_deferred_split_queue(head); - XA_STATE(xas, &head->mapping->i_pages, head->index); + /* reset xarray order to new order after split */ + XA_STATE_ORDER(xas, &head->mapping->i_pages, head->index, new_order); struct anon_vma *anon_vma = NULL; struct address_space *mapping = NULL; int count, mapcount, extra_pins, ret; unsigned long flags; pgoff_t end; + VM_BUG_ON(thp_order(head) <= new_order); VM_BUG_ON_PAGE(is_huge_zero_page(head), head); VM_BUG_ON_PAGE(!PageLocked(head), head); VM_BUG_ON_PAGE(!PageCompound(head), head); + /* Cannot split THP to order-1 (no order-1 THPs) */ + VM_BUG_ON(new_order == 1); + + /* Split anonymous THP to non-zero order not support */ + VM_BUG_ON(PageAnon(head) && new_order); + if (PageWriteback(head)) return -EBUSY; @@ -2720,18 +2773,22 @@ int split_huge_page_to_list(struct page *page, struct list_head *list) if (!mapcount && page_ref_freeze(head, 1 + extra_pins)) { if (!list_empty(page_deferred_list(head))) { ds_queue->split_queue_len--; - list_del(page_deferred_list(head)); + list_del_init(page_deferred_list(head)); } spin_unlock(&ds_queue->split_queue_lock); if (mapping) { if (PageSwapBacked(head)) __dec_lruvec_page_state(head, NR_SHMEM_THPS); - else + else if (!new_order) + /* + * Decrease THP stats only if split to normal + * pages + */ __mod_lruvec_page_state(head, NR_FILE_THPS, -thp_nr_pages(head)); } - __split_huge_page(page, list, end, flags); + __split_huge_page(page, list, end, flags, new_order); ret = 0; } else { if (IS_ENABLED(CONFIG_DEBUG_VM) && mapcount) { @@ -2746,7 +2803,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list) fail: if (mapping) xas_unlock(&xas); spin_unlock_irqrestore(&pgdata->lru_lock, flags); - remap_page(head, thp_nr_pages(head)); + remap_page(head, thp_nr_pages(head), 1); ret = -EBUSY; } diff --git a/mm/swap.c b/mm/swap.c index b667870c8a0b..8f53ab593438 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -983,7 +983,6 @@ void lru_add_page_tail(struct page *page, struct page *page_tail, struct lruvec *lruvec, struct list_head *list) { VM_BUG_ON_PAGE(!PageHead(page), page); - VM_BUG_ON_PAGE(PageCompound(page_tail), page); VM_BUG_ON_PAGE(PageLRU(page_tail), page); lockdep_assert_held(&lruvec_pgdat(lruvec)->lru_lock);

[5/7] mm: thp: split huge page to any lower order pages.

Commit Message

Patch