diff mbox series

[RFC,1/4] mm: page_ext: add an iteration API for page extensions

Message ID 70bc5513e599d3386533fcc25dfe33685d2ca1bb.1737754625.git.luizcap@redhat.com (mailing list archive)
State New
Headers show
Series mm: page_ext: Fix crash when reserving 1G pages | expand

Commit Message

Luiz Capitulino Jan. 24, 2025, 9:37 p.m. UTC
The page extension implementation assumes that all page extensions of a
given page order are stored in the same memory section. The function
page_ext_next() relies on this assumption by adding an offset to the
current object to return the next adjacent page extension.

This behavior works as expected for flatmem but fails for sparsemem when
using 1G pages. Commit e98337d11bbd ("mm/contig_alloc: support
__GFP_COMP") exposes this issue, making it possible for a crash when
using page_owner or page_table_check page extensions.

The problem is that for 1G pages, the page extensions may span memory
section boundaries and be stored in different memory sections. This issue
was not visible before commit e98337d11bbd ("mm/contig_alloc: support
__GFP_COMP") because alloc_contig_pages() never passed more than
MAX_PAGE_ORDER to post_alloc_hook(). However, the mentioned commit
changed this behavior allowing the full 1G page order to be passed.

Reproducer:

 1. Build the kernel with CONFIG_SPARSEMEM=y and the table extensions
 2. Pass 'default_hugepagesz=1 page_owner=on' in the kernel command-line
 3. Reserve one 1G page at run-time, this should crash (backtrace below)

To address this issue, this commit introduces a new API for iterating
through page extensions. In page_ext_iter_next(), we always go through
page_ext_get() to guarantee that we do a new memory section lookup for
the next page extension. In the future, this API could be used as a basis
to implement for_each_page_ext() type of macro.

Thanks to David Hildenbrand for helping identify the root cause and
providing suggestions on how to fix it (final implementation and bugs are
all mine though).

Here's the backtrace, without kasan you can get random crashes:

[   76.052526] BUG: KASAN: slab-out-of-bounds in __update_page_owner_handle+0x238/0x298
[   76.060283] Write of size 4 at addr ffff07ff96240038 by task tee/3598
[   76.066714]
[   76.068203] CPU: 88 UID: 0 PID: 3598 Comm: tee Kdump: loaded Not tainted 6.13.0-rep1 #3
[   76.076202] Hardware name: WIWYNN Mt.Jade Server System B81.030Z1.0007/Mt.Jade Motherboard, BIOS 2.10.20220810 (SCP: 2.10.20220810) 2022/08/10
[   76.088972] Call trace:
[   76.091411]  show_stack+0x20/0x38 (C)
[   76.095073]  dump_stack_lvl+0x80/0xf8
[   76.098733]  print_address_description.constprop.0+0x88/0x398
[   76.104476]  print_report+0xa8/0x278
[   76.108041]  kasan_report+0xa8/0xf8
[   76.111520]  __asan_report_store4_noabort+0x20/0x30
[   76.116391]  __update_page_owner_handle+0x238/0x298
[   76.121259]  __set_page_owner+0xdc/0x140
[   76.125173]  post_alloc_hook+0x190/0x1d8
[   76.129090]  alloc_contig_range_noprof+0x54c/0x890
[   76.133874]  alloc_contig_pages_noprof+0x35c/0x4a8
[   76.138656]  alloc_gigantic_folio.isra.0+0x2c0/0x368
[   76.143616]  only_alloc_fresh_hugetlb_folio.isra.0+0x24/0x150
[   76.149353]  alloc_pool_huge_folio+0x11c/0x1f8
[   76.153787]  set_max_huge_pages+0x364/0xca8
[   76.157961]  __nr_hugepages_store_common+0xb0/0x1a0
[   76.162829]  nr_hugepages_store+0x108/0x118
[   76.167003]  kobj_attr_store+0x3c/0x70
[   76.170745]  sysfs_kf_write+0xfc/0x188
[   76.174492]  kernfs_fop_write_iter+0x274/0x3e0
[   76.178927]  vfs_write+0x64c/0x8e0
[   76.182323]  ksys_write+0xf8/0x1f0
[   76.185716]  __arm64_sys_write+0x74/0xb0
[   76.189630]  invoke_syscall.constprop.0+0xd8/0x1e0
[   76.194412]  do_el0_svc+0x164/0x1e0
[   76.197891]  el0_svc+0x40/0xe0
[   76.200939]  el0t_64_sync_handler+0x144/0x168
[   76.205287]  el0t_64_sync+0x1ac/0x1b0

Fixes: e98337d11bbd ("mm/contig_alloc: support __GFP_COMP")
Signed-off-by: Luiz Capitulino <luizcap@redhat.com>
---
 include/linux/page_ext.h | 10 ++++++++
 mm/page_ext.c            | 55 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 65 insertions(+)
diff mbox series

Patch

diff --git a/include/linux/page_ext.h b/include/linux/page_ext.h
index e4b48a0dda244..df904544d3fac 100644
--- a/include/linux/page_ext.h
+++ b/include/linux/page_ext.h
@@ -93,6 +93,16 @@  static inline struct page_ext *page_ext_next(struct page_ext *curr)
 	return next;
 }
 
+struct page_ext_iter {
+	unsigned long pfn;
+	struct page_ext *page_ext;
+};
+
+struct page_ext *page_ext_iter_begin(struct page_ext_iter *iter, struct page *page);
+struct page_ext *page_ext_iter_get(const struct page_ext_iter *iter);
+struct page_ext *page_ext_iter_next(struct page_ext_iter *iter);
+void page_ext_iter_end(struct page_ext_iter *iter);
+
 #else /* !CONFIG_PAGE_EXTENSION */
 struct page_ext;
 
diff --git a/mm/page_ext.c b/mm/page_ext.c
index 641d93f6af4c1..0b6eb5524cb2c 100644
--- a/mm/page_ext.c
+++ b/mm/page_ext.c
@@ -549,3 +549,58 @@  void page_ext_put(struct page_ext *page_ext)
 
 	rcu_read_unlock();
 }
+
+/**
+ * page_ext_iter_begin() - Prepare for iterating through page extensions.
+ * @iter: page extension iterator.
+ * @page: The page we're interested in.
+ *
+ * Return: NULL if no page_ext exists for this page.
+ */
+struct page_ext *page_ext_iter_begin(struct page_ext_iter *iter, struct page *page)
+{
+	iter->pfn = page_to_pfn(page);
+	iter->page_ext = page_ext_get(page);
+
+	return iter->page_ext;
+}
+
+/**
+ * page_ext_iter_get() - Get current page extension
+ * @iter: page extension iterator.
+ *
+ * Return: NULL if no page_ext exists for this iterator.
+ */
+struct page_ext *page_ext_iter_get(const struct page_ext_iter *iter)
+{
+	return iter->page_ext;
+}
+
+/**
+ * page_ext_iter_next() - Get next page extension
+ * @iter: page extension iterator.
+ *
+ * Return: NULL if no next page_ext exists.
+ */
+struct page_ext *page_ext_iter_next(struct page_ext_iter *iter)
+{
+	if (!iter->page_ext)
+		return NULL;
+
+	page_ext_put(iter->page_ext);
+
+	iter->pfn++;
+	iter->page_ext = page_ext_get(pfn_to_page(iter->pfn));
+
+	return iter->page_ext;
+}
+
+/**
+ * page_ext_iter_end() - End iteration through page extensions.
+ * @iter: page extension iterator.
+ */
+void page_ext_iter_end(struct page_ext_iter *iter)
+{
+	page_ext_put(iter->page_ext);
+	iter->page_ext = NULL;
+}