diff mbox series

[v2,7/7] mm: huge_memory: enable debugfs to split huge pages to any order.

Message ID 20230329011712.3242298-8-zi.yan@sent.com (mailing list archive)
State New
Headers show
Series Split a folio to any lower order folios | expand

Commit Message

Zi Yan March 29, 2023, 1:17 a.m. UTC
From: Zi Yan <ziy@nvidia.com>

It is used to test split_huge_page_to_list_to_order for pagecache THPs.
Also add test cases for split_huge_page_to_list_to_order via both
debugfs, truncating a file, and punching holes in a file.

Signed-off-by: Zi Yan <ziy@nvidia.com>
---
 mm/huge_memory.c                              |  34 ++-
 .../selftests/mm/split_huge_page_test.c       | 225 +++++++++++++++++-
 2 files changed, 242 insertions(+), 17 deletions(-)
diff mbox series

Patch

diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 619d25278340..ad5b29558a51 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -3023,7 +3023,7 @@  static inline bool vma_not_suitable_for_thp_split(struct vm_area_struct *vma)
 }
 
 static int split_huge_pages_pid(int pid, unsigned long vaddr_start,
-				unsigned long vaddr_end)
+				unsigned long vaddr_end, unsigned int new_order)
 {
 	int ret = 0;
 	struct task_struct *task;
@@ -3085,13 +3085,19 @@  static int split_huge_pages_pid(int pid, unsigned long vaddr_start,
 			goto next;
 
 		total++;
-		if (!can_split_folio(page_folio(page), NULL))
+		/*
+		 * For folios with private, split_huge_page_to_list_to_order()
+		 * will try to drop it before split and then check if the folio
+		 * can be split or not. So skip the check here.
+		 */
+		if (!folio_test_private(page_folio(page)) &&
+		    !can_split_folio(page_folio(page), NULL))
 			goto next;
 
 		if (!trylock_page(page))
 			goto next;
 
-		if (!split_huge_page(page))
+		if (!split_huge_page_to_list_to_order(page, NULL, new_order))
 			split++;
 
 		unlock_page(page);
@@ -3109,7 +3115,7 @@  static int split_huge_pages_pid(int pid, unsigned long vaddr_start,
 }
 
 static int split_huge_pages_in_file(const char *file_path, pgoff_t off_start,
-				pgoff_t off_end)
+				pgoff_t off_end, unsigned int new_order)
 {
 	struct filename *file;
 	struct file *candidate;
@@ -3148,7 +3154,7 @@  static int split_huge_pages_in_file(const char *file_path, pgoff_t off_start,
 		if (!folio_trylock(folio))
 			goto next;
 
-		if (!split_folio(folio))
+		if (!split_huge_page_to_list_to_order(&folio->page, NULL, new_order))
 			split++;
 
 		folio_unlock(folio);
@@ -3173,10 +3179,14 @@  static ssize_t split_huge_pages_write(struct file *file, const char __user *buf,
 {
 	static DEFINE_MUTEX(split_debug_mutex);
 	ssize_t ret;
-	/* hold pid, start_vaddr, end_vaddr or file_path, off_start, off_end */
+	/*
+	 * hold pid, start_vaddr, end_vaddr, new_order or
+	 * file_path, off_start, off_end, new_order
+	 */
 	char input_buf[MAX_INPUT_BUF_SZ];
 	int pid;
 	unsigned long vaddr_start, vaddr_end;
+	unsigned int new_order = 0;
 
 	ret = mutex_lock_interruptible(&split_debug_mutex);
 	if (ret)
@@ -3205,29 +3215,29 @@  static ssize_t split_huge_pages_write(struct file *file, const char __user *buf,
 			goto out;
 		}
 
-		ret = sscanf(buf, "0x%lx,0x%lx", &off_start, &off_end);
-		if (ret != 2) {
+		ret = sscanf(buf, "0x%lx,0x%lx,%d", &off_start, &off_end, &new_order);
+		if (ret != 2 && ret != 3) {
 			ret = -EINVAL;
 			goto out;
 		}
-		ret = split_huge_pages_in_file(file_path, off_start, off_end);
+		ret = split_huge_pages_in_file(file_path, off_start, off_end, new_order);
 		if (!ret)
 			ret = input_len;
 
 		goto out;
 	}
 
-	ret = sscanf(input_buf, "%d,0x%lx,0x%lx", &pid, &vaddr_start, &vaddr_end);
+	ret = sscanf(input_buf, "%d,0x%lx,0x%lx,%d", &pid, &vaddr_start, &vaddr_end, &new_order);
 	if (ret == 1 && pid == 1) {
 		split_huge_pages_all();
 		ret = strlen(input_buf);
 		goto out;
-	} else if (ret != 3) {
+	} else if (ret != 3 && ret != 4) {
 		ret = -EINVAL;
 		goto out;
 	}
 
-	ret = split_huge_pages_pid(pid, vaddr_start, vaddr_end);
+	ret = split_huge_pages_pid(pid, vaddr_start, vaddr_end, new_order);
 	if (!ret)
 		ret = strlen(input_buf);
 out:
diff --git a/tools/testing/selftests/mm/split_huge_page_test.c b/tools/testing/selftests/mm/split_huge_page_test.c
index b8558c7f1a39..cbb5e6893cbf 100644
--- a/tools/testing/selftests/mm/split_huge_page_test.c
+++ b/tools/testing/selftests/mm/split_huge_page_test.c
@@ -16,6 +16,7 @@ 
 #include <sys/mount.h>
 #include <malloc.h>
 #include <stdbool.h>
+#include <time.h>
 #include "vm_util.h"
 
 uint64_t pagesize;
@@ -23,10 +24,12 @@  unsigned int pageshift;
 uint64_t pmd_pagesize;
 
 #define SPLIT_DEBUGFS "/sys/kernel/debug/split_huge_pages"
+#define SMAP_PATH "/proc/self/smaps"
+#define THP_FS_PATH "/mnt/thp_fs"
 #define INPUT_MAX 80
 
-#define PID_FMT "%d,0x%lx,0x%lx"
-#define PATH_FMT "%s,0x%lx,0x%lx"
+#define PID_FMT "%d,0x%lx,0x%lx,%d"
+#define PATH_FMT "%s,0x%lx,0x%lx,%d"
 
 #define PFN_MASK     ((1UL<<55)-1)
 #define KPF_THP      (1UL<<22)
@@ -113,7 +116,7 @@  void split_pmd_thp(void)
 
 	/* split all THPs */
 	write_debugfs(PID_FMT, getpid(), (uint64_t)one_page,
-		(uint64_t)one_page + len);
+		(uint64_t)one_page + len, 0);
 
 	for (i = 0; i < len; i++)
 		if (one_page[i] != (char)i) {
@@ -203,7 +206,7 @@  void split_pte_mapped_thp(void)
 
 	/* split all remapped THPs */
 	write_debugfs(PID_FMT, getpid(), (uint64_t)pte_mapped,
-		      (uint64_t)pte_mapped + pagesize * 4);
+		      (uint64_t)pte_mapped + pagesize * 4, 0);
 
 	/* smap does not show THPs after mremap, use kpageflags instead */
 	thp_size = 0;
@@ -269,7 +272,7 @@  void split_file_backed_thp(void)
 	}
 
 	/* split the file-backed THP */
-	write_debugfs(PATH_FMT, testfile, pgoff_start, pgoff_end);
+	write_debugfs(PATH_FMT, testfile, pgoff_start, pgoff_end, 0);
 
 	status = unlink(testfile);
 	if (status)
@@ -290,20 +293,232 @@  void split_file_backed_thp(void)
 	printf("file-backed THP split test done, please check dmesg for more information\n");
 }
 
+void create_pagecache_thp_and_fd(const char *testfile, size_t fd_size, int *fd, char **addr)
+{
+	size_t i;
+	int dummy;
+
+	srand(time(NULL));
+
+	*fd = open(testfile, O_CREAT | O_RDWR, 0664);
+	if (*fd == -1) {
+		perror("Failed to create a file at "THP_FS_PATH);
+		exit(EXIT_FAILURE);
+	}
+
+	for (i = 0; i < fd_size; i++) {
+		unsigned char byte = (unsigned char)i;
+
+		write(*fd, &byte, sizeof(byte));
+	}
+	close(*fd);
+	sync();
+	*fd = open("/proc/sys/vm/drop_caches", O_WRONLY);
+	if (*fd == -1) {
+		perror("open drop_caches");
+		goto err_out_unlink;
+	}
+	if (write(*fd, "3", 1) != 1) {
+		perror("write to drop_caches");
+		goto err_out_unlink;
+	}
+	close(*fd);
+
+	*fd = open(testfile, O_RDWR);
+	if (*fd == -1) {
+		perror("Failed to open a file at "THP_FS_PATH);
+		goto err_out_unlink;
+	}
+
+	*addr = mmap(NULL, fd_size, PROT_READ|PROT_WRITE, MAP_SHARED, *fd, 0);
+	if (*addr == (char *)-1) {
+		perror("cannot mmap");
+		goto err_out_close;
+	}
+	madvise(*addr, fd_size, MADV_HUGEPAGE);
+
+	for (size_t i = 0; i < fd_size; i++)
+		dummy += *(*addr + i);
+
+	if (!check_huge_file(*addr, fd_size / pmd_pagesize, pmd_pagesize)) {
+		printf("No pagecache THP generated, please mount a filesystem supporting pagecache THP at "THP_FS_PATH"\n");
+		goto err_out_close;
+	}
+	return;
+err_out_close:
+	close(*fd);
+err_out_unlink:
+	unlink(testfile);
+	exit(EXIT_FAILURE);
+}
+
+void split_thp_in_pagecache_to_order(size_t fd_size, int order)
+{
+	int fd;
+	char *addr;
+	size_t i;
+	const char testfile[] = THP_FS_PATH "/test";
+	int err = 0;
+
+	create_pagecache_thp_and_fd(testfile, fd_size, &fd, &addr);
+
+	printf("split %ld kB PMD-mapped pagecache page to order %d ... ", fd_size >> 10, order);
+	write_debugfs(PID_FMT, getpid(), (uint64_t)addr, (uint64_t)addr + fd_size, order);
+
+	for (i = 0; i < fd_size; i++)
+		if (*(addr + i) != (char)i) {
+			printf("%lu byte corrupted in the file\n", i);
+			err = EXIT_FAILURE;
+			goto out;
+		}
+
+	if (!check_huge_file(addr, 0, pmd_pagesize)) {
+		printf("Still FilePmdMapped not split\n");
+		err = EXIT_FAILURE;
+		goto out;
+	}
+
+	printf("done\n");
+out:
+	close(fd);
+	unlink(testfile);
+	if (err)
+		exit(err);
+}
+
+void truncate_thp_in_pagecache_to_order(size_t fd_size, int order)
+{
+	int fd;
+	char *addr;
+	size_t i;
+	const char testfile[] = THP_FS_PATH "/test";
+	int err = 0;
+
+	create_pagecache_thp_and_fd(testfile, fd_size, &fd, &addr);
+
+	printf("truncate %ld kB PMD-mapped pagecache page to size %lu kB ... ",
+		fd_size >> 10, 4UL << order);
+	ftruncate(fd, pagesize << order);
+
+	for (i = 0; i < (pagesize << order); i++)
+		if (*(addr + i) != (char)i) {
+			printf("%lu byte corrupted in the file\n", i);
+			err = EXIT_FAILURE;
+			goto out;
+		}
+
+	if (!check_huge_file(addr, 0, pmd_pagesize)) {
+		printf("Still FilePmdMapped not split after truncate\n");
+		err = EXIT_FAILURE;
+		goto out;
+	}
+
+	printf("done\n");
+out:
+	close(fd);
+	unlink(testfile);
+	if (err)
+		exit(err);
+}
+
+void punch_hole_in_pagecache_thp(size_t fd_size, off_t offset[], off_t len[],
+			int n, int num_left_thps)
+{
+	int fd, j;
+	char *addr;
+	size_t i;
+	const char testfile[] = THP_FS_PATH "/test";
+	int err = 0;
+
+	create_pagecache_thp_and_fd(testfile, fd_size, &fd, &addr);
+
+	for (j = 0; j < n; j++) {
+		printf("punch a hole to %ld kB PMD-mapped pagecache page at addr: %lx, offset %ld, and len %ld ...\n",
+			fd_size >> 10, (unsigned long)addr, offset[j], len[j]);
+		fallocate(fd, FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE, offset[j], len[j]);
+	}
+
+	for (i = 0; i < fd_size; i++) {
+		int in_hole = 0;
+
+		for (j = 0; j < n; j++)
+			if (i >= offset[j] && i < (offset[j] + len[j])) {
+				in_hole = 1;
+				break;
+			}
+
+		if (in_hole) {
+			if (*(addr + i)) {
+				printf("%lu byte non-zero after punch\n", i);
+				err = EXIT_FAILURE;
+				goto out;
+			}
+			continue;
+		}
+		if (*(addr + i) != (char)i) {
+			printf("%lu byte corrupted in the file\n", i);
+			err = EXIT_FAILURE;
+			goto out;
+		}
+	}
+
+	if (!check_huge_file(addr, num_left_thps, pmd_pagesize)) {
+		printf("Still FilePmdMapped not split after punch\n");
+		goto out;
+	}
+	printf("done\n");
+out:
+	close(fd);
+	unlink(testfile);
+	if (err)
+		exit(err);
+}
+
 int main(int argc, char **argv)
 {
+	int i;
+	size_t fd_size;
+	off_t offset[2], len[2];
+
 	if (geteuid() != 0) {
 		printf("Please run the benchmark as root\n");
 		exit(EXIT_FAILURE);
 	}
 
+	setbuf(stdout, NULL);
+
 	pagesize = getpagesize();
 	pageshift = ffs(pagesize) - 1;
 	pmd_pagesize = read_pmd_pagesize();
+	fd_size = 2 * pmd_pagesize;
 
 	split_pmd_thp();
 	split_pte_mapped_thp();
 	split_file_backed_thp();
 
+	for (i = 8; i >= 0; i--)
+		if (i != 1)
+			split_thp_in_pagecache_to_order(fd_size, i);
+
+	/*
+	 * for i is 1, truncate code in the kernel should create order-0 pages
+	 * instead of order-1 THPs, since order-1 THP is not supported. No error
+	 * is expected.
+	 */
+	for (i = 8; i >= 0; i--)
+		truncate_thp_in_pagecache_to_order(fd_size, i);
+
+	offset[0] = 123;
+	offset[1] = 4 * pagesize;
+	len[0] = 200 * pagesize;
+	len[1] = 16 * pagesize;
+	punch_hole_in_pagecache_thp(fd_size, offset, len, 2, 1);
+
+	offset[0] = 259 * pagesize + pagesize / 2;
+	offset[1] = 33 * pagesize;
+	len[0] = 129 * pagesize;
+	len[1] = 16 * pagesize;
+	punch_hole_in_pagecache_thp(fd_size, offset, len, 2, 1);
+
 	return 0;
 }