diff mbox series

[v1,2/3] selftests/mm: gup_longterm: new functional test for FOLL_LONGTERM

Message ID 20230519102723.185721-3-david@redhat.com (mailing list archive)
State Accepted
Commit c879462a08feafe1bc10f34089f39932a2e1d712
Headers show
Series selftests/mm: new test for FOLL_LONGTERM on file mappings | expand

Commit Message

David Hildenbrand May 19, 2023, 10:27 a.m. UTC
Let's add a new test for checking whether GUP long-term page pinning
works as expected (R/O vs. R/W, MAP_PRIVATE vs. MAP_SHARED, GUP vs.
GUP-fast). Note that COW handling with long-term R/O pinning in private
mappings, and pinning of anonymous memory in general, is tested by the
COW selftest. This test, therefore, focuses on page pinning in
file mappings.

The most interesting case is probably the "local tmpfile" case, as that
will likely end up on a "real" filesystem such as ext4 or xfs, not on a
virtual one like tmpfs or hugetlb where any long-term page pinning is
always expected to succeed.

For now, only add tests that use the "/sys/kernel/debug/gup_test"
interface. We'll add tests based on liburing separately next.

Signed-off-by: David Hildenbrand <david@redhat.com>
---
 tools/testing/selftests/mm/Makefile       |   3 +
 tools/testing/selftests/mm/gup_longterm.c | 386 ++++++++++++++++++++++
 tools/testing/selftests/mm/run_vmtests.sh |   4 +-
 3 files changed, 392 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/mm/gup_longterm.c

Comments

Lorenzo Stoakes May 28, 2023, 3:03 p.m. UTC | #1
On Fri, May 19, 2023 at 12:27:22PM +0200, David Hildenbrand wrote:
> Let's add a new test for checking whether GUP long-term page pinning
> works as expected (R/O vs. R/W, MAP_PRIVATE vs. MAP_SHARED, GUP vs.
> GUP-fast). Note that COW handling with long-term R/O pinning in private
> mappings, and pinning of anonymous memory in general, is tested by the
> COW selftest. This test, therefore, focuses on page pinning in
> file mappings.
>
> The most interesting case is probably the "local tmpfile" case, as that
> will likely end up on a "real" filesystem such as ext4 or xfs, not on a
> virtual one like tmpfs or hugetlb where any long-term page pinning is
> always expected to succeed.
>
> For now, only add tests that use the "/sys/kernel/debug/gup_test"
> interface. We'll add tests based on liburing separately next.
>
> Signed-off-by: David Hildenbrand <david@redhat.com>
> ---
>  tools/testing/selftests/mm/Makefile       |   3 +
>  tools/testing/selftests/mm/gup_longterm.c | 386 ++++++++++++++++++++++
>  tools/testing/selftests/mm/run_vmtests.sh |   4 +-
>  3 files changed, 392 insertions(+), 1 deletion(-)
>  create mode 100644 tools/testing/selftests/mm/gup_longterm.c
>
> diff --git a/tools/testing/selftests/mm/Makefile b/tools/testing/selftests/mm/Makefile
> index 23af4633f0f4..95acb099315e 100644
> --- a/tools/testing/selftests/mm/Makefile
> +++ b/tools/testing/selftests/mm/Makefile
> @@ -34,6 +34,7 @@ LDLIBS = -lrt -lpthread
>
>  TEST_GEN_PROGS = cow
>  TEST_GEN_PROGS += compaction_test
> +TEST_GEN_PROGS += gup_longterm
>  TEST_GEN_PROGS += gup_test
>  TEST_GEN_PROGS += hmm-tests
>  TEST_GEN_PROGS += hugetlb-madvise
> @@ -164,6 +165,8 @@ endif
>  # IOURING_EXTRA_LIBS may get set in local_config.mk, or it may be left empty.
>  $(OUTPUT)/cow: LDLIBS += $(IOURING_EXTRA_LIBS)
>
> +$(OUTPUT)/gup_longterm: LDLIBS += $(IOURING_EXTRA_LIBS)
> +
>  $(OUTPUT)/mlock-random-test $(OUTPUT)/memfd_secret: LDLIBS += -lcap
>
>  $(OUTPUT)/ksm_tests: LDLIBS += -lnuma
> diff --git a/tools/testing/selftests/mm/gup_longterm.c b/tools/testing/selftests/mm/gup_longterm.c
> new file mode 100644
> index 000000000000..44a3617fd423
> --- /dev/null
> +++ b/tools/testing/selftests/mm/gup_longterm.c
> @@ -0,0 +1,386 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/*
> + * GUP long-term page pinning tests.
> + *
> + * Copyright 2023, Red Hat, Inc.
> + *
> + * Author(s): David Hildenbrand <david@redhat.com>
> + */
> +#define _GNU_SOURCE
> +#include <stdlib.h>
> +#include <string.h>
> +#include <stdbool.h>
> +#include <stdint.h>
> +#include <unistd.h>
> +#include <errno.h>
> +#include <fcntl.h>
> +#include <assert.h>
> +#include <sys/mman.h>
> +#include <sys/ioctl.h>
> +#include <sys/vfs.h>
> +#include <linux/magic.h>
> +#include <linux/memfd.h>
> +
> +#include "local_config.h"
> +
> +#include "../../../../mm/gup_test.h"
> +#include "../kselftest.h"
> +#include "vm_util.h"
> +
> +static size_t pagesize;
> +static int nr_hugetlbsizes;
> +static size_t hugetlbsizes[10];
> +static int gup_fd;
> +
> +static __fsword_t get_fs_type(int fd)
> +{
> +	struct statfs fs;
> +	int ret;
> +
> +	do {
> +		ret = fstatfs(fd, &fs);
> +	} while (ret && errno == EINTR);
> +
> +	return ret ? 0 : fs.f_type;
> +}
> +
> +static bool fs_is_unknown(__fsword_t fs_type)
> +{
> +	/*
> +	 * We only support some filesystems in our tests when dealing with
> +	 * R/W long-term pinning. For these filesystems, we can be fairly sure
> +	 * whether they support it or not.
> +	 */
> +	switch (fs_type) {
> +	case TMPFS_MAGIC:
> +	case HUGETLBFS_MAGIC:
> +	case BTRFS_SUPER_MAGIC:
> +	case EXT4_SUPER_MAGIC:
> +	case XFS_SUPER_MAGIC:
> +		return false;
> +	default:
> +		return true;
> +	}
> +}
> +
> +static bool fs_supports_writable_longterm_pinning(__fsword_t fs_type)
> +{
> +	assert(!fs_is_unknown(fs_type));
> +	switch (fs_type) {
> +	case TMPFS_MAGIC:
> +	case HUGETLBFS_MAGIC:
> +		return true;
> +	default:
> +		return false;
> +	}
> +}
> +
> +enum test_type {
> +	TEST_TYPE_RO,
> +	TEST_TYPE_RO_FAST,
> +	TEST_TYPE_RW,
> +	TEST_TYPE_RW_FAST,
> +};
> +
> +static void do_test(int fd, size_t size, enum test_type type, bool shared)
> +{
> +	__fsword_t fs_type = get_fs_type(fd);
> +	bool should_work;
> +	char *mem;
> +	int ret;
> +
> +	if (ftruncate(fd, size)) {
> +		ksft_test_result_fail("ftruncate() failed\n");
> +		return;
> +	}
> +
> +	if (fallocate(fd, 0, 0, size)) {
> +		if (size == pagesize)
> +			ksft_test_result_fail("fallocate() failed\n");
> +		else
> +			ksft_test_result_skip("need more free huge pages\n");
> +		return;
> +	}
> +
> +	mem = mmap(NULL, size, PROT_READ | PROT_WRITE,
> +		   shared ? MAP_SHARED : MAP_PRIVATE, fd, 0);
> +	if (mem == MAP_FAILED) {
> +		if (size == pagesize || shared)
> +			ksft_test_result_fail("mmap() failed\n");
> +		else
> +			ksft_test_result_skip("need more free huge pages\n");
> +		return;
> +	}
> +
> +	/*
> +	 * Fault in the page writable such that GUP-fast can eventually pin
> +	 * it immediately.
> +	 */
> +	memset(mem, 0, size);

Nitty but why not just MAP_POPULATE?

> +
> +	switch (type) {
> +	case TEST_TYPE_RO:
> +	case TEST_TYPE_RO_FAST:
> +	case TEST_TYPE_RW:
> +	case TEST_TYPE_RW_FAST: {
> +		struct pin_longterm_test args;
> +		const bool fast = type == TEST_TYPE_RO_FAST ||
> +				  type == TEST_TYPE_RW_FAST;
> +		const bool rw = type == TEST_TYPE_RW ||
> +				type == TEST_TYPE_RW_FAST;
> +
> +		if (gup_fd < 0) {
> +			ksft_test_result_skip("gup_test not available\n");
> +			break;
> +		}
> +
> +		if (rw && shared && fs_is_unknown(fs_type)) {
> +			ksft_test_result_skip("Unknown filesystem\n");
> +			return;
> +		}
> +		/*
> +		 * R/O pinning or pinning in a private mapping is always
> +		 * expected to work. Otherwise, we expect long-term R/W pinning
> +		 * to only succeed for special filesystems.
> +		 */
> +		should_work = !shared || !rw ||
> +			      fs_supports_writable_longterm_pinning(fs_type);
> +
> +		args.addr = (__u64)(uintptr_t)mem;
> +		args.size = size;
> +		args.flags = fast ? PIN_LONGTERM_TEST_FLAG_USE_FAST : 0;
> +		args.flags |= rw ? PIN_LONGTERM_TEST_FLAG_USE_WRITE : 0;
> +		ret = ioctl(gup_fd, PIN_LONGTERM_TEST_START, &args);
> +		if (ret && errno == EINVAL) {
> +			ksft_test_result_skip("PIN_LONGTERM_TEST_START failed\n");
> +			break;
> +		} else if (ret && errno == EFAULT) {
> +			ksft_test_result(!should_work, "Should have failed\n");
> +			break;
> +		} else if (ret) {
> +			ksft_test_result_fail("PIN_LONGTERM_TEST_START failed\n");
> +			break;
> +		}
> +
> +		if (ioctl(gup_fd, PIN_LONGTERM_TEST_STOP))
> +			ksft_print_msg("[INFO] PIN_LONGTERM_TEST_STOP failed\n");
> +
> +		/*
> +		 * TODO: if the kernel ever supports long-term R/W pinning on
> +		 * some previously unsupported filesystems, we might want to
> +		 * perform some additional tests for possible data corruptions.
> +		 */
> +		ksft_test_result(should_work, "Should have worked\n");
> +		break;
> +	}
> +	default:
> +		assert(false);
> +	}
> +
> +	munmap(mem, size);
> +}
> +
> +typedef void (*test_fn)(int fd, size_t size);
> +
> +static void run_with_memfd(test_fn fn, const char *desc)
> +{
> +	int fd;
> +
> +	ksft_print_msg("[RUN] %s ... with memfd\n", desc);
> +
> +	fd = memfd_create("test", 0);
> +	if (fd < 0) {
> +		ksft_test_result_fail("memfd_create() failed\n");
> +		return;
> +	}
> +
> +	fn(fd, pagesize);
> +	close(fd);
> +}
> +
> +static void run_with_tmpfile(test_fn fn, const char *desc)
> +{
> +	FILE *file;
> +	int fd;
> +
> +	ksft_print_msg("[RUN] %s ... with tmpfile\n", desc);
> +
> +	file = tmpfile();
> +	if (!file) {
> +		ksft_test_result_fail("tmpfile() failed\n");
> +		return;
> +	}
> +
> +	fd = fileno(file);
> +	if (fd < 0) {
> +		ksft_test_result_fail("fileno() failed\n");
> +		return;
> +	}
> +
> +	fn(fd, pagesize);
> +	fclose(file);
> +}
> +
> +static void run_with_local_tmpfile(test_fn fn, const char *desc)
> +{
> +	char filename[] = __FILE__"_tmpfile_XXXXXX";
> +	int fd;
> +
> +	ksft_print_msg("[RUN] %s ... with local tmpfile\n", desc);
> +
> +	fd = mkstemp(filename);
> +	if (fd < 0) {
> +		ksft_test_result_fail("mkstemp() failed\n");
> +		return;
> +	}
> +
> +	if (unlink(filename)) {
> +		ksft_test_result_fail("unlink() failed\n");
> +		goto close;
> +	}
> +
> +	fn(fd, pagesize);
> +close:
> +	close(fd);
> +}
> +
> +static void run_with_memfd_hugetlb(test_fn fn, const char *desc,
> +				   size_t hugetlbsize)
> +{
> +	int flags = MFD_HUGETLB;
> +	int fd;
> +
> +	ksft_print_msg("[RUN] %s ... with memfd hugetlb (%zu kB)\n", desc,
> +		       hugetlbsize / 1024);
> +
> +	flags |= __builtin_ctzll(hugetlbsize) << MFD_HUGE_SHIFT;

Hm this feels a little cute :)

> +
> +	fd = memfd_create("test", flags);
> +	if (fd < 0) {
> +		ksft_test_result_skip("memfd_create() failed\n");
> +		return;
> +	}
> +
> +	fn(fd, hugetlbsize);
> +	close(fd);
> +}
> +
> +struct test_case {
> +	const char *desc;
> +	test_fn fn;
> +};
> +
> +static void test_shared_rw_pin(int fd, size_t size)
> +{
> +	do_test(fd, size, TEST_TYPE_RW, true);
> +}
> +
> +static void test_shared_rw_fast_pin(int fd, size_t size)
> +{
> +	do_test(fd, size, TEST_TYPE_RW_FAST, true);
> +}
> +
> +static void test_shared_ro_pin(int fd, size_t size)
> +{
> +	do_test(fd, size, TEST_TYPE_RO, true);
> +}
> +
> +static void test_shared_ro_fast_pin(int fd, size_t size)
> +{
> +	do_test(fd, size, TEST_TYPE_RO_FAST, true);
> +}
> +
> +static void test_private_rw_pin(int fd, size_t size)
> +{
> +	do_test(fd, size, TEST_TYPE_RW, false);
> +}
> +
> +static void test_private_rw_fast_pin(int fd, size_t size)
> +{
> +	do_test(fd, size, TEST_TYPE_RW_FAST, false);
> +}
> +
> +static void test_private_ro_pin(int fd, size_t size)
> +{
> +	do_test(fd, size, TEST_TYPE_RO, false);
> +}
> +
> +static void test_private_ro_fast_pin(int fd, size_t size)
> +{
> +	do_test(fd, size, TEST_TYPE_RO_FAST, false);
> +}
> +
> +static const struct test_case test_cases[] = {
> +	{
> +		"R/W longterm GUP pin in MAP_SHARED file mapping",
> +		test_shared_rw_pin,
> +	},
> +	{
> +		"R/W longterm GUP-fast pin in MAP_SHARED file mapping",
> +		test_shared_rw_fast_pin,
> +	},
> +	{
> +		"R/O longterm GUP pin in MAP_SHARED file mapping",
> +		test_shared_ro_pin,
> +	},
> +	{
> +		"R/O longterm GUP-fast pin in MAP_SHARED file mapping",
> +		test_shared_ro_fast_pin,
> +	},
> +	{
> +		"R/W longterm GUP pin in MAP_PRIVATE file mapping",
> +		test_private_rw_pin,
> +	},
> +	{
> +		"R/W longterm GUP-fast pin in MAP_PRIVATE file mapping",
> +		test_private_rw_fast_pin,
> +	},
> +	{
> +		"R/O longterm GUP pin in MAP_PRIVATE file mapping",
> +		test_private_ro_pin,
> +	},
> +	{
> +		"R/O longterm GUP-fast pin in MAP_PRIVATE file mapping",
> +		test_private_ro_fast_pin,
> +	},
> +};
> +
> +static void run_test_case(struct test_case const *test_case)
> +{
> +	int i;
> +
> +	run_with_memfd(test_case->fn, test_case->desc);
> +	run_with_tmpfile(test_case->fn, test_case->desc);
> +	run_with_local_tmpfile(test_case->fn, test_case->desc);
> +	for (i = 0; i < nr_hugetlbsizes; i++)
> +		run_with_memfd_hugetlb(test_case->fn, test_case->desc,
> +				       hugetlbsizes[i]);
> +}
> +
> +static int tests_per_test_case(void)
> +{
> +	return 3 + nr_hugetlbsizes;
> +}
> +
> +int main(int argc, char **argv)
> +{
> +	int i, err;
> +
> +	pagesize = getpagesize();
> +	nr_hugetlbsizes = detect_hugetlb_page_sizes(hugetlbsizes,
> +						    ARRAY_SIZE(hugetlbsizes));
> +
> +	ksft_print_header();
> +	ksft_set_plan(ARRAY_SIZE(test_cases) * tests_per_test_case());
> +
> +	gup_fd = open("/sys/kernel/debug/gup_test", O_RDWR);
> +
> +	for (i = 0; i < ARRAY_SIZE(test_cases); i++)
> +		run_test_case(&test_cases[i]);
> +
> +	err = ksft_get_fail_cnt();
> +	if (err)
> +		ksft_exit_fail_msg("%d out of %d tests failed\n",
> +				   err, ksft_test_num());
> +	return ksft_exit_pass();
> +}
> diff --git a/tools/testing/selftests/mm/run_vmtests.sh b/tools/testing/selftests/mm/run_vmtests.sh
> index 4893eb60d96d..b6b1eb6a8a6b 100644
> --- a/tools/testing/selftests/mm/run_vmtests.sh
> +++ b/tools/testing/selftests/mm/run_vmtests.sh
> @@ -24,7 +24,7 @@ separated by spaces:
>  - mmap
>  	tests for mmap(2)
>  - gup_test
> -	tests for gup using gup_test interface
> +	tests for gup

Super nitty again, but I'm guessing this means the CONFIG_GUP_TEST
interface, perhaps worth keeping?

>  - userfaultfd
>  	tests for  userfaultfd(2)
>  - compaction
> @@ -196,6 +196,8 @@ CATEGORY="gup_test" run_test ./gup_test -a
>  # Dump pages 0, 19, and 4096, using pin_user_pages:
>  CATEGORY="gup_test" run_test ./gup_test -ct -F 0x1 0 19 0x1000
>
> +CATEGORY="gup_test" run_test ./gup_longterm
> +
>  CATEGORY="userfaultfd" run_test ./uffd-unit-tests
>  uffd_stress_bin=./uffd-stress
>  CATEGORY="userfaultfd" run_test ${uffd_stress_bin} anon 20 16
> --
> 2.40.1
>

OK this patch is really nice + well implemented, I can only point out a
couple EXTREMELY nitty comments :) Thanks very much for adding a test for
this, it's super useful!

Therefore,

Reviewed-by: Lorenzo Stoakes <lstoakes@gmail.com>
David Hildenbrand June 1, 2023, 8:16 a.m. UTC | #2
On 28.05.23 17:03, Lorenzo Stoakes wrote:
> On Fri, May 19, 2023 at 12:27:22PM +0200, David Hildenbrand wrote:
>> Let's add a new test for checking whether GUP long-term page pinning
>> works as expected (R/O vs. R/W, MAP_PRIVATE vs. MAP_SHARED, GUP vs.
>> GUP-fast). Note that COW handling with long-term R/O pinning in private
>> mappings, and pinning of anonymous memory in general, is tested by the
>> COW selftest. This test, therefore, focuses on page pinning in
>> file mappings.
>>
>> The most interesting case is probably the "local tmpfile" case, as that
>> will likely end up on a "real" filesystem such as ext4 or xfs, not on a
>> virtual one like tmpfs or hugetlb where any long-term page pinning is
>> always expected to succeed.
>>
>> For now, only add tests that use the "/sys/kernel/debug/gup_test"
>> interface. We'll add tests based on liburing separately next.
>>
>> Signed-off-by: David Hildenbrand <david@redhat.com>
>> ---

[...]

>> +static void do_test(int fd, size_t size, enum test_type type, bool shared)
>> +{
>> +	__fsword_t fs_type = get_fs_type(fd);
>> +	bool should_work;
>> +	char *mem;
>> +	int ret;
>> +
>> +	if (ftruncate(fd, size)) {
>> +		ksft_test_result_fail("ftruncate() failed\n");
>> +		return;
>> +	}
>> +
>> +	if (fallocate(fd, 0, 0, size)) {
>> +		if (size == pagesize)
>> +			ksft_test_result_fail("fallocate() failed\n");
>> +		else
>> +			ksft_test_result_skip("need more free huge pages\n");
>> +		return;
>> +	}
>> +
>> +	mem = mmap(NULL, size, PROT_READ | PROT_WRITE,
>> +		   shared ? MAP_SHARED : MAP_PRIVATE, fd, 0);
>> +	if (mem == MAP_FAILED) {
>> +		if (size == pagesize || shared)
>> +			ksft_test_result_fail("mmap() failed\n");
>> +		else
>> +			ksft_test_result_skip("need more free huge pages\n");
>> +		return;
>> +	}
>> +
>> +	/*
>> +	 * Fault in the page writable such that GUP-fast can eventually pin
>> +	 * it immediately.
>> +	 */
>> +	memset(mem, 0, size);
> 

For shared mappings, MAP_POPULATE will not fault-in the pages writable. 
See mm/gup.c:populate_vma_page_range().

[There is also the case that mmap() doesn't fail if populate fails, but 
that's only a side note regarding weird semantics of MAP_POPULATE]

[...]

>> +	int flags = MFD_HUGETLB;
>> +	int fd;
>> +
>> +	ksft_print_msg("[RUN] %s ... with memfd hugetlb (%zu kB)\n", desc,
>> +		       hugetlbsize / 1024);
>> +
>> +	flags |= __builtin_ctzll(hugetlbsize) << MFD_HUGE_SHIFT;
> 
> Hm this feels a little cute :)

It's a weird interface, having to specify the desired size via flags 
... see the man page of memfd_create, which links to the man page of 
mmap: "the desired huge page size can be configured by encoding the 
base-2 logarithm of the desired page size in the six bits at the offset 
MAP_HUGE_SHIFT".

FWIW, we're using the same approach in cow.c already [and other memfd 
users like QEMU do it just like that, using ctz].

[...]

>> diff --git a/tools/testing/selftests/mm/run_vmtests.sh b/tools/testing/selftests/mm/run_vmtests.sh
>> index 4893eb60d96d..b6b1eb6a8a6b 100644
>> --- a/tools/testing/selftests/mm/run_vmtests.sh
>> +++ b/tools/testing/selftests/mm/run_vmtests.sh
>> @@ -24,7 +24,7 @@ separated by spaces:
>>   - mmap
>>   	tests for mmap(2)
>>   - gup_test
>> -	tests for gup using gup_test interface
>> +	tests for gup
> 
> Super nitty again, but I'm guessing this means the CONFIG_GUP_TEST
> interface, perhaps worth keeping?

With this patch, agreed. But no longer with the next patch -- guess I 
simplified when splitting it up. If there are no strong feelings I'll 
leave it in this patch.

[...]

>>
> 
> OK this patch is really nice + well implemented, I can only point out a
> couple EXTREMELY nitty comments :) Thanks very much for adding a test for
> this, it's super useful!
> 
> Therefore,
> 
> Reviewed-by: Lorenzo Stoakes <lstoakes@gmail.com>
> 

Thanks for the review! My selftest patches rarely get that much 
attention, so highly appreciated :)
Lorenzo Stoakes June 1, 2023, 9:41 p.m. UTC | #3
On Thu, Jun 01, 2023 at 10:16:41AM +0200, David Hildenbrand wrote:
> On 28.05.23 17:03, Lorenzo Stoakes wrote:
> > On Fri, May 19, 2023 at 12:27:22PM +0200, David Hildenbrand wrote:
> > > Let's add a new test for checking whether GUP long-term page pinning
> > > works as expected (R/O vs. R/W, MAP_PRIVATE vs. MAP_SHARED, GUP vs.
> > > GUP-fast). Note that COW handling with long-term R/O pinning in private
> > > mappings, and pinning of anonymous memory in general, is tested by the
> > > COW selftest. This test, therefore, focuses on page pinning in
> > > file mappings.
> > >
> > > The most interesting case is probably the "local tmpfile" case, as that
> > > will likely end up on a "real" filesystem such as ext4 or xfs, not on a
> > > virtual one like tmpfs or hugetlb where any long-term page pinning is
> > > always expected to succeed.
> > >
> > > For now, only add tests that use the "/sys/kernel/debug/gup_test"
> > > interface. We'll add tests based on liburing separately next.
> > >
> > > Signed-off-by: David Hildenbrand <david@redhat.com>
> > > ---
>
> [...]
>
> > > +static void do_test(int fd, size_t size, enum test_type type, bool shared)
> > > +{
> > > +	__fsword_t fs_type = get_fs_type(fd);
> > > +	bool should_work;
> > > +	char *mem;
> > > +	int ret;
> > > +
> > > +	if (ftruncate(fd, size)) {
> > > +		ksft_test_result_fail("ftruncate() failed\n");
> > > +		return;
> > > +	}
> > > +
> > > +	if (fallocate(fd, 0, 0, size)) {
> > > +		if (size == pagesize)
> > > +			ksft_test_result_fail("fallocate() failed\n");
> > > +		else
> > > +			ksft_test_result_skip("need more free huge pages\n");
> > > +		return;
> > > +	}
> > > +
> > > +	mem = mmap(NULL, size, PROT_READ | PROT_WRITE,
> > > +		   shared ? MAP_SHARED : MAP_PRIVATE, fd, 0);
> > > +	if (mem == MAP_FAILED) {
> > > +		if (size == pagesize || shared)
> > > +			ksft_test_result_fail("mmap() failed\n");
> > > +		else
> > > +			ksft_test_result_skip("need more free huge pages\n");
> > > +		return;
> > > +	}
> > > +
> > > +	/*
> > > +	 * Fault in the page writable such that GUP-fast can eventually pin
> > > +	 * it immediately.
> > > +	 */
> > > +	memset(mem, 0, size);
> >
>
> For shared mappings, MAP_POPULATE will not fault-in the pages writable. See
> mm/gup.c:populate_vma_page_range().

Ughhh yeah, I was aware but hadn't considered the shared case, here. Fair
enough.

>
> [There is also the case that mmap() doesn't fail if populate fails, but
> that's only a side note regarding weird semantics of MAP_POPULATE]

Yes this is... a thing. And mm_populate() explicitly (void)-casting
__mm_populate() is the cherry on that particular cake :)

>
> [...]
>
> > > +	int flags = MFD_HUGETLB;
> > > +	int fd;
> > > +
> > > +	ksft_print_msg("[RUN] %s ... with memfd hugetlb (%zu kB)\n", desc,
> > > +		       hugetlbsize / 1024);
> > > +
> > > +	flags |= __builtin_ctzll(hugetlbsize) << MFD_HUGE_SHIFT;
> >
> > Hm this feels a little cute :)
>
> It's a weird interface, having to specify the desired size via flags ...
> see the man page of memfd_create, which links to the man page of mmap: "the
> desired huge page size can be configured by encoding the base-2 logarithm of
> the desired page size in the six bits at the offset MAP_HUGE_SHIFT".
>
> FWIW, we're using the same approach in cow.c already [and other memfd users
> like QEMU do it just like that, using ctz].

Ack, yeah I had assumed so, just felt slightly odd. Thanks for the
explanation!

>
> [...]
>
> > > diff --git a/tools/testing/selftests/mm/run_vmtests.sh b/tools/testing/selftests/mm/run_vmtests.sh
> > > index 4893eb60d96d..b6b1eb6a8a6b 100644
> > > --- a/tools/testing/selftests/mm/run_vmtests.sh
> > > +++ b/tools/testing/selftests/mm/run_vmtests.sh
> > > @@ -24,7 +24,7 @@ separated by spaces:
> > >   - mmap
> > >   	tests for mmap(2)
> > >   - gup_test
> > > -	tests for gup using gup_test interface
> > > +	tests for gup
> >
> > Super nitty again, but I'm guessing this means the CONFIG_GUP_TEST
> > interface, perhaps worth keeping?
>
> With this patch, agreed. But no longer with the next patch -- guess I
> simplified when splitting it up. If there are no strong feelings I'll leave
> it in this patch.
>
> [...]
>
> > >
> >
> > OK this patch is really nice + well implemented, I can only point out a
> > couple EXTREMELY nitty comments :) Thanks very much for adding a test for
> > this, it's super useful!
> >
> > Therefore,
> >
> > Reviewed-by: Lorenzo Stoakes <lstoakes@gmail.com>
> >
>
> Thanks for the review! My selftest patches rarely get that much attention,
> so highly appreciated :)

No worries, this is very much in my wheelhouse (relating directly to my
recent GUP series) so this is actually very useful and relevant to me. Also
I am very much in favour of improved test coverage -- it's a bugbear of mine.

>
> --
> Thanks,
>
> David / dhildenb
>
John Hubbard June 6, 2023, 6:23 a.m. UTC | #4
On 5/19/23 03:27, David Hildenbrand wrote:
...
> diff --git a/tools/testing/selftests/mm/Makefile b/tools/testing/selftests/mm/Makefile
> index 23af4633f0f4..95acb099315e 100644
> --- a/tools/testing/selftests/mm/Makefile
> +++ b/tools/testing/selftests/mm/Makefile
> @@ -34,6 +34,7 @@ LDLIBS = -lrt -lpthread
>  
>  TEST_GEN_PROGS = cow
>  TEST_GEN_PROGS += compaction_test
> +TEST_GEN_PROGS += gup_longterm

Hi David,

Peter Xu just pointed out that we need a .gitignore entry for
gup_longterm [1]. That logically belongs in this patch, I think.


[1] https://lore.kernel.org/all/ZH4FFa4FV9KGwBZo@x1n

thanks,
David Hildenbrand June 6, 2023, 7:10 a.m. UTC | #5
On 06.06.23 08:23, John Hubbard wrote:
> On 5/19/23 03:27, David Hildenbrand wrote:
> ...
>> diff --git a/tools/testing/selftests/mm/Makefile b/tools/testing/selftests/mm/Makefile
>> index 23af4633f0f4..95acb099315e 100644
>> --- a/tools/testing/selftests/mm/Makefile
>> +++ b/tools/testing/selftests/mm/Makefile
>> @@ -34,6 +34,7 @@ LDLIBS = -lrt -lpthread
>>   
>>   TEST_GEN_PROGS = cow
>>   TEST_GEN_PROGS += compaction_test
>> +TEST_GEN_PROGS += gup_longterm
> 
> Hi David,
> 
> Peter Xu just pointed out that we need a .gitignore entry for
> gup_longterm [1]. That logically belongs in this patch, I think.

Yes, although I don't care if it's in a follow-up patch (we're talking 
testcases after all).

@Andrew can you include that hunk in that patch or do you want a resend?
Andrew Morton June 7, 2023, 2:42 a.m. UTC | #6
On Tue, 6 Jun 2023 09:10:22 +0200 David Hildenbrand <david@redhat.com> wrote:

> On 06.06.23 08:23, John Hubbard wrote:
> > On 5/19/23 03:27, David Hildenbrand wrote:
> > ...
> >> diff --git a/tools/testing/selftests/mm/Makefile b/tools/testing/selftests/mm/Makefile
> >> index 23af4633f0f4..95acb099315e 100644
> >> --- a/tools/testing/selftests/mm/Makefile
> >> +++ b/tools/testing/selftests/mm/Makefile
> >> @@ -34,6 +34,7 @@ LDLIBS = -lrt -lpthread
> >>   
> >>   TEST_GEN_PROGS = cow
> >>   TEST_GEN_PROGS += compaction_test
> >> +TEST_GEN_PROGS += gup_longterm
> > 
> > Hi David,
> > 
> > Peter Xu just pointed out that we need a .gitignore entry for
> > gup_longterm [1]. That logically belongs in this patch, I think.
> 
> Yes, although I don't care if it's in a follow-up patch (we're talking 
> testcases after all).
> 
> @Andrew can you include that hunk in that patch or do you want a resend?

I added this:

From: Andrew Morton <akpm@linux-foundation.org>
Subject: selftests-mm-gup_longterm-new-functional-test-for-foll_longterm-fix
Date: Tue Jun  6 07:41:28 PM PDT 2023

update .gitignore for gup_longterm, per Peter

Cc: David Hildenbrand <david@redhat.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Jason Gunthorpe <jgg@nvidia.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: John Hubbard <jhubbard@nvidia.com>
Cc: Lorenzo Stoakes <lstoakes@gmail.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: Shuah Khan <shuah@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---

 tools/testing/selftests/mm/.gitignore |    1 +
 1 file changed, 1 insertion(+)

--- a/tools/testing/selftests/mm/.gitignore~selftests-mm-gup_longterm-new-functional-test-for-foll_longterm-fix
+++ a/tools/testing/selftests/mm/.gitignore
@@ -39,3 +39,4 @@ local_config.h
 local_config.mk
 ksm_functional_tests
 mdwe_test
+gup_longterm
diff mbox series

Patch

diff --git a/tools/testing/selftests/mm/Makefile b/tools/testing/selftests/mm/Makefile
index 23af4633f0f4..95acb099315e 100644
--- a/tools/testing/selftests/mm/Makefile
+++ b/tools/testing/selftests/mm/Makefile
@@ -34,6 +34,7 @@  LDLIBS = -lrt -lpthread
 
 TEST_GEN_PROGS = cow
 TEST_GEN_PROGS += compaction_test
+TEST_GEN_PROGS += gup_longterm
 TEST_GEN_PROGS += gup_test
 TEST_GEN_PROGS += hmm-tests
 TEST_GEN_PROGS += hugetlb-madvise
@@ -164,6 +165,8 @@  endif
 # IOURING_EXTRA_LIBS may get set in local_config.mk, or it may be left empty.
 $(OUTPUT)/cow: LDLIBS += $(IOURING_EXTRA_LIBS)
 
+$(OUTPUT)/gup_longterm: LDLIBS += $(IOURING_EXTRA_LIBS)
+
 $(OUTPUT)/mlock-random-test $(OUTPUT)/memfd_secret: LDLIBS += -lcap
 
 $(OUTPUT)/ksm_tests: LDLIBS += -lnuma
diff --git a/tools/testing/selftests/mm/gup_longterm.c b/tools/testing/selftests/mm/gup_longterm.c
new file mode 100644
index 000000000000..44a3617fd423
--- /dev/null
+++ b/tools/testing/selftests/mm/gup_longterm.c
@@ -0,0 +1,386 @@ 
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * GUP long-term page pinning tests.
+ *
+ * Copyright 2023, Red Hat, Inc.
+ *
+ * Author(s): David Hildenbrand <david@redhat.com>
+ */
+#define _GNU_SOURCE
+#include <stdlib.h>
+#include <string.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <assert.h>
+#include <sys/mman.h>
+#include <sys/ioctl.h>
+#include <sys/vfs.h>
+#include <linux/magic.h>
+#include <linux/memfd.h>
+
+#include "local_config.h"
+
+#include "../../../../mm/gup_test.h"
+#include "../kselftest.h"
+#include "vm_util.h"
+
+/* Base page size; set from getpagesize() in main(). */
+static size_t pagesize;
+/* Value returned by detect_hugetlb_page_sizes(); bounds the loop over hugetlbsizes[]. */
+static int nr_hugetlbsizes;
+/* Buffer handed to detect_hugetlb_page_sizes(); entries are used as mapping sizes in bytes. */
+static size_t hugetlbsizes[10];
+/* Descriptor for /sys/kernel/debug/gup_test; negative if open() failed, in which case tests are skipped. */
+static int gup_fd;
+
+/*
+ * Return the filesystem magic (statfs f_type) of the filesystem backing @fd,
+ * or 0 if fstatfs() fails for any reason other than being interrupted.
+ */
+static __fsword_t get_fs_type(int fd)
+{
+	struct statfs stbuf;
+
+	/* Retry for as long as the call is merely interrupted (EINTR). */
+	while (fstatfs(fd, &stbuf)) {
+		if (errno != EINTR)
+			return 0;
+	}
+
+	return stbuf.f_type;
+}
+
+/*
+ * Tell whether @fs_type is outside the short list of filesystems this test
+ * knows how to judge.
+ *
+ * Only a handful of filesystems are supported when dealing with R/W
+ * long-term pinning; for those we can be fairly sure whether they support
+ * it or not. Everything else is reported as unknown.
+ */
+static bool fs_is_unknown(__fsword_t fs_type)
+{
+	const bool known = fs_type == TMPFS_MAGIC ||
+			   fs_type == HUGETLBFS_MAGIC ||
+			   fs_type == BTRFS_SUPER_MAGIC ||
+			   fs_type == EXT4_SUPER_MAGIC ||
+			   fs_type == XFS_SUPER_MAGIC;
+
+	return !known;
+}
+
+/*
+ * Tell whether R/W long-term pinning in a shared mapping is expected to
+ * succeed on @fs_type: true for tmpfs and hugetlbfs, false for the other
+ * known filesystems. Must not be called with an unknown filesystem type.
+ */
+static bool fs_supports_writable_longterm_pinning(__fsword_t fs_type)
+{
+	assert(!fs_is_unknown(fs_type));
+
+	return fs_type == TMPFS_MAGIC || fs_type == HUGETLBFS_MAGIC;
+}
+
+/* Kind of long-term pin that do_test() requests through the gup_test fd. */
+enum test_type {
+	TEST_TYPE_RO,		/* neither the write nor the fast flag is set */
+	TEST_TYPE_RO_FAST,	/* PIN_LONGTERM_TEST_FLAG_USE_FAST only */
+	TEST_TYPE_RW,		/* PIN_LONGTERM_TEST_FLAG_USE_WRITE only */
+	TEST_TYPE_RW_FAST,	/* both the write and the fast flag are set */
+};
+
+/*
+ * Map @size bytes of @fd and check that a long-term pin of the mapping
+ * succeeds or fails as expected for the backing filesystem.
+ *
+ * @fd:     file backing the mapping; it is resized and preallocated to @size
+ * @size:   length of the file and of the mapping
+ * @type:   R/O vs. R/W pin, regular GUP vs. GUP-fast
+ * @shared: use MAP_SHARED if true, MAP_PRIVATE otherwise
+ *
+ * Every path through this function reports one kselftest result (pass,
+ * fail or skip). Once the mapping exists, all paths leave via the munmap()
+ * at the end.
+ */
+static void do_test(int fd, size_t size, enum test_type type, bool shared)
+{
+	__fsword_t fs_type = get_fs_type(fd);
+	bool should_work;
+	char *mem;
+	int ret;
+
+	if (ftruncate(fd, size)) {
+		ksft_test_result_fail("ftruncate() failed\n");
+		return;
+	}
+
+	if (fallocate(fd, 0, 0, size)) {
+		/*
+		 * For anything but the base page size, a failure is reported
+		 * as a lack of free huge pages instead of a test failure.
+		 */
+		if (size == pagesize)
+			ksft_test_result_fail("fallocate() failed\n");
+		else
+			ksft_test_result_skip("need more free huge pages\n");
+		return;
+	}
+
+	mem = mmap(NULL, size, PROT_READ | PROT_WRITE,
+		   shared ? MAP_SHARED : MAP_PRIVATE, fd, 0);
+	if (mem == MAP_FAILED) {
+		if (size == pagesize || shared)
+			ksft_test_result_fail("mmap() failed\n");
+		else
+			ksft_test_result_skip("need more free huge pages\n");
+		return;
+	}
+
+	/*
+	 * Fault in the page writable such that GUP-fast can eventually pin
+	 * it immediately.
+	 */
+	memset(mem, 0, size);
+
+	switch (type) {
+	case TEST_TYPE_RO:
+	case TEST_TYPE_RO_FAST:
+	case TEST_TYPE_RW:
+	case TEST_TYPE_RW_FAST: {
+		struct pin_longterm_test args;
+		const bool fast = type == TEST_TYPE_RO_FAST ||
+				  type == TEST_TYPE_RW_FAST;
+		const bool rw = type == TEST_TYPE_RW ||
+				type == TEST_TYPE_RW_FAST;
+
+		if (gup_fd < 0) {
+			ksft_test_result_skip("gup_test not available\n");
+			break;
+		}
+
+		if (rw && shared && fs_is_unknown(fs_type)) {
+			ksft_test_result_skip("Unknown filesystem\n");
+			/* break, not return: the mapping must still be unmapped */
+			break;
+		}
+		/*
+		 * R/O pinning or pinning in a private mapping is always
+		 * expected to work. Otherwise, we expect long-term R/W pinning
+		 * to only succeed for special filesystems.
+		 */
+		should_work = !shared || !rw ||
+			      fs_supports_writable_longterm_pinning(fs_type);
+
+		args.addr = (__u64)(uintptr_t)mem;
+		args.size = size;
+		args.flags = fast ? PIN_LONGTERM_TEST_FLAG_USE_FAST : 0;
+		args.flags |= rw ? PIN_LONGTERM_TEST_FLAG_USE_WRITE : 0;
+		ret = ioctl(gup_fd, PIN_LONGTERM_TEST_START, &args);
+		if (ret && errno == EINVAL) {
+			/* EINVAL is not counted as a failure, only as a skip. */
+			ksft_test_result_skip("PIN_LONGTERM_TEST_START failed\n");
+			break;
+		} else if (ret && errno == EFAULT) {
+			/* EFAULT passes only if the pin was expected to fail. */
+			ksft_test_result(!should_work, "Should have failed\n");
+			break;
+		} else if (ret) {
+			ksft_test_result_fail("PIN_LONGTERM_TEST_START failed\n");
+			break;
+		}
+
+		/* A failing STOP is only logged; it does not change the result. */
+		if (ioctl(gup_fd, PIN_LONGTERM_TEST_STOP))
+			ksft_print_msg("[INFO] PIN_LONGTERM_TEST_STOP failed\n");
+
+		/*
+		 * TODO: if the kernel ever supports long-term R/W pinning on
+		 * some previously unsupported filesystems, we might want to
+		 * perform some additional tests for possible data corruptions.
+		 */
+		ksft_test_result(should_work, "Should have worked\n");
+		break;
+	}
+	default:
+		assert(false);
+	}
+
+	munmap(mem, size);
+}
+
+/* Test body called by the run_with_*() helpers with an open file and the size to test with. */
+typedef void (*test_fn)(int fd, size_t size);
+
+/*
+ * Run @fn against a fresh memfd using the base page size. @desc is only
+ * used for the "[RUN]" log line. A memfd_create() failure is reported as a
+ * test failure.
+ */
+static void run_with_memfd(test_fn fn, const char *desc)
+{
+	int memfd;
+
+	ksft_print_msg("[RUN] %s ... with memfd\n", desc);
+
+	memfd = memfd_create("test", 0);
+	if (memfd >= 0) {
+		fn(memfd, pagesize);
+		close(memfd);
+		return;
+	}
+
+	ksft_test_result_fail("memfd_create() failed\n");
+}
+
+/*
+ * Run @fn against a file obtained from tmpfile() using the base page size.
+ * @desc is only used for the "[RUN]" log line.
+ *
+ * A tmpfile() or fileno() failure is reported as a test failure. The stream
+ * is closed on every path once it has been opened.
+ */
+static void run_with_tmpfile(test_fn fn, const char *desc)
+{
+	FILE *file;
+	int fd;
+
+	ksft_print_msg("[RUN] %s ... with tmpfile\n", desc);
+
+	file = tmpfile();
+	if (!file) {
+		ksft_test_result_fail("tmpfile() failed\n");
+		return;
+	}
+
+	fd = fileno(file);
+	if (fd < 0) {
+		ksft_test_result_fail("fileno() failed\n");
+		/* Do not leak the stream when its descriptor is unusable. */
+		goto close;
+	}
+
+	fn(fd, pagesize);
+close:
+	fclose(file);
+}
+
+/*
+ * Run @fn against a temporary file created with mkstemp() using the base
+ * page size. The file name is derived from __FILE__ and the file is
+ * unlinked right after creation, so only the open descriptor keeps it
+ * alive. @desc is only used for the "[RUN]" log line.
+ */
+static void run_with_local_tmpfile(test_fn fn, const char *desc)
+{
+	char filename[] = __FILE__"_tmpfile_XXXXXX";
+	int tmp_fd;
+
+	ksft_print_msg("[RUN] %s ... with local tmpfile\n", desc);
+
+	tmp_fd = mkstemp(filename);
+	if (tmp_fd < 0) {
+		ksft_test_result_fail("mkstemp() failed\n");
+		return;
+	}
+
+	/* The test body only runs once the name is gone from the filesystem. */
+	if (unlink(filename))
+		ksft_test_result_fail("unlink() failed\n");
+	else
+		fn(tmp_fd, pagesize);
+
+	close(tmp_fd);
+}
+
+/*
+ * Run @fn against a hugetlb-backed memfd whose huge page size is
+ * @hugetlbsize bytes; that size is also what @fn gets to test with.
+ * @desc is only used for the "[RUN]" log line.
+ *
+ * Unlike the plain memfd variant, a memfd_create() failure is reported as
+ * a skip and not as a failure.
+ */
+static void run_with_memfd_hugetlb(test_fn fn, const char *desc,
+				   size_t hugetlbsize)
+{
+	/*
+	 * The huge page size is requested by placing its number of trailing
+	 * zero bits (log2 for a power-of-two size) at MFD_HUGE_SHIFT.
+	 */
+	const int flags = MFD_HUGETLB |
+			  (__builtin_ctzll(hugetlbsize) << MFD_HUGE_SHIFT);
+	int memfd;
+
+	ksft_print_msg("[RUN] %s ... with memfd hugetlb (%zu kB)\n", desc,
+		       hugetlbsize / 1024);
+
+	memfd = memfd_create("test", flags);
+	if (memfd < 0) {
+		ksft_test_result_skip("memfd_create() failed\n");
+		return;
+	}
+
+	fn(memfd, hugetlbsize);
+	close(memfd);
+}
+
+/* One entry of the test_cases[] table. */
+struct test_case {
+	const char *desc;	/* name printed in the "[RUN]" log line */
+	test_fn fn;		/* test body, run once per kind of backing file */
+};
+
+/* R/W long-term pin via regular GUP in a MAP_SHARED mapping of @fd. */
+static void test_shared_rw_pin(int fd, size_t size)
+{
+	const bool shared = true;
+
+	do_test(fd, size, TEST_TYPE_RW, shared);
+}
+
+/* R/W long-term pin via GUP-fast in a MAP_SHARED mapping of @fd. */
+static void test_shared_rw_fast_pin(int fd, size_t size)
+{
+	const bool shared = true;
+
+	do_test(fd, size, TEST_TYPE_RW_FAST, shared);
+}
+
+/* R/O long-term pin via regular GUP in a MAP_SHARED mapping of @fd. */
+static void test_shared_ro_pin(int fd, size_t size)
+{
+	const bool shared = true;
+
+	do_test(fd, size, TEST_TYPE_RO, shared);
+}
+
+/* R/O long-term pin via GUP-fast in a MAP_SHARED mapping of @fd. */
+static void test_shared_ro_fast_pin(int fd, size_t size)
+{
+	const bool shared = true;
+
+	do_test(fd, size, TEST_TYPE_RO_FAST, shared);
+}
+
+/* R/W long-term pin via regular GUP in a MAP_PRIVATE mapping of @fd. */
+static void test_private_rw_pin(int fd, size_t size)
+{
+	const bool shared = false;
+
+	do_test(fd, size, TEST_TYPE_RW, shared);
+}
+
+/* R/W long-term pin via GUP-fast in a MAP_PRIVATE mapping of @fd. */
+static void test_private_rw_fast_pin(int fd, size_t size)
+{
+	const bool shared = false;
+
+	do_test(fd, size, TEST_TYPE_RW_FAST, shared);
+}
+
+/* R/O long-term pin via regular GUP in a MAP_PRIVATE mapping of @fd. */
+static void test_private_ro_pin(int fd, size_t size)
+{
+	const bool shared = false;
+
+	do_test(fd, size, TEST_TYPE_RO, shared);
+}
+
+/* R/O long-term pin via GUP-fast in a MAP_PRIVATE mapping of @fd. */
+static void test_private_ro_fast_pin(int fd, size_t size)
+{
+	const bool shared = false;
+
+	do_test(fd, size, TEST_TYPE_RO_FAST, shared);
+}
+
+/*
+ * All test cases: every combination of R/W vs. R/O, GUP vs. GUP-fast and
+ * MAP_SHARED vs. MAP_PRIVATE. main() runs them in this order.
+ */
+static const struct test_case test_cases[] = {
+	{
+		.desc = "R/W longterm GUP pin in MAP_SHARED file mapping",
+		.fn = test_shared_rw_pin,
+	},
+	{
+		.desc = "R/W longterm GUP-fast pin in MAP_SHARED file mapping",
+		.fn = test_shared_rw_fast_pin,
+	},
+	{
+		.desc = "R/O longterm GUP pin in MAP_SHARED file mapping",
+		.fn = test_shared_ro_pin,
+	},
+	{
+		.desc = "R/O longterm GUP-fast pin in MAP_SHARED file mapping",
+		.fn = test_shared_ro_fast_pin,
+	},
+	{
+		.desc = "R/W longterm GUP pin in MAP_PRIVATE file mapping",
+		.fn = test_private_rw_pin,
+	},
+	{
+		.desc = "R/W longterm GUP-fast pin in MAP_PRIVATE file mapping",
+		.fn = test_private_rw_fast_pin,
+	},
+	{
+		.desc = "R/O longterm GUP pin in MAP_PRIVATE file mapping",
+		.fn = test_private_ro_pin,
+	},
+	{
+		.desc = "R/O longterm GUP-fast pin in MAP_PRIVATE file mapping",
+		.fn = test_private_ro_fast_pin,
+	},
+};
+
+/*
+ * Run one test case against every kind of backing file: a memfd, a
+ * tmpfile(), a local temporary file, and one hugetlb memfd per detected
+ * hugetlb size.
+ */
+static void run_test_case(struct test_case const *test_case)
+{
+	const test_fn fn = test_case->fn;
+	const char *desc = test_case->desc;
+	int idx = 0;
+
+	run_with_memfd(fn, desc);
+	run_with_tmpfile(fn, desc);
+	run_with_local_tmpfile(fn, desc);
+
+	while (idx < nr_hugetlbsizes)
+		run_with_memfd_hugetlb(fn, desc, hugetlbsizes[idx++]);
+}
+
+/*
+ * Number of results a single test case produces; must match the set of
+ * run_with_*() calls made by run_test_case().
+ */
+static int tests_per_test_case(void)
+{
+	/* memfd, tmpfile and local tmpfile */
+	const int nr_fixed_runs = 3;
+
+	return nr_fixed_runs + nr_hugetlbsizes;
+}
+
+/*
+ * Entry point: detect page sizes, announce the test plan, open the gup_test
+ * debugfs file, run all test cases and report the overall outcome.
+ */
+int main(int argc, char **argv)
+{
+	const struct test_case *tc;
+	int nr_failed;
+
+	pagesize = getpagesize();
+	nr_hugetlbsizes = detect_hugetlb_page_sizes(hugetlbsizes,
+						    ARRAY_SIZE(hugetlbsizes));
+
+	ksft_print_header();
+	ksft_set_plan(ARRAY_SIZE(test_cases) * tests_per_test_case());
+
+	/* A failed open is not fatal here; do_test() skips when gup_fd < 0. */
+	gup_fd = open("/sys/kernel/debug/gup_test", O_RDWR);
+
+	for (tc = test_cases; tc != test_cases + ARRAY_SIZE(test_cases); tc++)
+		run_test_case(tc);
+
+	nr_failed = ksft_get_fail_cnt();
+	if (nr_failed)
+		ksft_exit_fail_msg("%d out of %d tests failed\n",
+				   nr_failed, ksft_test_num());
+	return ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/mm/run_vmtests.sh b/tools/testing/selftests/mm/run_vmtests.sh
index 4893eb60d96d..b6b1eb6a8a6b 100644
--- a/tools/testing/selftests/mm/run_vmtests.sh
+++ b/tools/testing/selftests/mm/run_vmtests.sh
@@ -24,7 +24,7 @@  separated by spaces:
 - mmap
 	tests for mmap(2)
 - gup_test
-	tests for gup using gup_test interface
+	tests for gup
 - userfaultfd
 	tests for  userfaultfd(2)
 - compaction
@@ -196,6 +196,8 @@  CATEGORY="gup_test" run_test ./gup_test -a
 # Dump pages 0, 19, and 4096, using pin_user_pages:
 CATEGORY="gup_test" run_test ./gup_test -ct -F 0x1 0 19 0x1000
 
+CATEGORY="gup_test" run_test ./gup_longterm
+
 CATEGORY="userfaultfd" run_test ./uffd-unit-tests
 uffd_stress_bin=./uffd-stress
 CATEGORY="userfaultfd" run_test ${uffd_stress_bin} anon 20 16