diff mbox series

[2/2] selftests/hmm-tests: Add test for dirty bits

Message ID 8f19b172d32be2e889b837f88b1ba070bf2c97ee.1660281458.git-series.apopple@nvidia.com (mailing list archive)
State New
Headers show
Series [1/2] mm/migrate_device.c: Copy pte dirty bit to page | expand

Commit Message

Alistair Popple Aug. 12, 2022, 5:22 a.m. UTC
We were not correctly copying PTE dirty bits to pages during
migrate_vma_setup() calls. This could potentially lead to data loss, so
add a test for this.

Signed-off-by: Alistair Popple <apopple@nvidia.com>
---
 tools/testing/selftests/vm/hmm-tests.c | 124 ++++++++++++++++++++++++++-
 1 file changed, 124 insertions(+)

Comments

Mika Penttilä Aug. 12, 2022, 7:58 a.m. UTC | #1
Hi Alistair!

On 12.8.2022 8.22, Alistair Popple wrote:
> We were not correctly copying PTE dirty bits to pages during
> migrate_vma_setup() calls. This could potentially lead to data loss, so
> add a test for this.
> 
> Signed-off-by: Alistair Popple <apopple@nvidia.com>
> ---
>   tools/testing/selftests/vm/hmm-tests.c | 124 ++++++++++++++++++++++++++-
>   1 file changed, 124 insertions(+)
> 
> diff --git a/tools/testing/selftests/vm/hmm-tests.c b/tools/testing/selftests/vm/hmm-tests.c
> index 529f53b..70fdb49 100644
> --- a/tools/testing/selftests/vm/hmm-tests.c
> +++ b/tools/testing/selftests/vm/hmm-tests.c
> @@ -1200,6 +1200,130 @@ TEST_F(hmm, migrate_multiple)
>   	}
>   }
>   
> +static char cgroup[] = "/sys/fs/cgroup/hmm-test-XXXXXX";
> +static int write_cgroup_param(char *cgroup_path, char *param, long value)
> +{
> +	int ret;
> +	FILE *f;
> +	char *filename;
> +
> +	if (asprintf(&filename, "%s/%s", cgroup_path, param) < 0)
> +		return -1;
> +
> +	f = fopen(filename, "w");
> +	if (!f) {
> +		ret = -1;
> +		goto out;
> +	}
> +
> +	ret = fprintf(f, "%ld\n", value);
> +	if (ret < 0)
> +		goto out1;
> +
> +	ret = 0;
> +
> +out1:
> +	fclose(f);
> +out:
> +	free(filename);
> +
> +	return ret;
> +}
> +
> +static int setup_cgroup(void)
> +{
> +	pid_t pid = getpid();
> +	int ret;
> +
> +	if (!mkdtemp(cgroup))
> +		return -1;
> +
> +	ret = write_cgroup_param(cgroup, "cgroup.procs", pid);
> +	if (ret)
> +		return ret;
> +
> +	return 0;
> +}
> +
> +static int destroy_cgroup(void)
> +{
> +	pid_t pid = getpid();
> +	int ret;
> +
> +	ret = write_cgroup_param("/sys/fs/cgroup",
> +				"cgroup.procs", pid);
> +	if (ret)
> +		return ret;
> +
> +	if (rmdir(cgroup))
> +		return -1;
> +
> +	return 0;
> +}
> +
> +/*
> + * Try and migrate a dirty page that has previously been swapped to disk. This
> + * checks that we don't lose dirty bits.
> + */
> +TEST_F(hmm, migrate_dirty_page)
> +{
> +	struct hmm_buffer *buffer;
> +	unsigned long npages;
> +	unsigned long size;
> +	unsigned long i;
> +	int *ptr;
> +	int tmp = 0;
> +
> +	npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
> +	ASSERT_NE(npages, 0);
> +	size = npages << self->page_shift;
> +
> +	buffer = malloc(sizeof(*buffer));
> +	ASSERT_NE(buffer, NULL);
> +
> +	buffer->fd = -1;
> +	buffer->size = size;
> +	buffer->mirror = malloc(size);
> +	ASSERT_NE(buffer->mirror, NULL);
> +
> +	ASSERT_EQ(setup_cgroup(), 0);
> +
> +	buffer->ptr = mmap(NULL, size,
> +			   PROT_READ | PROT_WRITE,
> +			   MAP_PRIVATE | MAP_ANONYMOUS,
> +			   buffer->fd, 0);
> +	ASSERT_NE(buffer->ptr, MAP_FAILED);
> +
> +	/* Initialize buffer in system memory. */
> +	for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
> +		ptr[i] = 0;
> +
> +	ASSERT_FALSE(write_cgroup_param(cgroup, "memory.reclaim", 1UL<<30));
> +
> +	/* Fault pages back in from swap as clean pages */
> +	for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
> +		tmp += ptr[i];
> +
> +	/* Dirty the pte */
> +	for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
> +		ptr[i] = i;
> +

The anon pages are quite likely in memory at this point, and dirty in pte.



> +	/*
> +	 * Attempt to migrate memory to device, which should fail because
> +	 * hopefully some pages are backed by swap storage.
> +	 */
> +	ASSERT_TRUE(hmm_migrate_sys_to_dev(self->fd, buffer, npages));


And pages marked dirty also now. But could you elaborate how and where 
the above fails in more detail, couldn't immediately see it...



> +
> +	ASSERT_FALSE(write_cgroup_param(cgroup, "memory.reclaim", 1UL<<30));
> +
> +	/* Check we still see the updated data after restoring from swap. */
> +	for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
> +		ASSERT_EQ(ptr[i], i);
> +
> +	hmm_buffer_free(buffer);
> +	destroy_cgroup();
> +}
> +
>   /*
>    * Read anonymous memory multiple times.
>    */


--Mika
Alistair Popple Aug. 15, 2022, 2:35 a.m. UTC | #2
Mika Penttilä <mpenttil@redhat.com> writes:

> Hi Alistair!
>
> On 12.8.2022 8.22, Alistair Popple wrote:

[...]

>> +	buffer->ptr = mmap(NULL, size,
>> +			   PROT_READ | PROT_WRITE,
>> +			   MAP_PRIVATE | MAP_ANONYMOUS,
>> +			   buffer->fd, 0);
>> +	ASSERT_NE(buffer->ptr, MAP_FAILED);
>> +
>> +	/* Initialize buffer in system memory. */
>> +	for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
>> +		ptr[i] = 0;
>> +
>> +	ASSERT_FALSE(write_cgroup_param(cgroup, "memory.reclaim", 1UL<<30));
>> +
>> +	/* Fault pages back in from swap as clean pages */
>> +	for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
>> +		tmp += ptr[i];
>> +
>> +	/* Dirty the pte */
>> +	for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
>> +		ptr[i] = i;
>> +
>
> The anon pages are quite likely in memory at this point, and dirty in pte.

Why would the pte be dirty? I just confirmed using some modified pagemap
code that on my system at least this isn't the case.

>> +	/*
>> +	 * Attempt to migrate memory to device, which should fail because
>> +	 * hopefully some pages are backed by swap storage.
>> +	 */
>> +	ASSERT_TRUE(hmm_migrate_sys_to_dev(self->fd, buffer, npages));
>
> And pages marked dirty also now. But could you elaborate how and where the above
> fails in more detail, couldn't immediately see it...

Not if you don't have patch 1 of this series applied. If the
trylock_page() in migrate_vma_collect_pmd() succeeds (which it almost
always does) it will have cleared the pte without setting PageDirty.

So now we have a dirty page without PageDirty set and without a dirty
pte. If this page gets swapped back to disk and is still in the swap
cache data will be lost because reclaim will see a clean page and won't
write it out again.

At least that's my understanding - please let me know if you see
something that doesn't make sense.

>> +
>> +	ASSERT_FALSE(write_cgroup_param(cgroup, "memory.reclaim", 1UL<<30));
>> +
>> +	/* Check we still see the updated data after restoring from swap. */
>> +	for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
>> +		ASSERT_EQ(ptr[i], i);
>> +
>> +	hmm_buffer_free(buffer);
>> +	destroy_cgroup();
>> +}
>> +
>>   /*
>>    * Read anonymous memory multiple times.
>>    */
>
>
> --Mika
Mika Penttilä Aug. 15, 2022, 3:11 a.m. UTC | #3
On 15.8.2022 5.35, Alistair Popple wrote:
> 
> Mika Penttilä <mpenttil@redhat.com> writes:
> 
>> Hi Alistair!
>>
>> On 12.8.2022 8.22, Alistair Popple wrote:
> 
> [...]
> 
>>> +	buffer->ptr = mmap(NULL, size,
>>> +			   PROT_READ | PROT_WRITE,
>>> +			   MAP_PRIVATE | MAP_ANONYMOUS,
>>> +			   buffer->fd, 0);
>>> +	ASSERT_NE(buffer->ptr, MAP_FAILED);
>>> +
>>> +	/* Initialize buffer in system memory. */
>>> +	for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
>>> +		ptr[i] = 0;
>>> +
>>> +	ASSERT_FALSE(write_cgroup_param(cgroup, "memory.reclaim", 1UL<<30));
>>> +
>>> +	/* Fault pages back in from swap as clean pages */
>>> +	for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
>>> +		tmp += ptr[i];
>>> +
>>> +	/* Dirty the pte */
>>> +	for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
>>> +		ptr[i] = i;
>>> +
>>
>> The anon pages are quite likely in memory at this point, and dirty in pte.
> 
> Why would the pte be dirty? I just confirmed using some modified pagemap
> code that on my system at least this isn't the case.
> 
>>> +	/*
>>> +	 * Attempt to migrate memory to device, which should fail because
>>> +	 * hopefully some pages are backed by swap storage.
>>> +	 */
>>> +	ASSERT_TRUE(hmm_migrate_sys_to_dev(self->fd, buffer, npages));
>>
>> And pages marked dirty also now. But could you elaborate how and where the above
>> fails in more detail, couldn't immediately see it...
> 
> Not if you don't have patch 1 of this series applied. If the
> trylock_page() in migrate_vma_collect_pmd() succeeds (which it almost
> always does) it will have cleared the pte without setting PageDirty.
> 

Ah yes but I meant with the patch 1 applied, the comment "Attempt to 
migrate memory to device, which should fail because hopefully some pages 
are backed by swap storage" indicates that hmm_migrate_sys_to_dev() 
would fail..and there's that ASSERT_TRUE which means fail here.

So I understand the data loss but where is the hmm_migrate_sys_to_dev() 
failing, with or without patch 1 applied?




> So now we have a dirty page without PageDirty set and without a dirty
> pte. If this page gets swapped back to disk and is still in the swap
> cache data will be lost because reclaim will see a clean page and won't
> write it out again.
> 
> At least that's my understanding - please let me know if you see
> something that doesn't make sense.
> 
>>> +
>>> +	ASSERT_FALSE(write_cgroup_param(cgroup, "memory.reclaim", 1UL<<30));
>>> +
>>> +	/* Check we still see the updated data after restoring from swap. */
>>> +	for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
>>> +		ASSERT_EQ(ptr[i], i);
>>> +
>>> +	hmm_buffer_free(buffer);
>>> +	destroy_cgroup();
>>> +}
>>> +
>>>    /*
>>>     * Read anonymous memory multiple times.
>>>     */
>>
>>
>> --Mika
>
Alistair Popple Aug. 15, 2022, 3:21 a.m. UTC | #4
Mika Penttilä <mpenttil@redhat.com> writes:

> On 15.8.2022 5.35, Alistair Popple wrote:
>> Mika Penttilä <mpenttil@redhat.com> writes:
>>
>>> Hi Alistair!
>>>
>>> On 12.8.2022 8.22, Alistair Popple wrote:
>> [...]
>>
>>>> +	buffer->ptr = mmap(NULL, size,
>>>> +			   PROT_READ | PROT_WRITE,
>>>> +			   MAP_PRIVATE | MAP_ANONYMOUS,
>>>> +			   buffer->fd, 0);
>>>> +	ASSERT_NE(buffer->ptr, MAP_FAILED);
>>>> +
>>>> +	/* Initialize buffer in system memory. */
>>>> +	for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
>>>> +		ptr[i] = 0;
>>>> +
>>>> +	ASSERT_FALSE(write_cgroup_param(cgroup, "memory.reclaim", 1UL<<30));
>>>> +
>>>> +	/* Fault pages back in from swap as clean pages */
>>>> +	for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
>>>> +		tmp += ptr[i];
>>>> +
>>>> +	/* Dirty the pte */
>>>> +	for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
>>>> +		ptr[i] = i;
>>>> +
>>>
>>> The anon pages are quite likely in memory at this point, and dirty in pte.
>> Why would the pte be dirty? I just confirmed using some modified pagemap
>> code that on my system at least this isn't the case.
>>
>>>> +	/*
>>>> +	 * Attempt to migrate memory to device, which should fail because
>>>> +	 * hopefully some pages are backed by swap storage.
>>>> +	 */
>>>> +	ASSERT_TRUE(hmm_migrate_sys_to_dev(self->fd, buffer, npages));
>>>
>>> And pages marked dirty also now. But could you elaborate how and where the above
>>> fails in more detail, couldn't immediately see it...
>> Not if you don't have patch 1 of this series applied. If the
>> trylock_page() in migrate_vma_collect_pmd() succeeds (which it almost
>> always does) it will have cleared the pte without setting PageDirty.
>>
>
> Ah yes but I meant with the patch 1 applied, the comment "Attempt to migrate
> memory to device, which should fail because hopefully some pages are backed by
> swap storage" indicates that hmm_migrate_sys_to_dev() would fail..and there's
> that ASSERT_TRUE which means fail here.
>
> So I understand the data loss but where is the hmm_migrate_sys_to_dev() failing,
> with or without patch 1 applied?

Oh right. hmm_migrate_sys_to_dev() will fail because the page is in the
swap cache, and migrate_vma_*() doesn't currently support migrating
pages with a mapping.

>> So now we have a dirty page without PageDirty set and without a dirty
>> pte. If this page gets swapped back to disk and is still in the swap
>> cache data will be lost because reclaim will see a clean page and won't
>> write it out again.
>> At least that's my understanding - please let me know if you see
>> something that doesn't make sense.
>>
>>>> +
>>>> +	ASSERT_FALSE(write_cgroup_param(cgroup, "memory.reclaim", 1UL<<30));
>>>> +
>>>> +	/* Check we still see the updated data after restoring from swap. */
>>>> +	for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
>>>> +		ASSERT_EQ(ptr[i], i);
>>>> +
>>>> +	hmm_buffer_free(buffer);
>>>> +	destroy_cgroup();
>>>> +}
>>>> +
>>>>    /*
>>>>     * Read anonymous memory multiple times.
>>>>     */
>>>
>>>
>>> --Mika
>>
Mika Penttilä Aug. 15, 2022, 4:05 a.m. UTC | #5
On 15.8.2022 6.21, Alistair Popple wrote:
> 
> Mika Penttilä <mpenttil@redhat.com> writes:
> 
>> On 15.8.2022 5.35, Alistair Popple wrote:
>>> Mika Penttilä <mpenttil@redhat.com> writes:
>>>
>>>> Hi Alistair!
>>>>
>>>> On 12.8.2022 8.22, Alistair Popple wrote:
>>> [...]
>>>
>>>>> +	buffer->ptr = mmap(NULL, size,
>>>>> +			   PROT_READ | PROT_WRITE,
>>>>> +			   MAP_PRIVATE | MAP_ANONYMOUS,
>>>>> +			   buffer->fd, 0);
>>>>> +	ASSERT_NE(buffer->ptr, MAP_FAILED);
>>>>> +
>>>>> +	/* Initialize buffer in system memory. */
>>>>> +	for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
>>>>> +		ptr[i] = 0;
>>>>> +
>>>>> +	ASSERT_FALSE(write_cgroup_param(cgroup, "memory.reclaim", 1UL<<30));
>>>>> +
>>>>> +	/* Fault pages back in from swap as clean pages */
>>>>> +	for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
>>>>> +		tmp += ptr[i];
>>>>> +
>>>>> +	/* Dirty the pte */
>>>>> +	for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
>>>>> +		ptr[i] = i;
>>>>> +
>>>>
>>>> The anon pages are quite likely in memory at this point, and dirty in pte.
>>> Why would the pte be dirty? I just confirmed using some modified pagemap
>>> code that on my system at least this isn't the case.
>>>
>>>>> +	/*
>>>>> +	 * Attempt to migrate memory to device, which should fail because
>>>>> +	 * hopefully some pages are backed by swap storage.
>>>>> +	 */
>>>>> +	ASSERT_TRUE(hmm_migrate_sys_to_dev(self->fd, buffer, npages));
>>>>
>>>> And pages marked dirty also now. But could you elaborate how and where the above
>>>> fails in more detail, couldn't immediately see it...
>>> Not if you don't have patch 1 of this series applied. If the
>>> trylock_page() in migrate_vma_collect_pmd() succeeds (which it almost
>>> always does) it will have cleared the pte without setting PageDirty.
>>>
>>
>> Ah yes but I meant with the patch 1 applied, the comment "Attempt to migrate
>> memory to device, which should fail because hopefully some pages are backed by
>> swap storage" indicates that hmm_migrate_sys_to_dev() would fail..and there's
>> that ASSERT_TRUE which means fail here.
>>
>> So I understand the data loss but where is the hmm_migrate_sys_to_dev() failing,
>> with or wihtout patch 1 applied?
> 
> Oh right. hmm_migrate_sys_to_dev() will fail because the page is in the
> swap cache, and migrate_vma_*() doesn't currently support migrating
> pages with a mapping.
> 

Ok I forgot we skip also page cache pages, not just file pages...




>>> So now we have a dirty page without PageDirty set and without a dirty
>>> pte. If this page gets swapped back to disk and is still in the swap
>>> cache data will be lost because reclaim will see a clean page and won't
>>> write it out again.
>>> At least that's my understanding - please let me know if you see
>>> something that doesn't make sense.
>>>
>>>>> +
>>>>> +	ASSERT_FALSE(write_cgroup_param(cgroup, "memory.reclaim", 1UL<<30));
>>>>> +
>>>>> +	/* Check we still see the updated data after restoring from swap. */
>>>>> +	for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
>>>>> +		ASSERT_EQ(ptr[i], i);
>>>>> +
>>>>> +	hmm_buffer_free(buffer);
>>>>> +	destroy_cgroup();
>>>>> +}
>>>>> +
>>>>>     /*
>>>>>      * Read anonymous memory multiple times.
>>>>>      */
>>>>
>>>>
>>>> --Mika
>>>
>
Mika Penttilä Aug. 15, 2022, 4:06 a.m. UTC | #6
On 15.8.2022 7.05, Mika Penttilä wrote:
> 
> 
> On 15.8.2022 6.21, Alistair Popple wrote:
>>
>> Mika Penttilä <mpenttil@redhat.com> writes:
>>
>>> On 15.8.2022 5.35, Alistair Popple wrote:
>>>> Mika Penttilä <mpenttil@redhat.com> writes:
>>>>
>>>>> Hi Alistair!
>>>>>
>>>>> On 12.8.2022 8.22, Alistair Popple wrote:
>>>> [...]
>>>>
>>>>>> +    buffer->ptr = mmap(NULL, size,
>>>>>> +               PROT_READ | PROT_WRITE,
>>>>>> +               MAP_PRIVATE | MAP_ANONYMOUS,
>>>>>> +               buffer->fd, 0);
>>>>>> +    ASSERT_NE(buffer->ptr, MAP_FAILED);
>>>>>> +
>>>>>> +    /* Initialize buffer in system memory. */
>>>>>> +    for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
>>>>>> +        ptr[i] = 0;
>>>>>> +
>>>>>> +    ASSERT_FALSE(write_cgroup_param(cgroup, "memory.reclaim", 
>>>>>> 1UL<<30));
>>>>>> +
>>>>>> +    /* Fault pages back in from swap as clean pages */
>>>>>> +    for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
>>>>>> +        tmp += ptr[i];
>>>>>> +
>>>>>> +    /* Dirty the pte */
>>>>>> +    for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
>>>>>> +        ptr[i] = i;
>>>>>> +
>>>>>
>>>>> The anon pages are quite likely in memory at this point, and dirty 
>>>>> in pte.
>>>> Why would the pte be dirty? I just confirmed using some modified 
>>>> pagemap
>>>> code that on my system at least this isn't the case.
>>>>
>>>>>> +    /*
>>>>>> +     * Attempt to migrate memory to device, which should fail 
>>>>>> because
>>>>>> +     * hopefully some pages are backed by swap storage.
>>>>>> +     */
>>>>>> +    ASSERT_TRUE(hmm_migrate_sys_to_dev(self->fd, buffer, npages));
>>>>>
>>>>> And pages marked dirty also now. But could you elaborate how and 
>>>>> where the above
>>>>> fails in more detail, couldn't immediately see it...
>>>> Not if you don't have patch 1 of this series applied. If the
>>>> trylock_page() in migrate_vma_collect_pmd() succeeds (which it almost
>>>> always does) it will have cleared the pte without setting PageDirty.
>>>>
>>>
>>> Ah yes but I meant with the patch 1 applied, the comment "Attempt to 
>>> migrate
>>> memory to device, which should fail because hopefully some pages are 
>>> backed by
>>> swap storage" indicates that hmm_migrate_sys_to_dev() would fail..and 
>>> there's
>>> that ASSERT_TRUE which means fail here.
>>>
>>> So I understand the data loss but where is the 
>>> hmm_migrate_sys_to_dev() failing,
>>> with or without patch 1 applied?
>>
>> Oh right. hmm_migrate_sys_to_dev() will fail because the page is in the
>> swap cache, and migrate_vma_*() doesn't currently support migrating
>> pages with a mapping.
>>
> 
> Ok I forgot we skip also page cache pages, not just file pages...

Meant we skip swap cache pages also, not just file pages..



> 
> 
> 
> 
>>>> So now we have a dirty page without PageDirty set and without a dirty
>>>> pte. If this page gets swapped back to disk and is still in the swap
>>>> cache data will be lost because reclaim will see a clean page and won't
>>>> write it out again.
>>>> At least that's my understanding - please let me know if you see
>>>> something that doesn't make sense.
>>>>
>>>>>> +
>>>>>> +    ASSERT_FALSE(write_cgroup_param(cgroup, "memory.reclaim", 
>>>>>> 1UL<<30));
>>>>>> +
>>>>>> +    /* Check we still see the updated data after restoring from 
>>>>>> swap. */
>>>>>> +    for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
>>>>>> +        ASSERT_EQ(ptr[i], i);
>>>>>> +
>>>>>> +    hmm_buffer_free(buffer);
>>>>>> +    destroy_cgroup();
>>>>>> +}
>>>>>> +
>>>>>>     /*
>>>>>>      * Read anonymous memory multiple times.
>>>>>>      */
>>>>>
>>>>>
>>>>> --Mika
>>>>
>>
diff mbox series

Patch

diff --git a/tools/testing/selftests/vm/hmm-tests.c b/tools/testing/selftests/vm/hmm-tests.c
index 529f53b..70fdb49 100644
--- a/tools/testing/selftests/vm/hmm-tests.c
+++ b/tools/testing/selftests/vm/hmm-tests.c
@@ -1200,6 +1200,130 @@  TEST_F(hmm, migrate_multiple)
 	}
 }
 
+static char cgroup[] = "/sys/fs/cgroup/hmm-test-XXXXXX";
+static int write_cgroup_param(char *cgroup_path, char *param, long value)
+{
+	int ret;
+	FILE *f;
+	char *filename;
+
+	if (asprintf(&filename, "%s/%s", cgroup_path, param) < 0)
+		return -1;
+
+	f = fopen(filename, "w");
+	if (!f) {
+		ret = -1;
+		goto out;
+	}
+
+	ret = fprintf(f, "%ld\n", value);
+	if (ret < 0)
+		goto out1;
+
+	ret = 0;
+
+out1:
+	fclose(f);
+out:
+	free(filename);
+
+	return ret;
+}
+
+static int setup_cgroup(void)
+{
+	pid_t pid = getpid();
+	int ret;
+
+	if (!mkdtemp(cgroup))
+		return -1;
+
+	ret = write_cgroup_param(cgroup, "cgroup.procs", pid);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+static int destroy_cgroup(void)
+{
+	pid_t pid = getpid();
+	int ret;
+
+	ret = write_cgroup_param("/sys/fs/cgroup",
+				"cgroup.procs", pid);
+	if (ret)
+		return ret;
+
+	if (rmdir(cgroup))
+		return -1;
+
+	return 0;
+}
+
+/*
+ * Try and migrate a dirty page that has previously been swapped to disk. This
+ * checks that we don't loose dirty bits.
+ */
+TEST_F(hmm, migrate_dirty_page)
+{
+	struct hmm_buffer *buffer;
+	unsigned long npages;
+	unsigned long size;
+	unsigned long i;
+	int *ptr;
+	int tmp = 0;
+
+	npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
+	ASSERT_NE(npages, 0);
+	size = npages << self->page_shift;
+
+	buffer = malloc(sizeof(*buffer));
+	ASSERT_NE(buffer, NULL);
+
+	buffer->fd = -1;
+	buffer->size = size;
+	buffer->mirror = malloc(size);
+	ASSERT_NE(buffer->mirror, NULL);
+
+	ASSERT_EQ(setup_cgroup(), 0);
+
+	buffer->ptr = mmap(NULL, size,
+			   PROT_READ | PROT_WRITE,
+			   MAP_PRIVATE | MAP_ANONYMOUS,
+			   buffer->fd, 0);
+	ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+	/* Initialize buffer in system memory. */
+	for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+		ptr[i] = 0;
+
+	ASSERT_FALSE(write_cgroup_param(cgroup, "memory.reclaim", 1UL<<30));
+
+	/* Fault pages back in from swap as clean pages */
+	for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+		tmp += ptr[i];
+
+	/* Dirty the pte */
+	for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+		ptr[i] = i;
+
+	/*
+	 * Attempt to migrate memory to device, which should fail because
+	 * hopefully some pages are backed by swap storage.
+	 */
+	ASSERT_TRUE(hmm_migrate_sys_to_dev(self->fd, buffer, npages));
+
+	ASSERT_FALSE(write_cgroup_param(cgroup, "memory.reclaim", 1UL<<30));
+
+	/* Check we still see the updated data after restoring from swap. */
+	for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+		ASSERT_EQ(ptr[i], i);
+
+	hmm_buffer_free(buffer);
+	destroy_cgroup();
+}
+
 /*
  * Read anonymous memory multiple times.
  */