diff mbox series

[2/2] selftests/mm: Do not fail test for a single migration failure

Message ID 20240809103129.365029-3-dev.jain@arm.com (mailing list archive)
State New
Headers show
Series Improve migration by backing off earlier | expand

Commit Message

Dev Jain Aug. 9, 2024, 10:31 a.m. UTC
Do not fail the test for just a single instance of migration failure,
since migration is a best-effort service.

Signed-off-by: Dev Jain <dev.jain@arm.com>
Suggested-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Ryan Roberts <ryan.roberts@arm.com>
Tested-by: Ryan Roberts <ryan.roberts@arm.com>
---
 tools/testing/selftests/mm/migration.c | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

Comments

Shuah Khan Aug. 9, 2024, 5:13 p.m. UTC | #1
On 8/9/24 04:31, Dev Jain wrote:
> Do not fail the test for just a single instance of migration failure,
> since migration is a best-effort service.

The cover letter says:

"Given that migration is a best-effort service, it is wrong to fail the
test for just a single failure; hence, fail the test after 100 consecutive
failures (where 100 is still a subjective choice)."

You do want to mention the above here.

The reason being, I would like to know what this does to the run-time of
this test if migration fails and is retried 100 times.

> 
> Signed-off-by: Dev Jain <dev.jain@arm.com>
> Suggested-by: David Hildenbrand <david@redhat.com>
> Reviewed-by: Ryan Roberts <ryan.roberts@arm.com>
> Tested-by: Ryan Roberts <ryan.roberts@arm.com>
> ---
>   tools/testing/selftests/mm/migration.c | 17 +++++++++++------
>   1 file changed, 11 insertions(+), 6 deletions(-)
> 
> diff --git a/tools/testing/selftests/mm/migration.c b/tools/testing/selftests/mm/migration.c
> index 6908569ef406..64bcbb7151cf 100644
> --- a/tools/testing/selftests/mm/migration.c
> +++ b/tools/testing/selftests/mm/migration.c
> @@ -15,10 +15,10 @@
>   #include <signal.h>
>   #include <time.h>
>   
> -#define TWOMEG (2<<20)
> -#define RUNTIME (20)
> -
> -#define ALIGN(x, a) (((x) + (a - 1)) & (~((a) - 1)))
> +#define TWOMEG		(2<<20)
> +#define RUNTIME		(20)
> +#define MAX_RETRIES	100
> +#define ALIGN(x, a)	(((x) + (a - 1)) & (~((a) - 1)))
>   
>   FIXTURE(migration)
>   {
> @@ -65,6 +65,7 @@ int migrate(uint64_t *ptr, int n1, int n2)
>   	int ret, tmp;
>   	int status = 0;
>   	struct timespec ts1, ts2;
> +	int failures = 0;
>   
>   	if (clock_gettime(CLOCK_MONOTONIC, &ts1))
>   		return -1;
> @@ -79,13 +80,17 @@ int migrate(uint64_t *ptr, int n1, int n2)
>   		ret = move_pages(0, 1, (void **) &ptr, &n2, &status,
>   				MPOL_MF_MOVE_ALL);
>   		if (ret) {
> -			if (ret > 0)
> +			if (ret > 0) {
> +				/* Migration is best effort; try again */
> +				if (++failures < MAX_RETRIES)
> +					continue;
>   				printf("Didn't migrate %d pages\n", ret);
> +			}
>   			else
>   				perror("Couldn't migrate pages");
>   			return -2;
>   		}
> -
> +		failures = 0;
>   		tmp = n2;
>   		n2 = n1;
>   		n1 = tmp;

thanks,
-- Shuah
Christoph Lameter (Ampere) Aug. 9, 2024, 9:10 p.m. UTC | #2
On Fri, 9 Aug 2024, Shuah Khan wrote:

> "Given that migration is a best-effort service, it is wrong to fail the
> test for just a single failure; hence, fail the test after 100 consecutive
> failures (where 100 is still a subjective choice)."
>
> You do want to mention the above here.
>
> The reason being, I would like to know what this does to the run-time of
> this test if migration fails and retried 100 times.

If we back off earlier without engaging too much with the page, then we can
in turn afford to retry more times.
Dev Jain Aug. 12, 2024, 6:19 a.m. UTC | #3
On 8/9/24 22:43, Shuah Khan wrote:
> On 8/9/24 04:31, Dev Jain wrote:
>> Do not fail the test for just a single instance of migration failure,
>> since migration is a best-effort service.
>
> The cover letter says:
>
> "Given that migration is a best-effort service, it is wrong to fail the
> test for just a single failure; hence, fail the test after 100 consecutive
> failures (where 100 is still a subjective choice)."
>
> You do want to mention the above here.


Sure, shall update in v2.


>
> The reason being, I would like to know what this does to the run-time of
> this test if migration fails and retried 100 times.


Sure; just to note, it won't affect the execution time of the test, since
that is controlled by a timeout mechanism.


>
>>
>> Signed-off-by: Dev Jain <dev.jain@arm.com>
>> Suggested-by: David Hildenbrand <david@redhat.com>
>> Reviewed-by: Ryan Roberts <ryan.roberts@arm.com>
>> Tested-by: Ryan Roberts <ryan.roberts@arm.com>
>> ---
>>   tools/testing/selftests/mm/migration.c | 17 +++++++++++------
>>   1 file changed, 11 insertions(+), 6 deletions(-)
>>
>> diff --git a/tools/testing/selftests/mm/migration.c 
>> b/tools/testing/selftests/mm/migration.c
>> index 6908569ef406..64bcbb7151cf 100644
>> --- a/tools/testing/selftests/mm/migration.c
>> +++ b/tools/testing/selftests/mm/migration.c
>> @@ -15,10 +15,10 @@
>>   #include <signal.h>
>>   #include <time.h>
>>   -#define TWOMEG (2<<20)
>> -#define RUNTIME (20)
>> -
>> -#define ALIGN(x, a) (((x) + (a - 1)) & (~((a) - 1)))
>> +#define TWOMEG        (2<<20)
>> +#define RUNTIME        (20)
>> +#define MAX_RETRIES    100
>> +#define ALIGN(x, a)    (((x) + (a - 1)) & (~((a) - 1)))
>>     FIXTURE(migration)
>>   {
>> @@ -65,6 +65,7 @@ int migrate(uint64_t *ptr, int n1, int n2)
>>       int ret, tmp;
>>       int status = 0;
>>       struct timespec ts1, ts2;
>> +    int failures = 0;
>>         if (clock_gettime(CLOCK_MONOTONIC, &ts1))
>>           return -1;
>> @@ -79,13 +80,17 @@ int migrate(uint64_t *ptr, int n1, int n2)
>>           ret = move_pages(0, 1, (void **) &ptr, &n2, &status,
>>                   MPOL_MF_MOVE_ALL);
>>           if (ret) {
>> -            if (ret > 0)
>> +            if (ret > 0) {
>> +                /* Migration is best effort; try again */
>> +                if (++failures < MAX_RETRIES)
>> +                    continue;
>>                   printf("Didn't migrate %d pages\n", ret);
>> +            }
>>               else
>>                   perror("Couldn't migrate pages");
>>               return -2;
>>           }
>> -
>> +        failures = 0;
>>           tmp = n2;
>>           n2 = n1;
>>           n1 = tmp;
>
> thanks,
> -- Shuah
diff mbox series

Patch

diff --git a/tools/testing/selftests/mm/migration.c b/tools/testing/selftests/mm/migration.c
index 6908569ef406..64bcbb7151cf 100644
--- a/tools/testing/selftests/mm/migration.c
+++ b/tools/testing/selftests/mm/migration.c
@@ -15,10 +15,10 @@ 
 #include <signal.h>
 #include <time.h>
 
-#define TWOMEG (2<<20)
-#define RUNTIME (20)
-
-#define ALIGN(x, a) (((x) + (a - 1)) & (~((a) - 1)))
+#define TWOMEG		(2<<20)
+#define RUNTIME		(20)
+#define MAX_RETRIES	100
+#define ALIGN(x, a)	(((x) + (a - 1)) & (~((a) - 1)))
 
 FIXTURE(migration)
 {
@@ -65,6 +65,7 @@  int migrate(uint64_t *ptr, int n1, int n2)
 	int ret, tmp;
 	int status = 0;
 	struct timespec ts1, ts2;
+	int failures = 0;
 
 	if (clock_gettime(CLOCK_MONOTONIC, &ts1))
 		return -1;
@@ -79,13 +80,17 @@  int migrate(uint64_t *ptr, int n1, int n2)
 		ret = move_pages(0, 1, (void **) &ptr, &n2, &status,
 				MPOL_MF_MOVE_ALL);
 		if (ret) {
-			if (ret > 0)
+			if (ret > 0) {
+				/* Migration is best effort; try again */
+				if (++failures < MAX_RETRIES)
+					continue;
 				printf("Didn't migrate %d pages\n", ret);
+			}
 			else
 				perror("Couldn't migrate pages");
 			return -2;
 		}
-
+		failures = 0;
 		tmp = n2;
 		n2 = n1;
 		n1 = tmp;