diff mbox series

[PULL,10/38] tests/qtest/migration: Add a test for the analyze-migration script

Message ID 20231017083003.15951-11-quintela@redhat.com (mailing list archive)
State New, archived
Headers show
Series [PULL,01/38] migration: refactor migration_completion | expand

Commit Message

Juan Quintela Oct. 17, 2023, 8:29 a.m. UTC
From: Fabiano Rosas <farosas@suse.de>

Add a smoke test that migrates to a file and gives it to the
script. It should catch the most annoying errors such as changes in
the ram flags.

After code has been merged it becomes way harder to figure out what is
causing the script to fail, the person making the change is the most
likely to know right away what the problem is.

Signed-off-by: Fabiano Rosas <farosas@suse.de>
Acked-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
Message-ID: <20231009184326.15777-7-farosas@suse.de>
---
 tests/qtest/migration-test.c | 60 ++++++++++++++++++++++++++++++++++++
 tests/qtest/meson.build      |  2 ++
 2 files changed, 62 insertions(+)

Comments

Alex Bennée May 21, 2024, 12:24 p.m. UTC | #1
Juan Quintela <quintela@redhat.com> writes:

> From: Fabiano Rosas <farosas@suse.de>
>
> Add a smoke test that migrates to a file and gives it to the
> script. It should catch the most annoying errors such as changes in
> the ram flags.
>
> After code has been merged it becomes way harder to figure out what is
> causing the script to fail, the person making the change is the most
> likely to know right away what the problem is.
>
> Signed-off-by: Fabiano Rosas <farosas@suse.de>
> Acked-by: Thomas Huth <thuth@redhat.com>
> Reviewed-by: Juan Quintela <quintela@redhat.com>
> Signed-off-by: Juan Quintela <quintela@redhat.com>
> Message-ID: <20231009184326.15777-7-farosas@suse.de>

I bisected the failures I'm seeing on s390x to the introduction of this
script. I don't know if its simply a timeout on a relatively slow VM:

Summary of Failures:

 36/546 qemu:qtest+qtest-s390x / qtest-s390x/migration-test                       ERROR          93.51s   killed by signal 6 SIGABRT

It seems to be unstable as we pass sometimes:

11:26:42 [ajb@qemu01:~/l/q/b/system] master|… + ./pyvenv/bin/meson test --repeat 100 qtest-s390x/migration-test
ninja: Entering directory `/home/ajb/lsrc/qemu.git/builds/system'
[1/9] Generating qemu-version.h with a custom command (wrapped by meson to capture output)
  1/100 qemu:qtest+qtest-s390x / qtest-s390x/migration-test          ERROR          251.98s   killed by signal 6 SIGABRT
>>> MALLOC_PERTURB_=9 PYTHON=/home/ajb/lsrc/qemu.git/builds/system/pyvenv/bin/python3 G_TEST_DBUS_DAEMON=/home/ajb/lsrc/qemu.git/tests/dbus-vmstate-daemon.sh QTEST_QEMU_BINARY=./qemu-system-s390x QTEST_QEMU_IMG=./qemu-img QTEST_QEMU_STORAGE_DAEMON_BINARY=./storage-daemon/qemu-storage-daemon /home/ajb/lsrc/qemu.git/builds/system/tests/qtest/migration-test --tap -k

  2/100 qemu:qtest+qtest-s390x / qtest-s390x/migration-test          ERROR          258.71s   killed by signal 6 SIGABRT
>>> PYTHON=/home/ajb/lsrc/qemu.git/builds/system/pyvenv/bin/python3 MALLOC_PERTURB_=205 G_TEST_DBUS_DAEMON=/home/ajb/lsrc/qemu.git/tests/dbus-vmstate-daemon.sh QTEST_QEMU_BINARY=./qemu-system-s390x QTEST_QEMU_IMG=./qemu-img QTEST_QEMU_STORAGE_DAEMON_BINARY=./storage-daemon/qemu-storage-daemon /home/ajb/lsrc/qemu.git/builds/system/tests/qtest/migration-test --tap -k

  3/100 qemu:qtest+qtest-s390x / qtest-s390x/migration-test          OK             302.53s   46 subtests passed
  4/100 qemu:qtest+qtest-s390x / qtest-s390x/migration-test          OK             319.56s   46 subtests passed
  5/100 qemu:qtest+qtest-s390x / qtest-s390x/migration-test          OK             320.11s   46 subtests passed
  6/100 qemu:qtest+qtest-s390x / qtest-s390x/migration-test          OK             328.40s   46 subtests passed

Ok:                 4   
Expected Fail:      0   
Fail:               2   
Unexpected Pass:    0   
Skipped:            0   
Timeout:            0   

> ---
>  tests/qtest/migration-test.c | 60 ++++++++++++++++++++++++++++++++++++
>  tests/qtest/meson.build      |  2 ++
>  2 files changed, 62 insertions(+)
>
> diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c
> index 8eb2053dbb..cef5081f8c 100644
> --- a/tests/qtest/migration-test.c
> +++ b/tests/qtest/migration-test.c
> @@ -66,6 +66,8 @@ static bool got_dst_resume;
>   */
>  #define DIRTYLIMIT_TOLERANCE_RANGE  25  /* MB/s */
>  
> +#define ANALYZE_SCRIPT "scripts/analyze-migration.py"
> +
>  #if defined(__linux__)
>  #include <sys/syscall.h>
>  #include <sys/vfs.h>
> @@ -1501,6 +1503,61 @@ static void test_baddest(void)
>      test_migrate_end(from, to, false);
>  }
>  
> +#ifndef _WIN32
> +static void test_analyze_script(void)
> +{
> +    MigrateStart args = {
> +        .opts_source = "-uuid 11111111-1111-1111-1111-111111111111",
> +    };
> +    QTestState *from, *to;
> +    g_autofree char *uri = NULL;
> +    g_autofree char *file = NULL;
> +    int pid, wstatus;
> +    const char *python = g_getenv("PYTHON");
> +
> +    if (!python) {
> +        g_test_skip("PYTHON variable not set");
> +        return;
> +    }
> +
> +    /* dummy url */
> +    if (test_migrate_start(&from, &to, "tcp:127.0.0.1:0", &args)) {
> +        return;
> +    }
> +
> +    /*
> +     * Setting these two capabilities causes the "configuration"
> +     * vmstate to include subsections for them. The script needs to
> +     * parse those subsections properly.
> +     */
> +    migrate_set_capability(from, "validate-uuid", true);
> +    migrate_set_capability(from, "x-ignore-shared", true);
> +
> +    file = g_strdup_printf("%s/migfile", tmpfs);
> +    uri = g_strdup_printf("exec:cat > %s", file);
> +
> +    migrate_ensure_converge(from);
> +    migrate_qmp(from, uri, "{}");
> +    wait_for_migration_complete(from);
> +
> +    pid = fork();
> +    if (!pid) {
> +        close(1);
> +        open("/dev/null", O_WRONLY);
> +        execl(python, python, ANALYZE_SCRIPT, "-f", file, NULL);
> +        g_assert_not_reached();
> +    }
> +
> +    g_assert(waitpid(pid, &wstatus, 0) == pid);
> +    if (WIFEXITED(wstatus) && WEXITSTATUS(wstatus) != 0) {
> +        g_test_message("Failed to analyze the migration stream");
> +        g_test_fail();
> +    }
> +    test_migrate_end(from, to, false);
> +    cleanup("migfile");
> +}
> +#endif
> +
>  static void test_precopy_common(MigrateCommon *args)
>  {
>      QTestState *from, *to;
> @@ -2837,6 +2894,9 @@ int main(int argc, char **argv)
>      }
>  
>      qtest_add_func("/migration/bad_dest", test_baddest);
> +#ifndef _WIN32
> +    qtest_add_func("/migration/analyze-script", test_analyze_script);
> +#endif
>      qtest_add_func("/migration/precopy/unix/plain", test_precopy_unix_plain);
>      qtest_add_func("/migration/precopy/unix/xbzrle", test_precopy_unix_xbzrle);
>      /*
> diff --git a/tests/qtest/meson.build b/tests/qtest/meson.build
> index 66795cfcd2..d6022ebd64 100644
> --- a/tests/qtest/meson.build
> +++ b/tests/qtest/meson.build
> @@ -357,6 +357,8 @@ foreach dir : target_dirs
>      test_deps += [qsd]
>    endif
>  
> +  qtest_env.set('PYTHON', python.full_path())
> +
>    foreach test : target_qtests
>      # Executables are shared across targets, declare them only the first time we
>      # encounter them
Fabiano Rosas May 21, 2024, 12:46 p.m. UTC | #2
Alex Bennée <alex.bennee@linaro.org> writes:

> Juan Quintela <quintela@redhat.com> writes:
>
>> From: Fabiano Rosas <farosas@suse.de>
>>
>> Add a smoke test that migrates to a file and gives it to the
>> script. It should catch the most annoying errors such as changes in
>> the ram flags.
>>
>> After code has been merged it becomes way harder to figure out what is
>> causing the script to fail, the person making the change is the most
>> likely to know right away what the problem is.
>>
>> Signed-off-by: Fabiano Rosas <farosas@suse.de>
>> Acked-by: Thomas Huth <thuth@redhat.com>
>> Reviewed-by: Juan Quintela <quintela@redhat.com>
>> Signed-off-by: Juan Quintela <quintela@redhat.com>
>> Message-ID: <20231009184326.15777-7-farosas@suse.de>
>
> I bisected the failures I'm seeing on s390x to the introduction of this
> script. I don't know if its simply a timeout on a relatively slow VM:

What's the range of your bisect? That test has been disabled and then
reenabled on s390x. It could be tripping the bisect.

04131e0009 ("tests/qtest/migration-test: Disable the analyze-migration.py test on s390x")
81c2c9dd5d ("tests/qtest/migration-test: Fix analyze-migration.py for s390x")

I don't think that test itself could be timing out. It's a very simple
test. It runs a migration and then uses the output to validate the
script.

I don't have a Z machine at hand and the migration tests only run with
KVM for s390x, so it would be useful to take a look at meson's
testlog.txt so we can see which test is failing and hopefully in what
way it is failing.

If you're up for it, running this in a loop is usually the best way to
catch any intermittent issues:

QTEST_QEMU_BINARY=./qemu-system-x86_64 ./tests/qtest/migration-test

And once you figure out which test, there's this monstrosity:

QTEST_QEMU_BINARY='gdb -q --ex "set pagination off"          \
                          --ex "set print thread-events off" \
                          --ex "handle SIGUSR1 noprint"      \
                          --ex "handle SIGPIPE noprint"      \
                          --ex "run" --ex "quit \$_exitcode" \
                          --args ./qemu-system-x86_64'       \
                          gdb -q --ex "set prompt (qtest) "  \
                          --ex "handle SIGPIPE noprint"      \
                          --args ./tests/qtest/migration-test -p /x86_64/migration/<some>/<test>

> Summary of Failures:
>
>  36/546 qemu:qtest+qtest-s390x / qtest-s390x/migration-test                       ERROR          93.51s   killed by signal 6 SIGABRT
>
> It seems to be unstable as we pass sometimes:
>
> 11:26:42 [ajb@qemu01:~/l/q/b/system] master|… + ./pyvenv/bin/meson test --repeat 100 qtest-s390x/migration-test
> ninja: Entering directory `/home/ajb/lsrc/qemu.git/builds/system'
> [1/9] Generating qemu-version.h with a custom command (wrapped by meson to capture output)
>   1/100 qemu:qtest+qtest-s390x / qtest-s390x/migration-test          ERROR          251.98s   killed by signal 6 SIGABRT
>>>> MALLOC_PERTURB_=9 PYTHON=/home/ajb/lsrc/qemu.git/builds/system/pyvenv/bin/python3 G_TEST_DBUS_DAEMON=/home/ajb/lsrc/qemu.git/tests/dbus-vmstate-daemon.sh QTEST_QEMU_BINARY=./qemu-system-s390x QTEST_QEMU_IMG=./qemu-img QTEST_QEMU_STORAGE_DAEMON_BINARY=./storage-daemon/qemu-storage-daemon /home/ajb/lsrc/qemu.git/builds/system/tests/qtest/migration-test --tap -k
>
>   2/100 qemu:qtest+qtest-s390x / qtest-s390x/migration-test          ERROR          258.71s   killed by signal 6 SIGABRT
>>>> PYTHON=/home/ajb/lsrc/qemu.git/builds/system/pyvenv/bin/python3 MALLOC_PERTURB_=205 G_TEST_DBUS_DAEMON=/home/ajb/lsrc/qemu.git/tests/dbus-vmstate-daemon.sh QTEST_QEMU_BINARY=./qemu-system-s390x QTEST_QEMU_IMG=./qemu-img QTEST_QEMU_STORAGE_DAEMON_BINARY=./storage-daemon/qemu-storage-daemon /home/ajb/lsrc/qemu.git/builds/system/tests/qtest/migration-test --tap -k
>
>   3/100 qemu:qtest+qtest-s390x / qtest-s390x/migration-test          OK             302.53s   46 subtests passed
>   4/100 qemu:qtest+qtest-s390x / qtest-s390x/migration-test          OK             319.56s   46 subtests passed
>   5/100 qemu:qtest+qtest-s390x / qtest-s390x/migration-test          OK             320.11s   46 subtests passed
>   6/100 qemu:qtest+qtest-s390x / qtest-s390x/migration-test          OK             328.40s   46 subtests passed
>
> Ok:                 4   
> Expected Fail:      0   
> Fail:               2   
> Unexpected Pass:    0   
> Skipped:            0   
> Timeout:            0   
>
>> ---
>>  tests/qtest/migration-test.c | 60 ++++++++++++++++++++++++++++++++++++
>>  tests/qtest/meson.build      |  2 ++
>>  2 files changed, 62 insertions(+)
>>
>> diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c
>> index 8eb2053dbb..cef5081f8c 100644
>> --- a/tests/qtest/migration-test.c
>> +++ b/tests/qtest/migration-test.c
>> @@ -66,6 +66,8 @@ static bool got_dst_resume;
>>   */
>>  #define DIRTYLIMIT_TOLERANCE_RANGE  25  /* MB/s */
>>  
>> +#define ANALYZE_SCRIPT "scripts/analyze-migration.py"
>> +
>>  #if defined(__linux__)
>>  #include <sys/syscall.h>
>>  #include <sys/vfs.h>
>> @@ -1501,6 +1503,61 @@ static void test_baddest(void)
>>      test_migrate_end(from, to, false);
>>  }
>>  
>> +#ifndef _WIN32
>> +static void test_analyze_script(void)
>> +{
>> +    MigrateStart args = {
>> +        .opts_source = "-uuid 11111111-1111-1111-1111-111111111111",
>> +    };
>> +    QTestState *from, *to;
>> +    g_autofree char *uri = NULL;
>> +    g_autofree char *file = NULL;
>> +    int pid, wstatus;
>> +    const char *python = g_getenv("PYTHON");
>> +
>> +    if (!python) {
>> +        g_test_skip("PYTHON variable not set");
>> +        return;
>> +    }
>> +
>> +    /* dummy url */
>> +    if (test_migrate_start(&from, &to, "tcp:127.0.0.1:0", &args)) {
>> +        return;
>> +    }
>> +
>> +    /*
>> +     * Setting these two capabilities causes the "configuration"
>> +     * vmstate to include subsections for them. The script needs to
>> +     * parse those subsections properly.
>> +     */
>> +    migrate_set_capability(from, "validate-uuid", true);
>> +    migrate_set_capability(from, "x-ignore-shared", true);
>> +
>> +    file = g_strdup_printf("%s/migfile", tmpfs);
>> +    uri = g_strdup_printf("exec:cat > %s", file);
>> +
>> +    migrate_ensure_converge(from);
>> +    migrate_qmp(from, uri, "{}");
>> +    wait_for_migration_complete(from);
>> +
>> +    pid = fork();
>> +    if (!pid) {
>> +        close(1);
>> +        open("/dev/null", O_WRONLY);
>> +        execl(python, python, ANALYZE_SCRIPT, "-f", file, NULL);
>> +        g_assert_not_reached();
>> +    }
>> +
>> +    g_assert(waitpid(pid, &wstatus, 0) == pid);
>> +    if (WIFEXITED(wstatus) && WEXITSTATUS(wstatus) != 0) {
>> +        g_test_message("Failed to analyze the migration stream");
>> +        g_test_fail();
>> +    }
>> +    test_migrate_end(from, to, false);
>> +    cleanup("migfile");
>> +}
>> +#endif
>> +
>>  static void test_precopy_common(MigrateCommon *args)
>>  {
>>      QTestState *from, *to;
>> @@ -2837,6 +2894,9 @@ int main(int argc, char **argv)
>>      }
>>  
>>      qtest_add_func("/migration/bad_dest", test_baddest);
>> +#ifndef _WIN32
>> +    qtest_add_func("/migration/analyze-script", test_analyze_script);
>> +#endif
>>      qtest_add_func("/migration/precopy/unix/plain", test_precopy_unix_plain);
>>      qtest_add_func("/migration/precopy/unix/xbzrle", test_precopy_unix_xbzrle);
>>      /*
>> diff --git a/tests/qtest/meson.build b/tests/qtest/meson.build
>> index 66795cfcd2..d6022ebd64 100644
>> --- a/tests/qtest/meson.build
>> +++ b/tests/qtest/meson.build
>> @@ -357,6 +357,8 @@ foreach dir : target_dirs
>>      test_deps += [qsd]
>>    endif
>>  
>> +  qtest_env.set('PYTHON', python.full_path())
>> +
>>    foreach test : target_qtests
>>      # Executables are shared across targets, declare them only the first time we
>>      # encounter them
Thomas Huth May 22, 2024, 5:36 a.m. UTC | #3
On 21/05/2024 14.46, Fabiano Rosas wrote:
> Alex Bennée <alex.bennee@linaro.org> writes:
> 
>> Juan Quintela <quintela@redhat.com> writes:
>>
>>> From: Fabiano Rosas <farosas@suse.de>
>>>
>>> Add a smoke test that migrates to a file and gives it to the
>>> script. It should catch the most annoying errors such as changes in
>>> the ram flags.
>>>
>>> After code has been merged it becomes way harder to figure out what is
>>> causing the script to fail, the person making the change is the most
>>> likely to know right away what the problem is.
>>>
>>> Signed-off-by: Fabiano Rosas <farosas@suse.de>
>>> Acked-by: Thomas Huth <thuth@redhat.com>
>>> Reviewed-by: Juan Quintela <quintela@redhat.com>
>>> Signed-off-by: Juan Quintela <quintela@redhat.com>
>>> Message-ID: <20231009184326.15777-7-farosas@suse.de>
>>
>> I bisected the failures I'm seeing on s390x to the introduction of this
>> script. I don't know if its simply a timeout on a relatively slow VM:
> 
> What's the range of your bisect? That test has been disabled and then
> reenabled on s390x. It could be tripping the bisect.
> 
> 04131e0009 ("tests/qtest/migration-test: Disable the analyze-migration.py test on s390x")
> 81c2c9dd5d ("tests/qtest/migration-test: Fix analyze-migration.py for s390x")
> 
> I don't think that test itself could be timing out. It's a very simple
> test. It runs a migration and then uses the output to validate the
> script.

Agreed, the analyze-migration.py is unlikely to be the issue - especially 
since it seems to have been disabled again in commit 6f0771de903b ...
Fabiano, why did you disable it here again? The reason is not mentioned in 
the commit description.

But with regards to the failures, please note that we also had a bug 
recently, starting with commit 9d1b0f5bf515a0 and just fixed in commit 
bebe9603fcb072dc ... maybe that affected your bisecting, too.
(it's really bad that this bug sneaked in without being noticed ... we 
should maybe look into running at least some of the migration tests with TCG 
on x86 hosts, too...)

  Thomas
Fabiano Rosas May 22, 2024, 12:48 p.m. UTC | #4
Thomas Huth <thuth@redhat.com> writes:

> On 21/05/2024 14.46, Fabiano Rosas wrote:
>> Alex Bennée <alex.bennee@linaro.org> writes:
>> 
>>> Juan Quintela <quintela@redhat.com> writes:
>>>
>>>> From: Fabiano Rosas <farosas@suse.de>
>>>>
>>>> Add a smoke test that migrates to a file and gives it to the
>>>> script. It should catch the most annoying errors such as changes in
>>>> the ram flags.
>>>>
>>>> After code has been merged it becomes way harder to figure out what is
>>>> causing the script to fail, the person making the change is the most
>>>> likely to know right away what the problem is.
>>>>
>>>> Signed-off-by: Fabiano Rosas <farosas@suse.de>
>>>> Acked-by: Thomas Huth <thuth@redhat.com>
>>>> Reviewed-by: Juan Quintela <quintela@redhat.com>
>>>> Signed-off-by: Juan Quintela <quintela@redhat.com>
>>>> Message-ID: <20231009184326.15777-7-farosas@suse.de>
>>>
>>> I bisected the failures I'm seeing on s390x to the introduction of this
>>> script. I don't know if its simply a timeout on a relatively slow VM:
>> 
>> What's the range of your bisect? That test has been disabled and then
>> reenabled on s390x. It could be tripping the bisect.
>> 
>> 04131e0009 ("tests/qtest/migration-test: Disable the analyze-migration.py test on s390x")
>> 81c2c9dd5d ("tests/qtest/migration-test: Fix analyze-migration.py for s390x")
>> 
>> I don't think that test itself could be timing out. It's a very simple
>> test. It runs a migration and then uses the output to validate the
>> script.
>
> Agreed, the analyze-migration.py is unlikely to be the issue - especially 
> since it seems to have been disabled again in commit 6f0771de903b ...
> Fabiano, why did you disable it here again? The reason is not mentioned in 
> the commit description.

Your patch 81c2c9dd5d was merged between my v1 and v2 on the list and I
didn't notice so I messed up the rebase. I'll send a patch soon to fix
that.
Thomas Huth May 22, 2024, 1 p.m. UTC | #5
On 22/05/2024 14.48, Fabiano Rosas wrote:
> Thomas Huth <thuth@redhat.com> writes:
> 
>> On 21/05/2024 14.46, Fabiano Rosas wrote:
>>> Alex Bennée <alex.bennee@linaro.org> writes:
>>>
>>>> Juan Quintela <quintela@redhat.com> writes:
>>>>
>>>>> From: Fabiano Rosas <farosas@suse.de>
>>>>>
>>>>> Add a smoke test that migrates to a file and gives it to the
>>>>> script. It should catch the most annoying errors such as changes in
>>>>> the ram flags.
>>>>>
>>>>> After code has been merged it becomes way harder to figure out what is
>>>>> causing the script to fail, the person making the change is the most
>>>>> likely to know right away what the problem is.
>>>>>
>>>>> Signed-off-by: Fabiano Rosas <farosas@suse.de>
>>>>> Acked-by: Thomas Huth <thuth@redhat.com>
>>>>> Reviewed-by: Juan Quintela <quintela@redhat.com>
>>>>> Signed-off-by: Juan Quintela <quintela@redhat.com>
>>>>> Message-ID: <20231009184326.15777-7-farosas@suse.de>
>>>>
>>>> I bisected the failures I'm seeing on s390x to the introduction of this
>>>> script. I don't know if its simply a timeout on a relatively slow VM:
>>>
>>> What's the range of your bisect? That test has been disabled and then
>>> reenabled on s390x. It could be tripping the bisect.
>>>
>>> 04131e0009 ("tests/qtest/migration-test: Disable the analyze-migration.py test on s390x")
>>> 81c2c9dd5d ("tests/qtest/migration-test: Fix analyze-migration.py for s390x")
>>>
>>> I don't think that test itself could be timing out. It's a very simple
>>> test. It runs a migration and then uses the output to validate the
>>> script.
>>
>> Agreed, the analyze-migration.py is unlikely to be the issue - especially
>> since it seems to have been disabled again in commit 6f0771de903b ...
>> Fabiano, why did you disable it here again? The reason is not mentioned in
>> the commit description.
> 
> Your patch 81c2c9dd5d was merged between my v1 and v2 on the list and I
> didn't notice so I messed up the rebase. I'll send a patch soon to fix
> that.

Thanks, but I already sent a patch earlier today that should fix the issue:

 
https://lore.kernel.org/qemu-devel/20240522091255.417263-1-thuth@redhat.com/T/#u

  Thomas
Alex Bennée May 22, 2024, 2:11 p.m. UTC | #6
Fabiano Rosas <farosas@suse.de> writes:

> Alex Bennée <alex.bennee@linaro.org> writes:
>
>> Juan Quintela <quintela@redhat.com> writes:
>>
>>> From: Fabiano Rosas <farosas@suse.de>
>>>
>>> Add a smoke test that migrates to a file and gives it to the
>>> script. It should catch the most annoying errors such as changes in
>>> the ram flags.
>>>
>>> After code has been merged it becomes way harder to figure out what is
>>> causing the script to fail, the person making the change is the most
>>> likely to know right away what the problem is.
>>>
>>> Signed-off-by: Fabiano Rosas <farosas@suse.de>
>>> Acked-by: Thomas Huth <thuth@redhat.com>
>>> Reviewed-by: Juan Quintela <quintela@redhat.com>
>>> Signed-off-by: Juan Quintela <quintela@redhat.com>
>>> Message-ID: <20231009184326.15777-7-farosas@suse.de>
>>
>> I bisected the failures I'm seeing on s390x to the introduction of this
>> script. I don't know if its simply a timeout on a relatively slow VM:
>
> What's the range of your bisect? That test has been disabled and then
> reenabled on s390x. It could be tripping the bisect.
>
> 04131e0009 ("tests/qtest/migration-test: Disable the analyze-migration.py test on s390x")
> 81c2c9dd5d ("tests/qtest/migration-test: Fix analyze-migration.py for
> s390x")

I ran between v8.1.0 and master.

But it is still failing @ HEAD 01782d6b294f95bcde334386f0aaac593cd28c0d
as you can see from the run I did at the end.

>
> I don't think that test itself could be timing out. It's a very simple
> test. It runs a migration and then uses the output to validate the
> script.
>
> I don't have a Z machine at hand and the migration tests only run with
> KVM for s390x, so it would be useful to take a look at meson's
> testlog.txt so we can see which test is failing and hopefully in what
> way it is failing.
>
> If you're up for it, running this in a loop is usually the best way to
> catch any intermittent issues:
>
> QTEST_QEMU_BINARY=./qemu-system-x86_64 ./tests/qtest/migration-test
>
> And once you figure out which test, there's this monstrosity:
>
> QTEST_QEMU_BINARY='gdb -q --ex "set pagination off"          \
>                           --ex "set print thread-events off" \
>                           --ex "handle SIGUSR1 noprint"      \
>                           --ex "handle SIGPIPE noprint"      \
>                           --ex "run" --ex "quit \$_exitcode" \
>                           --args ./qemu-system-x86_64'       \
>                           gdb -q --ex "set prompt (qtest) "  \
>                           --ex "handle SIGPIPE noprint"      \
>                           --args ./tests/qtest/migration-test -p /x86_64/migration/<some>/<test>
>
>> Summary of Failures:
>>
>>  36/546 qemu:qtest+qtest-s390x / qtest-s390x/migration-test                       ERROR          93.51s   killed by signal 6 SIGABRT
>>
>> It seems to be unstable as we pass sometimes:
>>
>> 11:26:42 [ajb@qemu01:~/l/q/b/system] master|… + ./pyvenv/bin/meson test --repeat 100 qtest-s390x/migration-test
>> ninja: Entering directory `/home/ajb/lsrc/qemu.git/builds/system'
>> [1/9] Generating qemu-version.h with a custom command (wrapped by meson to capture output)
>>   1/100 qemu:qtest+qtest-s390x / qtest-s390x/migration-test          ERROR          251.98s   killed by signal 6 SIGABRT
>>>>> MALLOC_PERTURB_=9
>>>>> PYTHON=/home/ajb/lsrc/qemu.git/builds/system/pyvenv/bin/python3
>>>>> G_TEST_DBUS_DAEMON=/home/ajb/lsrc/qemu.git/tests/dbus-vmstate-daemon.sh
>>>>> QTEST_QEMU_BINARY=./qemu-system-s390x QTEST_QEMU_IMG=./qemu-img
>>>>> QTEST_QEMU_STORAGE_DAEMON_BINARY=./storage-daemon/qemu-storage-daemon
>>>>> /home/ajb/lsrc/qemu.git/builds/system/tests/qtest/migration-test
>>>>> --tap -k
>>
>>   2/100 qemu:qtest+qtest-s390x / qtest-s390x/migration-test          ERROR          258.71s   killed by signal 6 SIGABRT
>>>>> PYTHON=/home/ajb/lsrc/qemu.git/builds/system/pyvenv/bin/python3
>>>>> MALLOC_PERTURB_=205
>>>>> G_TEST_DBUS_DAEMON=/home/ajb/lsrc/qemu.git/tests/dbus-vmstate-daemon.sh
>>>>> QTEST_QEMU_BINARY=./qemu-system-s390x QTEST_QEMU_IMG=./qemu-img
>>>>> QTEST_QEMU_STORAGE_DAEMON_BINARY=./storage-daemon/qemu-storage-daemon
>>>>> /home/ajb/lsrc/qemu.git/builds/system/tests/qtest/migration-test
>>>>> --tap -k
>>
>>   3/100 qemu:qtest+qtest-s390x / qtest-s390x/migration-test          OK             302.53s   46 subtests passed
>>   4/100 qemu:qtest+qtest-s390x / qtest-s390x/migration-test          OK             319.56s   46 subtests passed
>>   5/100 qemu:qtest+qtest-s390x / qtest-s390x/migration-test          OK             320.11s   46 subtests passed
>>   6/100 qemu:qtest+qtest-s390x / qtest-s390x/migration-test          OK             328.40s   46 subtests passed
>>
>> Ok:                 4   
>> Expected Fail:      0   
>> Fail:               2   
>> Unexpected Pass:    0   
>> Skipped:            0   
>> Timeout:            0   
>>
>>> ---
>>>  tests/qtest/migration-test.c | 60 ++++++++++++++++++++++++++++++++++++
>>>  tests/qtest/meson.build      |  2 ++
>>>  2 files changed, 62 insertions(+)
>>>
>>> diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c
>>> index 8eb2053dbb..cef5081f8c 100644
>>> --- a/tests/qtest/migration-test.c
>>> +++ b/tests/qtest/migration-test.c
>>> @@ -66,6 +66,8 @@ static bool got_dst_resume;
>>>   */
>>>  #define DIRTYLIMIT_TOLERANCE_RANGE  25  /* MB/s */
>>>  
>>> +#define ANALYZE_SCRIPT "scripts/analyze-migration.py"
>>> +
>>>  #if defined(__linux__)
>>>  #include <sys/syscall.h>
>>>  #include <sys/vfs.h>
>>> @@ -1501,6 +1503,61 @@ static void test_baddest(void)
>>>      test_migrate_end(from, to, false);
>>>  }
>>>  
>>> +#ifndef _WIN32
>>> +static void test_analyze_script(void)
>>> +{
>>> +    MigrateStart args = {
>>> +        .opts_source = "-uuid 11111111-1111-1111-1111-111111111111",
>>> +    };
>>> +    QTestState *from, *to;
>>> +    g_autofree char *uri = NULL;
>>> +    g_autofree char *file = NULL;
>>> +    int pid, wstatus;
>>> +    const char *python = g_getenv("PYTHON");
>>> +
>>> +    if (!python) {
>>> +        g_test_skip("PYTHON variable not set");
>>> +        return;
>>> +    }
>>> +
>>> +    /* dummy url */
>>> +    if (test_migrate_start(&from, &to, "tcp:127.0.0.1:0", &args)) {
>>> +        return;
>>> +    }
>>> +
>>> +    /*
>>> +     * Setting these two capabilities causes the "configuration"
>>> +     * vmstate to include subsections for them. The script needs to
>>> +     * parse those subsections properly.
>>> +     */
>>> +    migrate_set_capability(from, "validate-uuid", true);
>>> +    migrate_set_capability(from, "x-ignore-shared", true);
>>> +
>>> +    file = g_strdup_printf("%s/migfile", tmpfs);
>>> +    uri = g_strdup_printf("exec:cat > %s", file);
>>> +
>>> +    migrate_ensure_converge(from);
>>> +    migrate_qmp(from, uri, "{}");
>>> +    wait_for_migration_complete(from);
>>> +
>>> +    pid = fork();
>>> +    if (!pid) {
>>> +        close(1);
>>> +        open("/dev/null", O_WRONLY);
>>> +        execl(python, python, ANALYZE_SCRIPT, "-f", file, NULL);
>>> +        g_assert_not_reached();
>>> +    }
>>> +
>>> +    g_assert(waitpid(pid, &wstatus, 0) == pid);
>>> +    if (WIFEXITED(wstatus) && WEXITSTATUS(wstatus) != 0) {
>>> +        g_test_message("Failed to analyze the migration stream");
>>> +        g_test_fail();
>>> +    }
>>> +    test_migrate_end(from, to, false);
>>> +    cleanup("migfile");
>>> +}
>>> +#endif
>>> +
>>>  static void test_precopy_common(MigrateCommon *args)
>>>  {
>>>      QTestState *from, *to;
>>> @@ -2837,6 +2894,9 @@ int main(int argc, char **argv)
>>>      }
>>>  
>>>      qtest_add_func("/migration/bad_dest", test_baddest);
>>> +#ifndef _WIN32
>>> +    qtest_add_func("/migration/analyze-script", test_analyze_script);
>>> +#endif
>>>      qtest_add_func("/migration/precopy/unix/plain", test_precopy_unix_plain);
>>>      qtest_add_func("/migration/precopy/unix/xbzrle", test_precopy_unix_xbzrle);
>>>      /*
>>> diff --git a/tests/qtest/meson.build b/tests/qtest/meson.build
>>> index 66795cfcd2..d6022ebd64 100644
>>> --- a/tests/qtest/meson.build
>>> +++ b/tests/qtest/meson.build
>>> @@ -357,6 +357,8 @@ foreach dir : target_dirs
>>>      test_deps += [qsd]
>>>    endif
>>>  
>>> +  qtest_env.set('PYTHON', python.full_path())
>>> +
>>>    foreach test : target_qtests
>>>      # Executables are shared across targets, declare them only the first time we
>>>      # encounter them
diff mbox series

Patch

diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c
index 8eb2053dbb..cef5081f8c 100644
--- a/tests/qtest/migration-test.c
+++ b/tests/qtest/migration-test.c
@@ -66,6 +66,8 @@  static bool got_dst_resume;
  */
 #define DIRTYLIMIT_TOLERANCE_RANGE  25  /* MB/s */
 
+#define ANALYZE_SCRIPT "scripts/analyze-migration.py"
+
 #if defined(__linux__)
 #include <sys/syscall.h>
 #include <sys/vfs.h>
@@ -1501,6 +1503,61 @@  static void test_baddest(void)
     test_migrate_end(from, to, false);
 }
 
+#ifndef _WIN32
+static void test_analyze_script(void)
+{
+    MigrateStart args = {
+        .opts_source = "-uuid 11111111-1111-1111-1111-111111111111",
+    };
+    QTestState *from, *to;
+    g_autofree char *uri = NULL;
+    g_autofree char *file = NULL;
+    int pid, wstatus;
+    const char *python = g_getenv("PYTHON");
+
+    if (!python) {
+        g_test_skip("PYTHON variable not set");
+        return;
+    }
+
+    /* dummy url */
+    if (test_migrate_start(&from, &to, "tcp:127.0.0.1:0", &args)) {
+        return;
+    }
+
+    /*
+     * Setting these two capabilities causes the "configuration"
+     * vmstate to include subsections for them. The script needs to
+     * parse those subsections properly.
+     */
+    migrate_set_capability(from, "validate-uuid", true);
+    migrate_set_capability(from, "x-ignore-shared", true);
+
+    file = g_strdup_printf("%s/migfile", tmpfs);
+    uri = g_strdup_printf("exec:cat > %s", file);
+
+    migrate_ensure_converge(from);
+    migrate_qmp(from, uri, "{}");
+    wait_for_migration_complete(from);
+
+    pid = fork();
+    if (!pid) {
+        close(1);
+        open("/dev/null", O_WRONLY);
+        execl(python, python, ANALYZE_SCRIPT, "-f", file, NULL);
+        g_assert_not_reached();
+    }
+
+    g_assert(waitpid(pid, &wstatus, 0) == pid);
+    if (WIFEXITED(wstatus) && WEXITSTATUS(wstatus) != 0) {
+        g_test_message("Failed to analyze the migration stream");
+        g_test_fail();
+    }
+    test_migrate_end(from, to, false);
+    cleanup("migfile");
+}
+#endif
+
 static void test_precopy_common(MigrateCommon *args)
 {
     QTestState *from, *to;
@@ -2837,6 +2894,9 @@  int main(int argc, char **argv)
     }
 
     qtest_add_func("/migration/bad_dest", test_baddest);
+#ifndef _WIN32
+    qtest_add_func("/migration/analyze-script", test_analyze_script);
+#endif
     qtest_add_func("/migration/precopy/unix/plain", test_precopy_unix_plain);
     qtest_add_func("/migration/precopy/unix/xbzrle", test_precopy_unix_xbzrle);
     /*
diff --git a/tests/qtest/meson.build b/tests/qtest/meson.build
index 66795cfcd2..d6022ebd64 100644
--- a/tests/qtest/meson.build
+++ b/tests/qtest/meson.build
@@ -357,6 +357,8 @@  foreach dir : target_dirs
     test_deps += [qsd]
   endif
 
+  qtest_env.set('PYTHON', python.full_path())
+
   foreach test : target_qtests
     # Executables are shared across targets, declare them only the first time we
     # encounter them