diff mbox series

[PULL,8/8] migration: Add qtest for migration over RDMA

Message ID 20250307181551.19887-9-farosas@suse.de (mailing list archive)
State New
Headers show
Series [PULL,1/8] migration: Fix UAF for incoming migration on MigrationState | expand

Commit Message

Fabiano Rosas March 7, 2025, 6:15 p.m. UTC
From: Li Zhijian <lizhijian@fujitsu.com>

This qtest requires there is a RDMA(RoCE) link in the host.
In order to make the test work smoothly, introduce a
scripts/rdma-migration-helper.sh to
- setup a new Soft-RoCE(aka RXE) if it's root
- detect existing RoCE link

Test will be skipped if there is no available RoCE link.
 # Start of rdma tests
 # Running /x86_64/migration/precopy/rdma/plain
 Command 'rdma' is not available, please install it first.
 # To enable the test:
 # (1) Run 'scripts/rdma-migration-helper.sh setup' with root and rerun the test
 # or
 # (2) Run the test with root privilege
 #
 ok 1 /x86_64/migration/precopy/rdma/plain # SKIP No rdma link available
 # End of rdma tests

Note: Remove the newly added RXE link by executing 'modprobe -r rdma_rxe'
or by specifying 'clean' within this script.

Reviewed-by: Peter Xu <peterx@redhat.com>
Signed-off-by: Li Zhijian <lizhijian@fujitsu.com>
Message-ID: <20250305062825.772629-7-lizhijian@fujitsu.com>
[reformatted the message to be under 90 characters]
Signed-off-by: Fabiano Rosas <farosas@suse.de>
---
 MAINTAINERS                           |  1 +
 scripts/rdma-migration-helper.sh      | 48 +++++++++++++++++++
 tests/qtest/migration/precopy-tests.c | 69 +++++++++++++++++++++++++++
 3 files changed, 118 insertions(+)
 create mode 100755 scripts/rdma-migration-helper.sh

Comments

Philippe Mathieu-Daudé March 8, 2025, 6 a.m. UTC | #1
Hi,

On 7/3/25 19:15, Fabiano Rosas wrote:
> From: Li Zhijian <lizhijian@fujitsu.com>
> 
> This qtest requires there is a RDMA(RoCE) link in the host.
> In order to make the test work smoothly, introduce a
> scripts/rdma-migration-helper.sh to
> - setup a new Soft-RoCE(aka RXE) if it's root
> - detect existing RoCE link
> 
> Test will be skipped if there is no available RoCE link.

Is it? Running as a regular user, I'm getting:

   RDMA ERROR: RDMA host is not set!

Apparently called via:

qemu_start_incoming_migration()
   -> rdma_start_incoming_migration()
      -> qemu_rdma_dest_init()

>   # Start of rdma tests
>   # Running /x86_64/migration/precopy/rdma/plain
>   Command 'rdma' is not available, please install it first.
>   # To enable the test:
>   # (1) Run 'scripts/rdma-migration-helper.sh setup' with root and rerun the test
>   # or
>   # (2) Run the test with root privilege

Could this be the issue? Should we skip if not root, since calling
the script in "detect" mode makes the new_rdma_link() method succeed?

>   #
>   ok 1 /x86_64/migration/precopy/rdma/plain # SKIP No rdma link available
>   # End of rdma tests
> 
> Note: Remove the newly added RXE link by executing 'modprobe -r rdma_rxe'
> or by specifying 'clean' within this script.

qtest_add() provides both setup() / teardown() methods.

A test leaving the system in a different state seems bogus to me.
Even more so if the information is buried in a commit description...

We shouldn't merge this patch as is IMHO.

Regards,

Phil.

> Reviewed-by: Peter Xu <peterx@redhat.com>
> Signed-off-by: Li Zhijian <lizhijian@fujitsu.com>
> Message-ID: <20250305062825.772629-7-lizhijian@fujitsu.com>
> [reformated the message to be under 90 characters]
> Signed-off-by: Fabiano Rosas <farosas@suse.de>
> ---
>   MAINTAINERS                           |  1 +
>   scripts/rdma-migration-helper.sh      | 48 +++++++++++++++++++
>   tests/qtest/migration/precopy-tests.c | 69 +++++++++++++++++++++++++++
>   3 files changed, 118 insertions(+)
>   create mode 100755 scripts/rdma-migration-helper.sh
> 
> diff --git a/MAINTAINERS b/MAINTAINERS
> index 5df6020ed5..56e85adcfb 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -3517,6 +3517,7 @@ R: Li Zhijian <lizhijian@fujitsu.com>
>   R: Peter Xu <peterx@redhat.com>
>   S: Odd Fixes
>   F: migration/rdma*
> +F: scripts/rdma-migration-helper.sh
>   
>   Migration dirty limit and dirty page rate
>   M: Hyman Huang <yong.huang@smartx.com>
> diff --git a/scripts/rdma-migration-helper.sh b/scripts/rdma-migration-helper.sh
> new file mode 100755
> index 0000000000..08e29a52eb
> --- /dev/null
> +++ b/scripts/rdma-migration-helper.sh
> @@ -0,0 +1,48 @@
> +#!/bin/bash
> +
> +# Copied from blktests
> +get_ipv4_addr()
> +{
> +    ip -4 -o addr show dev "$1" |
> +        sed -n 's/.*[[:blank:]]inet[[:blank:]]*\([^[:blank:]/]*\).*/\1/p' |
> +        tr -d '\n'
> +}
> +
> +has_soft_rdma()
> +{
> +    rdma link | grep -q " netdev $1[[:blank:]]*\$"
> +}
> +
> +rdma_rxe_setup_detect()
> +{
> +    (
> +        cd /sys/class/net &&
> +            for i in *; do
> +                [ -e "$i" ] || continue
> +                [ "$i" = "lo" ] && continue
> +                [ "$(<"$i/addr_len")" = 6 ] || continue
> +                [ "$(<"$i/carrier")" = 1 ] || continue
> +
> +                has_soft_rdma "$i" && break
> +                [ "$operation" = "setup" ] &&
> +                    rdma link add "${i}_rxe" type rxe netdev "$i" && break
> +            done
> +        has_soft_rdma "$i" || return
> +        get_ipv4_addr "$i"
> +    )
> +}
> +
> +operation=${1:-setup}
> +
> +command -v rdma >/dev/null || {
> +    echo "Command 'rdma' is not available, please install it first." >&2
> +    exit 1
> +}
> +
> +if [ "$operation" == "setup" ] || [ "$operation" == "detect" ]; then
> +    rdma_rxe_setup_detect
> +elif [ "$operation" == "clean" ]; then
> +    modprobe -r rdma_rxe
> +else
> +    echo "Usage: $0 [setup | detect | clean]"
> +fi
> diff --git a/tests/qtest/migration/precopy-tests.c b/tests/qtest/migration/precopy-tests.c
> index ba273d10b9..f1fe34020d 100644
> --- a/tests/qtest/migration/precopy-tests.c
> +++ b/tests/qtest/migration/precopy-tests.c
> @@ -99,6 +99,71 @@ static void test_precopy_unix_dirty_ring(void)
>       test_precopy_common(&args);
>   }
>   
> +#ifdef CONFIG_RDMA
> +
> +#define RDMA_MIGRATION_HELPER "scripts/rdma-migration-helper.sh"
> +static int new_rdma_link(char *buffer, bool verbose)
> +{
> +    const char *argument = (geteuid() == 0) ? "setup" : "detect";
> +    char cmd[1024];
> +
> +    snprintf(cmd, sizeof(cmd), "%s %s %s", RDMA_MIGRATION_HELPER, argument,
> +             verbose ? "" : "2>/dev/null");
> +
> +    FILE *pipe = popen(cmd, "r");
> +    if (pipe == NULL) {
> +        perror("Failed to run script");
> +        return -1;
> +    }
> +
> +    int idx = 0;
> +    while (fgets(buffer + idx, 128 - idx, pipe) != NULL) {
> +        idx += strlen(buffer);
> +    }
> +
> +    int status = pclose(pipe);
> +    if (status == -1) {
> +        perror("Error reported by pclose()");
> +        return -1;
> +    } else if (WIFEXITED(status)) {
> +        return WEXITSTATUS(status);
> +    }
> +
> +    return -1;
> +}
> +
> +static void test_precopy_rdma_plain(void)
> +{
> +    char buffer[128] = {};
> +    bool verbose = g_getenv("QTEST_LOG");
> +
> +    if (new_rdma_link(buffer, verbose)) {
> +        g_test_skip("No rdma link available");
> +        if (verbose) {
> +            g_test_message(
> +                "To enable the test:\n"
> +                "(1) Run \'" RDMA_MIGRATION_HELPER
> +                " setup\' with root and rerun the test\n"
> +                "or\n(2) Run the test with root privilege");
> +        }
> +        return;
> +    }
> +
> +    /*
> +     * TODO: query a free port instead of hard code.
> +     * 29200=('R'+'D'+'M'+'A')*100
> +     **/
> +    g_autofree char *uri = g_strdup_printf("rdma:%s:29200", buffer);
> +
> +    MigrateCommon args = {
> +        .listen_uri = uri,
> +        .connect_uri = uri,
> +    };
> +
> +    test_precopy_common(&args);
> +}
> +#endif
> +
>   static void test_precopy_tcp_plain(void)
>   {
>       MigrateCommon args = {
> @@ -1124,6 +1189,10 @@ static void migration_test_add_precopy_smoke(MigrationTestEnv *env)
>                          test_multifd_tcp_uri_none);
>       migration_test_add("/migration/multifd/tcp/plain/cancel",
>                          test_multifd_tcp_cancel);
> +#ifdef CONFIG_RDMA
> +    migration_test_add("/migration/precopy/rdma/plain",
> +                       test_precopy_rdma_plain);
> +#endif
>   }
>   
>   void migration_test_add_precopy(MigrationTestEnv *env)
Stefan Hajnoczi March 8, 2025, 8:42 a.m. UTC | #2
On Sat, Mar 8, 2025 at 2:01 PM Philippe Mathieu-Daudé <philmd@linaro.org> wrote:
>
> Hi,
>
> On 7/3/25 19:15, Fabiano Rosas wrote:
> > From: Li Zhijian <lizhijian@fujitsu.com>
> >
> > This qtest requires there is a RDMA(RoCE) link in the host.
> > In order to make the test work smoothly, introduce a
> > scripts/rdma-migration-helper.sh to
> > - setup a new Soft-RoCE(aka RXE) if it's root
> > - detect existing RoCE link
> >
> > Test will be skipped if there is no available RoCE link.
>
> Is it? Runing as user I'm getting:
>
>    RDMA ERROR: RDMA host is not set!

The CI is failing too:
https://gitlab.com/qemu-project/qemu/-/jobs/9350004599#L5590

I have dropped this pull request for now. Please send a new revision
once the issue has been resolved.

Stefan

>
> Apparently called via:
>
> qemu_start_incoming_migration()
>    -> rdma_start_incoming_migration()
>       -> qemu_rdma_dest_init()
>
> >   # Start of rdma tests
> >   # Running /x86_64/migration/precopy/rdma/plain
> >   Command 'rdma' is not available, please install it first.
> >   # To enable the test:
> >   # (1) Run 'scripts/rdma-migration-helper.sh setup' with root and rerun the test
> >   # or
> >   # (2) Run the test with root privilege
>
> Could this might be the issue, should we skip if not root, as calling
> the script in "detect" mode makes the new_rdma_link() method to succeed.
>
> >   #
> >   ok 1 /x86_64/migration/precopy/rdma/plain # SKIP No rdma link available
> >   # End of rdma tests
> >
> > Note: Remove the newly added RXE link by executing 'modprobe -r rdma_rxe'
> > or by specifying 'clean' within this script.
>
> qtest_add() provides both setup() / teardown() methods.
>
> Test leaving system in different state seems bogus to me.
> More even if the information is buried in a commit description...
>
> We shouldn't merge this patch as is IMHO.
>
> Regards,
>
> Phil.
>
> > Reviewed-by: Peter Xu <peterx@redhat.com>
> > Signed-off-by: Li Zhijian <lizhijian@fujitsu.com>
> > Message-ID: <20250305062825.772629-7-lizhijian@fujitsu.com>
> > [reformated the message to be under 90 characters]
> > Signed-off-by: Fabiano Rosas <farosas@suse.de>
> > ---
> >   MAINTAINERS                           |  1 +
> >   scripts/rdma-migration-helper.sh      | 48 +++++++++++++++++++
> >   tests/qtest/migration/precopy-tests.c | 69 +++++++++++++++++++++++++++
> >   3 files changed, 118 insertions(+)
> >   create mode 100755 scripts/rdma-migration-helper.sh
> >
> > diff --git a/MAINTAINERS b/MAINTAINERS
> > index 5df6020ed5..56e85adcfb 100644
> > --- a/MAINTAINERS
> > +++ b/MAINTAINERS
> > @@ -3517,6 +3517,7 @@ R: Li Zhijian <lizhijian@fujitsu.com>
> >   R: Peter Xu <peterx@redhat.com>
> >   S: Odd Fixes
> >   F: migration/rdma*
> > +F: scripts/rdma-migration-helper.sh
> >
> >   Migration dirty limit and dirty page rate
> >   M: Hyman Huang <yong.huang@smartx.com>
> > diff --git a/scripts/rdma-migration-helper.sh b/scripts/rdma-migration-helper.sh
> > new file mode 100755
> > index 0000000000..08e29a52eb
> > --- /dev/null
> > +++ b/scripts/rdma-migration-helper.sh
> > @@ -0,0 +1,48 @@
> > +#!/bin/bash
> > +
> > +# Copied from blktests
> > +get_ipv4_addr()
> > +{
> > +    ip -4 -o addr show dev "$1" |
> > +        sed -n 's/.*[[:blank:]]inet[[:blank:]]*\([^[:blank:]/]*\).*/\1/p' |
> > +        tr -d '\n'
> > +}
> > +
> > +has_soft_rdma()
> > +{
> > +    rdma link | grep -q " netdev $1[[:blank:]]*\$"
> > +}
> > +
> > +rdma_rxe_setup_detect()
> > +{
> > +    (
> > +        cd /sys/class/net &&
> > +            for i in *; do
> > +                [ -e "$i" ] || continue
> > +                [ "$i" = "lo" ] && continue
> > +                [ "$(<"$i/addr_len")" = 6 ] || continue
> > +                [ "$(<"$i/carrier")" = 1 ] || continue
> > +
> > +                has_soft_rdma "$i" && break
> > +                [ "$operation" = "setup" ] &&
> > +                    rdma link add "${i}_rxe" type rxe netdev "$i" && break
> > +            done
> > +        has_soft_rdma "$i" || return
> > +        get_ipv4_addr "$i"
> > +    )
> > +}
> > +
> > +operation=${1:-setup}
> > +
> > +command -v rdma >/dev/null || {
> > +    echo "Command 'rdma' is not available, please install it first." >&2
> > +    exit 1
> > +}
> > +
> > +if [ "$operation" == "setup" ] || [ "$operation" == "detect" ]; then
> > +    rdma_rxe_setup_detect
> > +elif [ "$operation" == "clean" ]; then
> > +    modprobe -r rdma_rxe
> > +else
> > +    echo "Usage: $0 [setup | detect | clean]"
> > +fi
> > diff --git a/tests/qtest/migration/precopy-tests.c b/tests/qtest/migration/precopy-tests.c
> > index ba273d10b9..f1fe34020d 100644
> > --- a/tests/qtest/migration/precopy-tests.c
> > +++ b/tests/qtest/migration/precopy-tests.c
> > @@ -99,6 +99,71 @@ static void test_precopy_unix_dirty_ring(void)
> >       test_precopy_common(&args);
> >   }
> >
> > +#ifdef CONFIG_RDMA
> > +
> > +#define RDMA_MIGRATION_HELPER "scripts/rdma-migration-helper.sh"
> > +static int new_rdma_link(char *buffer, bool verbose)
> > +{
> > +    const char *argument = (geteuid() == 0) ? "setup" : "detect";
> > +    char cmd[1024];
> > +
> > +    snprintf(cmd, sizeof(cmd), "%s %s %s", RDMA_MIGRATION_HELPER, argument,
> > +             verbose ? "" : "2>/dev/null");
> > +
> > +    FILE *pipe = popen(cmd, "r");
> > +    if (pipe == NULL) {
> > +        perror("Failed to run script");
> > +        return -1;
> > +    }
> > +
> > +    int idx = 0;
> > +    while (fgets(buffer + idx, 128 - idx, pipe) != NULL) {
> > +        idx += strlen(buffer);
> > +    }
> > +
> > +    int status = pclose(pipe);
> > +    if (status == -1) {
> > +        perror("Error reported by pclose()");
> > +        return -1;
> > +    } else if (WIFEXITED(status)) {
> > +        return WEXITSTATUS(status);
> > +    }
> > +
> > +    return -1;
> > +}
> > +
> > +static void test_precopy_rdma_plain(void)
> > +{
> > +    char buffer[128] = {};
> > +    bool verbose = g_getenv("QTEST_LOG");
> > +
> > +    if (new_rdma_link(buffer, verbose)) {
> > +        g_test_skip("No rdma link available");
> > +        if (verbose) {
> > +            g_test_message(
> > +                "To enable the test:\n"
> > +                "(1) Run \'" RDMA_MIGRATION_HELPER
> > +                " setup\' with root and rerun the test\n"
> > +                "or\n(2) Run the test with root privilege");
> > +        }
> > +        return;
> > +    }
> > +
> > +    /*
> > +     * TODO: query a free port instead of hard code.
> > +     * 29200=('R'+'D'+'M'+'A')*100
> > +     **/
> > +    g_autofree char *uri = g_strdup_printf("rdma:%s:29200", buffer);
> > +
> > +    MigrateCommon args = {
> > +        .listen_uri = uri,
> > +        .connect_uri = uri,
> > +    };
> > +
> > +    test_precopy_common(&args);
> > +}
> > +#endif
> > +
> >   static void test_precopy_tcp_plain(void)
> >   {
> >       MigrateCommon args = {
> > @@ -1124,6 +1189,10 @@ static void migration_test_add_precopy_smoke(MigrationTestEnv *env)
> >                          test_multifd_tcp_uri_none);
> >       migration_test_add("/migration/multifd/tcp/plain/cancel",
> >                          test_multifd_tcp_cancel);
> > +#ifdef CONFIG_RDMA
> > +    migration_test_add("/migration/precopy/rdma/plain",
> > +                       test_precopy_rdma_plain);
> > +#endif
> >   }
> >
> >   void migration_test_add_precopy(MigrationTestEnv *env)
>
>
Zhijian Li (Fujitsu) March 10, 2025, 8:01 a.m. UTC | #3
Hi Philippe,

Thanks for your testing.


On 08/03/2025 14:00, Philippe Mathieu-Daudé wrote:
> Hi,
> 
> On 7/3/25 19:15, Fabiano Rosas wrote:
>> From: Li Zhijian <lizhijian@fujitsu.com>
>>
>> This qtest requires there is a RDMA(RoCE) link in the host.
>> In order to make the test work smoothly, introduce a
>> scripts/rdma-migration-helper.sh to
>> - setup a new Soft-RoCE(aka RXE) if it's root
>> - detect existing RoCE link
>>
>> Test will be skipped if there is no available RoCE link.
> 
> Is it? Runing as user I'm getting:
> 
>    RDMA ERROR: RDMA host is not set!


That's unexpected behavior.

It implies that the script terminated successfully with an exit
code of 0 (i.e. there is an RDMA link), yet failed to print the link's
IPv4 address.

Stefan also mentioned the same error...
https://gitlab.com/qemu-project/qemu/-/jobs/9350004599#L5590

I couldn't reproduce your error.

Could you share the output of this script when run as a normal user:
$ scripts/rdma-migration-helper.sh detect

If you have an rdma/RXE link, please also share the output of 'ip' for it:
$ ip -4 -o addr show dev <NIC>

where <NIC> is the interface associated with the RoCE (RXE) link, for example:

$ rdma link
link enp2s0_rxe/1 state ACTIVE physical_state LINK_UP netdev enp2s0

then the <NIC> is enp2s0


> 
> Apparently called via:
> 
> qemu_start_incoming_migration()
>    -> rdma_start_incoming_migration()
>       -> qemu_rdma_dest_init()
> 
>>   # Start of rdma tests
>>   # Running /x86_64/migration/precopy/rdma/plain
>>   Command 'rdma' is not available, please install it first.
>>   # To enable the test:
>>   # (1) Run 'scripts/rdma-migration-helper.sh setup' with root and rerun the test
>>   # or
>>   # (2) Run the test with root privilege
> 
> Could this might be the issue, should we skip if not root, as calling
> the script in "detect" mode makes the new_rdma_link() method to succeed.

It's expected that 'detect' should succeed and print an IPv4 address.

> 
>>   #
>>   ok 1 /x86_64/migration/precopy/rdma/plain # SKIP No rdma link available
>>   # End of rdma tests
>>
>> Note: Remove the newly added RXE link by executing 'modprobe -r rdma_rxe'
>> or by specifying 'clean' within this script.
> 
> qtest_add() provides both setup() / teardown() methods.
> 

This may require a minor refactor of the migration-test framework to
enable support for setup() and teardown() methods.

Let me see...



> Test leaving system in different state seems bogus to me.

At this point, I'm unable to refute that.
It indeed might dirty the system.


A palatable compromise might be that, regardless of whether one is a
root user or not, this test is only supported on hosts with an RDMA link.

Otherwise, it will print a SKIP message.

      # Run 'scripts/rdma-migration-helper.sh setup' with root and rerun the test
      # Optional: run 'scripts/rdma-migration-helper.sh clean' to revert the 'setup'

Local users can independently use this script to set up and clean up the RDMA link,
as they are aware of the modifications they have made to the system.

Thanks
Zhijian

> More even if the information is buried in a commit description...
> 
> We shouldn't merge this patch as is IMHO.
> 
> Regards,
> 
> Phil.
> 
>> Reviewed-by: Peter Xu <peterx@redhat.com>
>> Signed-off-by: Li Zhijian <lizhijian@fujitsu.com>
>> Message-ID: <20250305062825.772629-7-lizhijian@fujitsu.com>
>> [reformated the message to be under 90 characters]
>> Signed-off-by: Fabiano Rosas <farosas@suse.de>
>> ---
>>   MAINTAINERS                           |  1 +
>>   scripts/rdma-migration-helper.sh      | 48 +++++++++++++++++++
>>   tests/qtest/migration/precopy-tests.c | 69 +++++++++++++++++++++++++++
>>   3 files changed, 118 insertions(+)
>>   create mode 100755 scripts/rdma-migration-helper.sh
>>
>> diff --git a/MAINTAINERS b/MAINTAINERS
>> index 5df6020ed5..56e85adcfb 100644
>> --- a/MAINTAINERS
>> +++ b/MAINTAINERS
>> @@ -3517,6 +3517,7 @@ R: Li Zhijian <lizhijian@fujitsu.com>
>>   R: Peter Xu <peterx@redhat.com>
>>   S: Odd Fixes
>>   F: migration/rdma*
>> +F: scripts/rdma-migration-helper.sh
>>   Migration dirty limit and dirty page rate
>>   M: Hyman Huang <yong.huang@smartx.com>
>> diff --git a/scripts/rdma-migration-helper.sh b/scripts/rdma-migration-helper.sh
>> new file mode 100755
>> index 0000000000..08e29a52eb
>> --- /dev/null
>> +++ b/scripts/rdma-migration-helper.sh
>> @@ -0,0 +1,48 @@
>> +#!/bin/bash
>> +
>> +# Copied from blktests
>> +get_ipv4_addr()
>> +{
>> +    ip -4 -o addr show dev "$1" |
>> +        sed -n 's/.*[[:blank:]]inet[[:blank:]]*\([^[:blank:]/]*\).*/\1/p' |
>> +        tr -d '\n'
>> +}
>> +
>> +has_soft_rdma()
>> +{
>> +    rdma link | grep -q " netdev $1[[:blank:]]*\$"
>> +}
>> +
>> +rdma_rxe_setup_detect()
>> +{
>> +    (
>> +        cd /sys/class/net &&
>> +            for i in *; do
>> +                [ -e "$i" ] || continue
>> +                [ "$i" = "lo" ] && continue
>> +                [ "$(<"$i/addr_len")" = 6 ] || continue
>> +                [ "$(<"$i/carrier")" = 1 ] || continue
>> +
>> +                has_soft_rdma "$i" && break
>> +                [ "$operation" = "setup" ] &&
>> +                    rdma link add "${i}_rxe" type rxe netdev "$i" && break
>> +            done
>> +        has_soft_rdma "$i" || return
>> +        get_ipv4_addr "$i"
>> +    )
>> +}
>> +
>> +operation=${1:-setup}
>> +
>> +command -v rdma >/dev/null || {
>> +    echo "Command 'rdma' is not available, please install it first." >&2
>> +    exit 1
>> +}
>> +
>> +if [ "$operation" == "setup" ] || [ "$operation" == "detect" ]; then
>> +    rdma_rxe_setup_detect
>> +elif [ "$operation" == "clean" ]; then
>> +    modprobe -r rdma_rxe
>> +else
>> +    echo "Usage: $0 [setup | detect | clean]"
>> +fi
>> diff --git a/tests/qtest/migration/precopy-tests.c b/tests/qtest/migration/precopy-tests.c
>> index ba273d10b9..f1fe34020d 100644
>> --- a/tests/qtest/migration/precopy-tests.c
>> +++ b/tests/qtest/migration/precopy-tests.c
>> @@ -99,6 +99,71 @@ static void test_precopy_unix_dirty_ring(void)
>>       test_precopy_common(&args);
>>   }
>> +#ifdef CONFIG_RDMA
>> +
>> +#define RDMA_MIGRATION_HELPER "scripts/rdma-migration-helper.sh"
>> +static int new_rdma_link(char *buffer, bool verbose)
>> +{
>> +    const char *argument = (geteuid() == 0) ? "setup" : "detect";
>> +    char cmd[1024];
>> +
>> +    snprintf(cmd, sizeof(cmd), "%s %s %s", RDMA_MIGRATION_HELPER, argument,
>> +             verbose ? "" : "2>/dev/null");
>> +
>> +    FILE *pipe = popen(cmd, "r");
>> +    if (pipe == NULL) {
>> +        perror("Failed to run script");
>> +        return -1;
>> +    }
>> +
>> +    int idx = 0;
>> +    while (fgets(buffer + idx, 128 - idx, pipe) != NULL) {
>> +        idx += strlen(buffer);
>> +    }
>> +
>> +    int status = pclose(pipe);
>> +    if (status == -1) {
>> +        perror("Error reported by pclose()");
>> +        return -1;
>> +    } else if (WIFEXITED(status)) {
>> +        return WEXITSTATUS(status);
>> +    }
>> +
>> +    return -1;
>> +}
>> +
>> +static void test_precopy_rdma_plain(void)
>> +{
>> +    char buffer[128] = {};
>> +    bool verbose = g_getenv("QTEST_LOG");
>> +
>> +    if (new_rdma_link(buffer, verbose)) {
>> +        g_test_skip("No rdma link available");
>> +        if (verbose) {
>> +            g_test_message(
>> +                "To enable the test:\n"
>> +                "(1) Run \'" RDMA_MIGRATION_HELPER
>> +                " setup\' with root and rerun the test\n"
>> +                "or\n(2) Run the test with root privilege");
>> +        }
>> +        return;
>> +    }
>> +
>> +    /*
>> +     * TODO: query a free port instead of hard code.
>> +     * 29200=('R'+'D'+'M'+'A')*100
>> +     **/
>> +    g_autofree char *uri = g_strdup_printf("rdma:%s:29200", buffer);
>> +
>> +    MigrateCommon args = {
>> +        .listen_uri = uri,
>> +        .connect_uri = uri,
>> +    };
>> +
>> +    test_precopy_common(&args);
>> +}
>> +#endif
>> +
>>   static void test_precopy_tcp_plain(void)
>>   {
>>       MigrateCommon args = {
>> @@ -1124,6 +1189,10 @@ static void migration_test_add_precopy_smoke(MigrationTestEnv *env)
>>                          test_multifd_tcp_uri_none);
>>       migration_test_add("/migration/multifd/tcp/plain/cancel",
>>                          test_multifd_tcp_cancel);
>> +#ifdef CONFIG_RDMA
>> +    migration_test_add("/migration/precopy/rdma/plain",
>> +                       test_precopy_rdma_plain);
>> +#endif
>>   }
>>   void migration_test_add_precopy(MigrationTestEnv *env)
>
Zhijian Li (Fujitsu) March 10, 2025, 8:33 a.m. UTC | #4
Hi Stefan,

Copied to gitlab CI,

On 08/03/2025 16:42, Stefan Hajnoczi wrote:
> On Sat, Mar 8, 2025 at 2:01 PM Philippe Mathieu-Daudé <philmd@linaro.org> wrote:
>>
>> Hi,
>>
>> On 7/3/25 19:15, Fabiano Rosas wrote:
>>> From: Li Zhijian <lizhijian@fujitsu.com>
>>>
>>> This qtest requires there is a RDMA(RoCE) link in the host.
>>> In order to make the test work smoothly, introduce a
>>> scripts/rdma-migration-helper.sh to
>>> - setup a new Soft-RoCE(aka RXE) if it's root
>>> - detect existing RoCE link
>>>
>>> Test will be skipped if there is no available RoCE link.
>>
>> Is it? Runing as user I'm getting:
>>
>>     RDMA ERROR: RDMA host is not set!
> 
> The CI is failing too:
> https://gitlab.com/qemu-project/qemu/-/jobs/9350004599#L5590

Thanks for this info, unfortunately, there is no 'testlog.txt' in this gitlab-ci.

I noticed that the x86 runner worked well:
https://gitlab.com/qemu-project/qemu/-/jobs/9350004633

So I suspect this is aarch64-specific, but I don't have an aarch64 machine at hand.


Cc'ing the CI folks:
I'd like to send a patch to make gitlab-ci 'cat testlog.txt' when 'check' fails.
What are your thoughts on this?

The CI diff would look like:

diff --git a/.gitlab-ci.d/buildtest-template.yml b/.gitlab-ci.d/buildtest-template.yml
index 4cc1923931..b2592faa15 100644
--- a/.gitlab-ci.d/buildtest-template.yml
+++ b/.gitlab-ci.d/buildtest-template.yml
@@ -38,7 +38,7 @@
      - section_start test "Running tests"
      - if test -n "$MAKE_CHECK_ARGS";
        then
-        $MAKE -j"$JOBS" $MAKE_CHECK_ARGS ;
+        $MAKE -j"$JOBS" $MAKE_CHECK_ARGS || { cat meson-logs/testlog.txt 2>/dev/null; false; }
        fi
      - section_end test
      - ccache --show-stats
@@ -77,7 +77,7 @@
        fi
      - section_end buildenv
      - section_start test "Running tests"
-    - $MAKE NINJA=":" $MAKE_CHECK_ARGS
+    - $MAKE NINJA=":" $MAKE_CHECK_ARGS || { cat meson-logs/testlog.txt 2>/dev/null; false; }
      - section_end test
  
  .native_test_job_template:
diff --git a/.gitlab-ci.d/crossbuild-template.yml b/.gitlab-ci.d/crossbuild-template.yml
index 303943f818..88ff592419 100644
--- a/.gitlab-ci.d/crossbuild-template.yml
+++ b/.gitlab-ci.d/crossbuild-template.yml
@@ -36,7 +36,7 @@
      - section_start test "Running tests"
      - if test -n "$MAKE_CHECK_ARGS";
        then
-        $MAKE -j"$JOBS" $MAKE_CHECK_ARGS ;
+        $MAKE -j"$JOBS" $MAKE_CHECK_ARGS || { cat meson-logs/testlog.txt 2>/dev/null; false; }
        fi
      - section_end test
      - section_start installer "Building the installer"
@@ -82,7 +82,7 @@
      - section_start test "Running tests"
      - if test -n "$MAKE_CHECK_ARGS";
        then
-        $MAKE -j"$JOBS" $MAKE_CHECK_ARGS ;
+        $MAKE -j"$JOBS" $MAKE_CHECK_ARGS || { cat meson-logs/testlog.txt 2>/dev/null; false; }
        fi
      - section_end test
  
@@ -116,7 +116,7 @@
      - section_start test "Running tests"
      - if test -n "$MAKE_CHECK_ARGS";
        then
-        $MAKE -j"$JOBS" $MAKE_CHECK_ARGS ;
+        $MAKE -j"$JOBS" $MAKE_CHECK_ARGS || { cat meson-logs/testlog.txt 2>/dev/null; false; }
        fi
      - section_end test
  
diff --git a/.gitlab-ci.d/custom-runners/ubuntu-22.04-aarch32.yml b/.gitlab-ci.d/custom-runners/ubuntu-22.04-aarch32.yml
index 8727687e2b..b7a4e3c599 100644
--- a/.gitlab-ci.d/custom-runners/ubuntu-22.04-aarch32.yml
+++ b/.gitlab-ci.d/custom-runners/ubuntu-22.04-aarch32.yml
@@ -23,3 +23,4 @@ ubuntu-22.04-aarch32-all:
     || { cat config.log meson-logs/meson-log.txt; exit 1; }
   - make --output-sync -j`nproc --ignore=40`
   - make --output-sync -j`nproc --ignore=40` check
+   || { cat meson-logs/testlog.txt 2>/dev/null; false; }
diff --git a/.gitlab-ci.d/custom-runners/ubuntu-22.04-aarch64.yml b/.gitlab-ci.d/custom-runners/ubuntu-22.04-aarch64.yml
index ca2f140471..6fcb576a59 100644
--- a/.gitlab-ci.d/custom-runners/ubuntu-22.04-aarch64.yml
+++ b/.gitlab-ci.d/custom-runners/ubuntu-22.04-aarch64.yml
@@ -22,6 +22,7 @@ ubuntu-22.04-aarch64-all-linux-static:
   - make --output-sync -j`nproc --ignore=40`
   - make check-tcg
   - make --output-sync -j`nproc --ignore=40` check
+   || { cat meson-logs/testlog.txt 2>/dev/null; false; }
  
  ubuntu-22.04-aarch64-all:
   extends: .custom_runner_template
@@ -44,6 +45,7 @@ ubuntu-22.04-aarch64-all:
     || { cat config.log meson-logs/meson-log.txt; exit 1; }
   - make --output-sync -j`nproc --ignore=40`
   - make --output-sync -j`nproc --ignore=40` check
+   || { cat meson-logs/testlog.txt 2>/dev/null; false; }
  
  ubuntu-22.04-aarch64-without-defaults:
   extends: .custom_runner_template
@@ -66,6 +68,7 @@ ubuntu-22.04-aarch64-without-defaults:
     || { cat config.log meson-logs/meson-log.txt; exit 1; }
   - make --output-sync -j`nproc --ignore=40`
   - make --output-sync -j`nproc --ignore=40` check
+   || { cat meson-logs/testlog.txt 2>/dev/null; false; }
  
  ubuntu-22.04-aarch64-alldbg:
   extends: .custom_runner_template
@@ -85,6 +88,7 @@ ubuntu-22.04-aarch64-alldbg:
   - make clean
   - make --output-sync -j`nproc --ignore=40`
   - make --output-sync -j`nproc --ignore=40` check
+   || { cat meson-logs/testlog.txt 2>/dev/null; false; }
  
  ubuntu-22.04-aarch64-clang:
   extends: .custom_runner_template
@@ -107,6 +111,7 @@ ubuntu-22.04-aarch64-clang:
     || { cat config.log meson-logs/meson-log.txt; exit 1; }
   - make --output-sync -j`nproc --ignore=40`
   - make --output-sync -j`nproc --ignore=40` check
+   || { cat meson-logs/testlog.txt 2>/dev/null; false; }
  
  ubuntu-22.04-aarch64-tci:
   needs: []
@@ -149,3 +154,4 @@ ubuntu-22.04-aarch64-notcg:
     || { cat config.log meson-logs/meson-log.txt; exit 1; }
   - make --output-sync -j`nproc --ignore=40`
   - make --output-sync -j`nproc --ignore=40` check
+   || { cat meson-logs/testlog.txt 2>/dev/null; false; }
diff --git a/.gitlab-ci.d/custom-runners/ubuntu-22.04-s390x.yml b/.gitlab-ci.d/custom-runners/ubuntu-22.04-s390x.yml
index ca374acb8c..35e36f4124 100644
--- a/.gitlab-ci.d/custom-runners/ubuntu-22.04-s390x.yml
+++ b/.gitlab-ci.d/custom-runners/ubuntu-22.04-s390x.yml
@@ -20,6 +20,7 @@ ubuntu-22.04-s390x-all-linux:
   - make --output-sync -j`nproc`
   - make --output-sync check-tcg
   - make --output-sync -j`nproc` check
+   || { cat meson-logs/testlog.txt 2>/dev/null; false; }
  
  ubuntu-22.04-s390x-all-system:
   extends: .custom_runner_template
@@ -39,6 +40,7 @@ ubuntu-22.04-s390x-all-system:
     || { cat config.log meson-logs/meson-log.txt; exit 1; }
   - make --output-sync -j`nproc`
   - make --output-sync -j`nproc` check
+   || { cat meson-logs/testlog.txt 2>/dev/null; false; }
  
  ubuntu-22.04-s390x-alldbg:
   extends: .custom_runner_template
@@ -62,6 +64,7 @@ ubuntu-22.04-s390x-alldbg:
   - make clean
   - make --output-sync -j`nproc`
   - make --output-sync -j`nproc` check
+   || { cat meson-logs/testlog.txt 2>/dev/null; false; }
  
  ubuntu-22.04-s390x-clang:
   extends: .custom_runner_template
@@ -84,6 +87,7 @@ ubuntu-22.04-s390x-clang:
     || { cat config.log meson-logs/meson-log.txt; exit 1; }
   - make --output-sync -j`nproc`
   - make --output-sync -j`nproc` check
+   || { cat meson-logs/testlog.txt 2>/dev/null; false; }
  
  ubuntu-22.04-s390x-tci:
   needs: []
@@ -126,3 +130,4 @@ ubuntu-22.04-s390x-notcg:
     || { cat config.log meson-logs/meson-log.txt; exit 1; }
   - make --output-sync -j`nproc`
   - make --output-sync -j`nproc` check
+   || { cat meson-logs/testlog.txt 2>/dev/null; false; }


> 
> I have dropped this pull request for now. Please send a new revision
> once the issue has been resolved.
> 
> Stefan
> 
>>
>> Apparently called via:
>>
>> qemu_start_incoming_migration()
>>     -> rdma_start_incoming_migration()
>>        -> qemu_rdma_dest_init()
>>
>>>    # Start of rdma tests
>>>    # Running /x86_64/migration/precopy/rdma/plain
>>>    Command 'rdma' is not available, please install it first.
>>>    # To enable the test:
>>>    # (1) Run 'scripts/rdma-migration-helper.sh setup' with root and rerun the test
>>>    # or
>>>    # (2) Run the test with root privilege
>>
>> Could this be the issue? Should we skip if not root, since calling
>> the script in "detect" mode makes the new_rdma_link() method succeed?
>>
>>>    #
>>>    ok 1 /x86_64/migration/precopy/rdma/plain # SKIP No rdma link available
>>>    # End of rdma tests
>>>
>>> Note: Remove the newly added RXE link by executing 'modprobe -r rdma_rxe'
>>> or by specifying 'clean' within this script.
>>
>> qtest_add() provides both setup() / teardown() methods.
>>
>> A test leaving the system in a different state seems bogus to me,
>> even more so when that information is buried in a commit description...
>>
>> We shouldn't merge this patch as is IMHO.
>>
>> Regards,
>>
>> Phil.
>>
>>> Reviewed-by: Peter Xu <peterx@redhat.com>
>>> Signed-off-by: Li Zhijian <lizhijian@fujitsu.com>
>>> Message-ID: <20250305062825.772629-7-lizhijian@fujitsu.com>
>>> [reformated the message to be under 90 characters]
>>> Signed-off-by: Fabiano Rosas <farosas@suse.de>
>>> ---
>>>    MAINTAINERS                           |  1 +
>>>    scripts/rdma-migration-helper.sh      | 48 +++++++++++++++++++
>>>    tests/qtest/migration/precopy-tests.c | 69 +++++++++++++++++++++++++++
>>>    3 files changed, 118 insertions(+)
>>>    create mode 100755 scripts/rdma-migration-helper.sh
>>>
>>> diff --git a/MAINTAINERS b/MAINTAINERS
>>> index 5df6020ed5..56e85adcfb 100644
>>> --- a/MAINTAINERS
>>> +++ b/MAINTAINERS
>>> @@ -3517,6 +3517,7 @@ R: Li Zhijian <lizhijian@fujitsu.com>
>>>    R: Peter Xu <peterx@redhat.com>
>>>    S: Odd Fixes
>>>    F: migration/rdma*
>>> +F: scripts/rdma-migration-helper.sh
>>>
>>>    Migration dirty limit and dirty page rate
>>>    M: Hyman Huang <yong.huang@smartx.com>
>>> diff --git a/scripts/rdma-migration-helper.sh b/scripts/rdma-migration-helper.sh
>>> new file mode 100755
>>> index 0000000000..08e29a52eb
>>> --- /dev/null
>>> +++ b/scripts/rdma-migration-helper.sh
>>> @@ -0,0 +1,48 @@
>>> +#!/bin/bash
>>> +
>>> +# Copied from blktests
>>> +get_ipv4_addr()
>>> +{
>>> +    ip -4 -o addr show dev "$1" |
>>> +        sed -n 's/.*[[:blank:]]inet[[:blank:]]*\([^[:blank:]/]*\).*/\1/p' |
>>> +        tr -d '\n'
>>> +}
>>> +
>>> +has_soft_rdma()
>>> +{
>>> +    rdma link | grep -q " netdev $1[[:blank:]]*\$"
>>> +}
>>> +
>>> +rdma_rxe_setup_detect()
>>> +{
>>> +    (
>>> +        cd /sys/class/net &&
>>> +            for i in *; do
>>> +                [ -e "$i" ] || continue
>>> +                [ "$i" = "lo" ] && continue
>>> +                [ "$(<"$i/addr_len")" = 6 ] || continue
>>> +                [ "$(<"$i/carrier")" = 1 ] || continue
>>> +
>>> +                has_soft_rdma "$i" && break
>>> +                [ "$operation" = "setup" ] &&
>>> +                    rdma link add "${i}_rxe" type rxe netdev "$i" && break
>>> +            done
>>> +        has_soft_rdma "$i" || return
>>> +        get_ipv4_addr "$i"
>>> +    )
>>> +}
>>> +
>>> +operation=${1:-setup}
>>> +
>>> +command -v rdma >/dev/null || {
>>> +    echo "Command 'rdma' is not available, please install it first." >&2
>>> +    exit 1
>>> +}
>>> +
>>> +if [ "$operation" == "setup" ] || [ "$operation" == "detect" ]; then
>>> +    rdma_rxe_setup_detect
>>> +elif [ "$operation" == "clean" ]; then
>>> +    modprobe -r rdma_rxe
>>> +else
>>> +    echo "Usage: $0 [setup | detect | clean]"
>>> +fi
>>> diff --git a/tests/qtest/migration/precopy-tests.c b/tests/qtest/migration/precopy-tests.c
>>> index ba273d10b9..f1fe34020d 100644
>>> --- a/tests/qtest/migration/precopy-tests.c
>>> +++ b/tests/qtest/migration/precopy-tests.c
>>> @@ -99,6 +99,71 @@ static void test_precopy_unix_dirty_ring(void)
>>>        test_precopy_common(&args);
>>>    }
>>>
>>> +#ifdef CONFIG_RDMA
>>> +
>>> +#define RDMA_MIGRATION_HELPER "scripts/rdma-migration-helper.sh"
>>> +static int new_rdma_link(char *buffer, bool verbose)
>>> +{
>>> +    const char *argument = (geteuid() == 0) ? "setup" : "detect";
>>> +    char cmd[1024];
>>> +
>>> +    snprintf(cmd, sizeof(cmd), "%s %s %s", RDMA_MIGRATION_HELPER, argument,
>>> +             verbose ? "" : "2>/dev/null");
>>> +
>>> +    FILE *pipe = popen(cmd, "r");
>>> +    if (pipe == NULL) {
>>> +        perror("Failed to run script");
>>> +        return -1;
>>> +    }
>>> +
>>> +    int idx = 0;
>>> +    while (fgets(buffer + idx, 128 - idx, pipe) != NULL) {
>>> +        idx += strlen(buffer);
>>> +    }
>>> +
>>> +    int status = pclose(pipe);
>>> +    if (status == -1) {
>>> +        perror("Error reported by pclose()");
>>> +        return -1;
>>> +    } else if (WIFEXITED(status)) {
>>> +        return WEXITSTATUS(status);
>>> +    }
>>> +
>>> +    return -1;
>>> +}
>>> +
>>> +static void test_precopy_rdma_plain(void)
>>> +{
>>> +    char buffer[128] = {};
>>> +    bool verbose = g_getenv("QTEST_LOG");
>>> +
>>> +    if (new_rdma_link(buffer, verbose)) {
>>> +        g_test_skip("No rdma link available");
>>> +        if (verbose) {
>>> +            g_test_message(
>>> +                "To enable the test:\n"
>>> +                "(1) Run \'" RDMA_MIGRATION_HELPER
>>> +                " setup\' with root and rerun the test\n"
>>> +                "or\n(2) Run the test with root privilege");
>>> +        }
>>> +        return;
>>> +    }
>>> +
>>> +    /*
>>> +     * TODO: query a free port instead of hard code.
>>> +     * 29200=('R'+'D'+'M'+'A')*100
>>> +     **/
>>> +    g_autofree char *uri = g_strdup_printf("rdma:%s:29200", buffer);
>>> +
>>> +    MigrateCommon args = {
>>> +        .listen_uri = uri,
>>> +        .connect_uri = uri,
>>> +    };
>>> +
>>> +    test_precopy_common(&args);
>>> +}
>>> +#endif
>>> +
>>>    static void test_precopy_tcp_plain(void)
>>>    {
>>>        MigrateCommon args = {
>>> @@ -1124,6 +1189,10 @@ static void migration_test_add_precopy_smoke(MigrationTestEnv *env)
>>>                           test_multifd_tcp_uri_none);
>>>        migration_test_add("/migration/multifd/tcp/plain/cancel",
>>>                           test_multifd_tcp_cancel);
>>> +#ifdef CONFIG_RDMA
>>> +    migration_test_add("/migration/precopy/rdma/plain",
>>> +                       test_precopy_rdma_plain);
>>> +#endif
>>>    }
>>>
>>>    void migration_test_add_precopy(MigrationTestEnv *env)
>>
>>
Peter Xu March 10, 2025, 2:36 p.m. UTC | #5
On Mon, Mar 10, 2025 at 08:33:14AM +0000, Zhijian Li (Fujitsu) wrote:
> Hi Stefan,
> 
> Copied to gitlab CI,
> 
> On 08/03/2025 16:42, Stefan Hajnoczi wrote:
> > On Sat, Mar 8, 2025 at 2:01 PM Philippe Mathieu-Daudé <philmd@linaro.org> wrote:
> >>
> >> Hi,
> >>
> >> On 7/3/25 19:15, Fabiano Rosas wrote:
> >>> From: Li Zhijian <lizhijian@fujitsu.com>
> >>>
> >>> This qtest requires there is a RDMA(RoCE) link in the host.
> >>> In order to make the test work smoothly, introduce a
> >>> scripts/rdma-migration-helper.sh to
> >>> - setup a new Soft-RoCE(aka RXE) if it's root
> >>> - detect existing RoCE link
> >>>
> >>> Test will be skipped if there is no available RoCE link.
> >>
> >> Is it? Running as user I'm getting:
> >>
> >>     RDMA ERROR: RDMA host is not set!
> > 
> > The CI is failing too:
> > https://gitlab.com/qemu-project/qemu/-/jobs/9350004599#L5590
> 
> Thanks for this info, unfortunately, there is no 'testlog.txt' in this gitlab-ci.

It has it.  Try looking for "Job artifacts"; there are "Download" and
"Browse" options for testlog.txt.  But there isn't much info.

# Start of rdma tests
# Running /aarch64/migration/precopy/rdma/plain
# Using machine type: virt-10.0
# starting QEMU: exec ./qemu-system-aarch64 -qtest unix:/tmp/qtest-1127030.sock -qtest-log /dev/null -chardev socket,path=/tmp/qtest-1127030.qmp,id=char0 -mon chardev=char0,mode=control -display none -audio none -accel kvm -accel tcg -machine virt-10.0,gic-version=3 -name source,debug-threads=on -m 150M  -serial file:/tmp/migration-test-R1OX22/src_serial -cpu max -kernel /tmp/migration-test-R1OX22/bootsect    -accel qtest
# starting QEMU: exec ./qemu-system-aarch64 -qtest unix:/tmp/qtest-1127030.sock -qtest-log /dev/null -chardev socket,path=/tmp/qtest-1127030.qmp,id=char0 -mon chardev=char0,mode=control -display none -audio none -accel kvm -accel tcg -machine virt-10.0,gic-version=3 -name target,debug-threads=on -m 150M  -serial file:/tmp/migration-test-R1OX22/dest_serial -incoming rdma::29200  -cpu max -kernel /tmp/migration-test-R1OX22/bootsect    -accel qtest
----------------------------------- stderr -----------------------------------
qemu-system-aarch64: -incoming rdma::29200: RDMA ERROR: RDMA host is not set!
Broken pipe
../tests/qtest/libqtest.c:199: kill_qemu() tried to terminate QEMU process but encountered exit status 1 (expected 0)

> 
> I learned that x86 runner worked well
> https://gitlab.com/qemu-project/qemu/-/jobs/9350004633
> 
> So I doubt this is aarch64 specific, but I don't have an aarch64 in hand.

I think it means the script will exit 0 even without an IPv4 address.  I
suspect we used to rely on:

  command -v rdma

But maybe that's available on the reproduced hosts, so it'll pass there.
OTOH, the script should fail if no available IPv4 address is found.

To be explicit, the script does this:

  has_soft_rdma "$i" || return

So even if it failed to see the soft rdma and returned, IIUC
rdma_rxe_setup_detect() will still succeed.

Maybe it should be this instead?

  has_soft_rdma "$i" || exit -1

We could also sanity check the ipv4 address, e.g.:

  rdma_rxe_setup_detect | grep -Eo '^[0-9]{1,3}(\.[0-9]{1,3}){3}$'
Fabiano Rosas March 10, 2025, 3 p.m. UTC | #6
"Zhijian Li (Fujitsu)" via <qemu-devel@nongnu.org> writes:

> Hi Philippe,
>
> Thanks for your testing.
>
>
> On 08/03/2025 14:00, Philippe Mathieu-Daudé wrote:
>> Hi,
>> 
>> On 7/3/25 19:15, Fabiano Rosas wrote:
>>> From: Li Zhijian <lizhijian@fujitsu.com>
>>>
>>> This qtest requires there is a RDMA(RoCE) link in the host.
>>> In order to make the test work smoothly, introduce a
>>> scripts/rdma-migration-helper.sh to
>>> - setup a new Soft-RoCE(aka RXE) if it's root
>>> - detect existing RoCE link
>>>
>>> Test will be skipped if there is no available RoCE link.
>> 
>> Is it? Running as user I'm getting:
>> 
>>    RDMA ERROR: RDMA host is not set!
>
>
> It's unexpected behavior.
>
> It implies that the script terminated successfully with an exit
> code of 0(there is a RDMA link), yet failed to display its canonical
> IPv4 address.
>
> Stefan also mentioned the same error...
> https://gitlab.com/qemu-project/qemu/-/jobs/9350004599#L5590
>
> I couldn't reproduce your error.
>
> Could you share the output of this script with a normal user,
> $ scripts/rdma-migration-helper.sh detect
>
> If you have an rdma/RXE link, please share the output of its ip
> $ ip -4 -o addr show dev <NIC>
>
> Where the <NIC> is an interface associated with the RoCE(RXE), for example
>
> $ rdma link
> link enp2s0_rxe/1 state ACTIVE physical_state LINK_UP netdev enp2s0
>
> then the <NIC> is enp2s0
>
>
>> 
>> Apparently called via:
>> 
>> qemu_start_incoming_migration()
>>    -> rdma_start_incoming_migration()
>>       -> qemu_rdma_dest_init()
>> 
>>>   # Start of rdma tests
>>>   # Running /x86_64/migration/precopy/rdma/plain
>>>   Command 'rdma' is not available, please install it first.
>>>   # To enable the test:
>>>   # (1) Run 'scripts/rdma-migration-helper.sh setup' with root and rerun the test
>>>   # or
>>>   # (2) Run the test with root privilege
>> 
>> Could this be the issue? Should we skip if not root, since calling
>> the script in "detect" mode makes the new_rdma_link() method succeed?
>
> It's expected that 'detect' should succeed and print an IPv4 address
>
>> 
>>>   #
>>>   ok 1 /x86_64/migration/precopy/rdma/plain # SKIP No rdma link available
>>>   # End of rdma tests
>>>
>>> Note: Remove the newly added RXE link by executing 'modprobe -r rdma_rxe'
>>> or by specifying 'clean' within this script.
>> 
>> qtest_add() provides both setup() / teardown() methods.
>
> This may require a minor refactor of the migration-test framework to
> enable support for setup() and teardown() methods.
>
> Let me see...
>
>
>
>> Test leaving system in different state seems bogus to me.
>
> At this point, I'm unable to refute that.
> It indeed might dirty the system.
>
>
> A palatable compromise might be that, regardless of whether one is a
> root user or not, this test is only supported on hosts with RDMA link.
>
> Otherwise, it will provide an SKIP warning.
>
>       # Run 'scripts/rdma-migration-helper.sh setup' with root and rerun the test
>       # Optional: run 'scripts/rdma-migration-helper.sh clean' to revert the 'setup'
>
> For local users, they can independently use this script to set up and clean RDMA,
> as they are aware of the modifications they have made to the system.

Yes, let's skip it unless the user has very explicitly set things up.
Zhijian Li (Fujitsu)" via March 11, 2025, 2:06 a.m. UTC | #7
On 10/03/2025 22:36, Peter Xu wrote:
> On Mon, Mar 10, 2025 at 08:33:14AM +0000, Zhijian Li (Fujitsu) wrote:
>> Hi Stefan,
>>
>> Copied to gitlab CI,
>>
>> On 08/03/2025 16:42, Stefan Hajnoczi wrote:
>>> On Sat, Mar 8, 2025 at 2:01 PM Philippe Mathieu-Daudé <philmd@linaro.org> wrote:
>>>>
>>>> Hi,
>>>>
>>>> On 7/3/25 19:15, Fabiano Rosas wrote:
>>>>> From: Li Zhijian <lizhijian@fujitsu.com>
>>>>>
>>>>> This qtest requires there is a RDMA(RoCE) link in the host.
>>>>> In order to make the test work smoothly, introduce a
>>>>> scripts/rdma-migration-helper.sh to
>>>>> - setup a new Soft-RoCE(aka RXE) if it's root
>>>>> - detect existing RoCE link
>>>>>
>>>>> Test will be skipped if there is no available RoCE link.
>>>>
>>>> Is it? Running as user I'm getting:
>>>>
>>>>      RDMA ERROR: RDMA host is not set!
>>>
>>> The CI is failing too:
>>> https://gitlab.com/qemu-project/qemu/-/jobs/9350004599#L5590
>>
>> Thanks for this info, unfortunately, there is no 'testlog.txt' in this gitlab-ci.
> 
> It has it.  Try look for "Job artifacts", then there're "Download" or
> "Browse" for testlog.txt.  But there isn't much info.

Thanks for this information.


> 
> # Start of rdma tests
> # Running /aarch64/migration/precopy/rdma/plain
> # Using machine type: virt-10.0
> # starting QEMU: exec ./qemu-system-aarch64 -qtest unix:/tmp/qtest-1127030.sock -qtest-log /dev/null -chardev socket,path=/tmp/qtest-1127030.qmp,id=char0 -mon chardev=char0,mode=control -display none -audio none -accel kvm -accel tcg -machine virt-10.0,gic-version=3 -name source,debug-threads=on -m 150M  -serial file:/tmp/migration-test-R1OX22/src_serial -cpu max -kernel /tmp/migration-test-R1OX22/bootsect    -accel qtest
> # starting QEMU: exec ./qemu-system-aarch64 -qtest unix:/tmp/qtest-1127030.sock -qtest-log /dev/null -chardev socket,path=/tmp/qtest-1127030.qmp,id=char0 -mon chardev=char0,mode=control -display none -audio none -accel kvm -accel tcg -machine virt-10.0,gic-version=3 -name target,debug-threads=on -m 150M  -serial file:/tmp/migration-test-R1OX22/dest_serial -incoming rdma::29200  -cpu max -kernel /tmp/migration-test-R1OX22/bootsect    -accel qtest
> ----------------------------------- stderr -----------------------------------
> qemu-system-aarch64: -incoming rdma::29200: RDMA ERROR: RDMA host is not set!
> Broken pipe
> ../tests/qtest/libqtest.c:199: kill_qemu() tried to terminate QEMU process but encountered exit status 1 (expected 0)
> 
>>
>> I learned that x86 runner worked well
>> https://gitlab.com/qemu-project/qemu/-/jobs/9350004633
>>
>> So I doubt this is aarch64 specific, but I don't have an aarch64 in hand.
> 
> I think it means it'll exit 0 even without ipv4 address in the script.  I
> doubt whether we used to rely on:
> 
>    command -v rdma
> 
> But maybe that's available on the reproduced hosts, so it'll pass there.
> OTOH, the script should fail the script if no avail ipv4 addr found.


Yes, I believe this is the cause. I have reproduced it with an RDMA link without an IPv4 address.



> 
> To be explicit, the script does this:
> 
>    has_soft_rdma "$i" || return
> 
> So even if it failed to see the soft rdma and returned, IIUC
> rdma_rxe_setup_detect() will still success.
> 
> Maybe it should be this instead?
> 
>    has_soft_rdma "$i" || exit -1
> 
> We could also sanity check the ipv4 address, e.g.:
> 
>    rdma_rxe_setup_detect | grep -Eo '^[0-9]{1,3}(\.[0-9]{1,3}){3}$'
> 

Yeah, this can make the script more robust.
diff mbox series

Patch

diff --git a/MAINTAINERS b/MAINTAINERS
index 5df6020ed5..56e85adcfb 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3517,6 +3517,7 @@  R: Li Zhijian <lizhijian@fujitsu.com>
 R: Peter Xu <peterx@redhat.com>
 S: Odd Fixes
 F: migration/rdma*
+F: scripts/rdma-migration-helper.sh
 
 Migration dirty limit and dirty page rate
 M: Hyman Huang <yong.huang@smartx.com>
diff --git a/scripts/rdma-migration-helper.sh b/scripts/rdma-migration-helper.sh
new file mode 100755
index 0000000000..08e29a52eb
--- /dev/null
+++ b/scripts/rdma-migration-helper.sh
@@ -0,0 +1,48 @@ 
+#!/bin/bash
+
+# Copied from blktests
+get_ipv4_addr()
+{
+    ip -4 -o addr show dev "$1" |
+        sed -n 's/.*[[:blank:]]inet[[:blank:]]*\([^[:blank:]/]*\).*/\1/p' |
+        tr -d '\n'
+}
+
+has_soft_rdma()
+{
+    rdma link | grep -q " netdev $1[[:blank:]]*\$"
+}
+
+rdma_rxe_setup_detect()
+{
+    (
+        cd /sys/class/net &&
+            for i in *; do
+                [ -e "$i" ] || continue
+                [ "$i" = "lo" ] && continue
+                [ "$(<"$i/addr_len")" = 6 ] || continue
+                [ "$(<"$i/carrier")" = 1 ] || continue
+
+                has_soft_rdma "$i" && break
+                [ "$operation" = "setup" ] &&
+                    rdma link add "${i}_rxe" type rxe netdev "$i" && break
+            done
+        has_soft_rdma "$i" || return
+        get_ipv4_addr "$i"
+    )
+}
+
+operation=${1:-setup}
+
+command -v rdma >/dev/null || {
+    echo "Command 'rdma' is not available, please install it first." >&2
+    exit 1
+}
+
+if [ "$operation" == "setup" ] || [ "$operation" == "detect" ]; then
+    rdma_rxe_setup_detect
+elif [ "$operation" == "clean" ]; then
+    modprobe -r rdma_rxe
+else
+    echo "Usage: $0 [setup | detect | clean]"
+fi
diff --git a/tests/qtest/migration/precopy-tests.c b/tests/qtest/migration/precopy-tests.c
index ba273d10b9..f1fe34020d 100644
--- a/tests/qtest/migration/precopy-tests.c
+++ b/tests/qtest/migration/precopy-tests.c
@@ -99,6 +99,71 @@  static void test_precopy_unix_dirty_ring(void)
     test_precopy_common(&args);
 }
 
+#ifdef CONFIG_RDMA
+
+#define RDMA_MIGRATION_HELPER "scripts/rdma-migration-helper.sh"
+static int new_rdma_link(char *buffer, bool verbose)
+{
+    const char *argument = (geteuid() == 0) ? "setup" : "detect";
+    char cmd[1024];
+
+    snprintf(cmd, sizeof(cmd), "%s %s %s", RDMA_MIGRATION_HELPER, argument,
+             verbose ? "" : "2>/dev/null");
+
+    FILE *pipe = popen(cmd, "r");
+    if (pipe == NULL) {
+        perror("Failed to run script");
+        return -1;
+    }
+
+    int idx = 0;
+    while (fgets(buffer + idx, 128 - idx, pipe) != NULL) {
+        idx += strlen(buffer);
+    }
+
+    int status = pclose(pipe);
+    if (status == -1) {
+        perror("Error reported by pclose()");
+        return -1;
+    } else if (WIFEXITED(status)) {
+        return WEXITSTATUS(status);
+    }
+
+    return -1;
+}
+
+static void test_precopy_rdma_plain(void)
+{
+    char buffer[128] = {};
+    bool verbose = g_getenv("QTEST_LOG");
+
+    if (new_rdma_link(buffer, verbose)) {
+        g_test_skip("No rdma link available");
+        if (verbose) {
+            g_test_message(
+                "To enable the test:\n"
+                "(1) Run \'" RDMA_MIGRATION_HELPER
+                " setup\' with root and rerun the test\n"
+                "or\n(2) Run the test with root privilege");
+        }
+        return;
+    }
+
+    /*
+     * TODO: query a free port instead of hard code.
+     * 29200=('R'+'D'+'M'+'A')*100
+     **/
+    g_autofree char *uri = g_strdup_printf("rdma:%s:29200", buffer);
+
+    MigrateCommon args = {
+        .listen_uri = uri,
+        .connect_uri = uri,
+    };
+
+    test_precopy_common(&args);
+}
+#endif
+
 static void test_precopy_tcp_plain(void)
 {
     MigrateCommon args = {
@@ -1124,6 +1189,10 @@  static void migration_test_add_precopy_smoke(MigrationTestEnv *env)
                        test_multifd_tcp_uri_none);
     migration_test_add("/migration/multifd/tcp/plain/cancel",
                        test_multifd_tcp_cancel);
+#ifdef CONFIG_RDMA
+    migration_test_add("/migration/precopy/rdma/plain",
+                       test_precopy_rdma_plain);
+#endif
 }
 
 void migration_test_add_precopy(MigrationTestEnv *env)