diff mbox series

[net-next,1/5] selftests: drv-net: define endpoint structures

Message ID 20240412233705.1066444-2-kuba@kernel.org (mailing list archive)
State New
Headers show
Series selftests: drv-net: support testing with a remote system | expand

Commit Message

Jakub Kicinski April 12, 2024, 11:37 p.m. UTC
Define the endpoint "model". To execute most meaningful device
driver tests we need to be able to communicate with a remote system,
and have it send traffic to the device under test.

Various test environments will have different requirements.

0) "Local" netdevsim-based testing can simply use net namespaces.
netdevsim supports connecting two devices now, to form a veth-like
construct.

1) Similarly on hosts with multiple NICs, the NICs may be connected
together with a loopback cable or internal device loopback.
One interface may be placed into separate netns, and tests
would proceed much like in the netdevsim case. Note that
the loopback config or the moving of one interface
into a netns is not expected to be part of selftest code.

2) Some systems may need to communicate with the endpoint via SSH.

3) Last but not least, an environment may have its own custom communication
method.

Fundamentally we only need two operations:
 - run a command remotely
 - deploy a binary (if some tool we need is built as part of kselftests)

Wrap these two in a class. Use dynamic loading to load the Endpoint
class. This will allow very easy definition of other communication
methods without bothering upstream code base.

Stick to the "simple" / "no unnecessary abstractions" model for
referring to the endpoints. The host / endpoint objects are passed
as an argument to the usual cmd() or ip() invocation. For example:

 ip("link show", json=True, host=endpoint)

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../selftests/drivers/net/lib/py/__init__.py  |  1 +
 .../selftests/drivers/net/lib/py/endpoint.py  | 13 +++++++
 .../selftests/drivers/net/lib/py/ep_netns.py  | 15 ++++++++
 .../selftests/drivers/net/lib/py/ep_ssh.py    | 34 +++++++++++++++++++
 tools/testing/selftests/net/lib/py/utils.py   | 19 ++++++-----
 5 files changed, 73 insertions(+), 9 deletions(-)
 create mode 100644 tools/testing/selftests/drivers/net/lib/py/endpoint.py
 create mode 100644 tools/testing/selftests/drivers/net/lib/py/ep_netns.py
 create mode 100644 tools/testing/selftests/drivers/net/lib/py/ep_ssh.py

Comments

Willem de Bruijn April 14, 2024, 5:04 p.m. UTC | #1
Jakub Kicinski wrote:
> Define the endpoint "model". To execute most meaningful device
> driver tests we need to be able to communicate with a remote system,
> and have it send traffic to the device under test.
> 
> Various test environments will have different requirements.
> 
> 0) "Local" netdevsim-based testing can simply use net namespaces.
> netdevsim supports connecting two devices now, to form a veth-like
> construct.
> 
> 1) Similarly on hosts with multiple NICs, the NICs may be connected
> together with a loopback cable or internal device loopback.
> One interface may be placed into separate netns, and tests
> would proceed much like in the netdevsim case. Note that
> the loopback config or the moving of one interface
> into a netns is not expected to be part of selftest code.
> 
> 2) Some systems may need to communicate with the endpoint via SSH.
> 
> 3) Last but not least environment may have its own custom communication
> method.
> 
> Fundamentally we only need two operations:
>  - run a command remotely
>  - deploy a binary (if some tool we need is built as part of kselftests)
> 
> Wrap these two in a class. Use dynamic loading to load the Endpoint
> class. This will allow very easy definition of other communication
> methods without bothering upstream code base.
> 
> Stick to the "simple" / "no unnecessary abstractions" model for
> referring to the endpoints. The host / endpoint object are passed
> as an argument to the usual cmd() or ip() invocation. For example:
> 
>  ip("link show", json=True, host=endpoint)
> 
> Signed-off-by: Jakub Kicinski <kuba@kernel.org>
> ---
>  .../selftests/drivers/net/lib/py/__init__.py  |  1 +
>  .../selftests/drivers/net/lib/py/endpoint.py  | 13 +++++++
>  .../selftests/drivers/net/lib/py/ep_netns.py  | 15 ++++++++
>  .../selftests/drivers/net/lib/py/ep_ssh.py    | 34 +++++++++++++++++++
>  tools/testing/selftests/net/lib/py/utils.py   | 19 ++++++-----
>  5 files changed, 73 insertions(+), 9 deletions(-)
>  create mode 100644 tools/testing/selftests/drivers/net/lib/py/endpoint.py
>  create mode 100644 tools/testing/selftests/drivers/net/lib/py/ep_netns.py
>  create mode 100644 tools/testing/selftests/drivers/net/lib/py/ep_ssh.py
> 
> diff --git a/tools/testing/selftests/drivers/net/lib/py/__init__.py b/tools/testing/selftests/drivers/net/lib/py/__init__.py
> index 4653dffcd962..0d71ec83135b 100644
> --- a/tools/testing/selftests/drivers/net/lib/py/__init__.py
> +++ b/tools/testing/selftests/drivers/net/lib/py/__init__.py
> @@ -15,3 +15,4 @@ KSFT_DIR = (Path(__file__).parent / "../../../..").resolve()
>      sys.exit(4)
>  
>  from .env import *
> +from .endpoint import Endpoint
> diff --git a/tools/testing/selftests/drivers/net/lib/py/endpoint.py b/tools/testing/selftests/drivers/net/lib/py/endpoint.py
> new file mode 100644
> index 000000000000..9272fdc47a06
> --- /dev/null
> +++ b/tools/testing/selftests/drivers/net/lib/py/endpoint.py
> @@ -0,0 +1,13 @@
> +# SPDX-License-Identifier: GPL-2.0
> +
> +import importlib
> +
> +_modules = {}
> +
> +def Endpoint(ep_type, ep_args):
> +    global _modules
> +
> +    if ep_type not in _modules:
> +        _modules[ep_type] = importlib.import_module("..ep_" + ep_type, __name__)
> +
> +    return getattr(_modules[ep_type], "Endpoint")(ep_args)
> diff --git a/tools/testing/selftests/drivers/net/lib/py/ep_netns.py b/tools/testing/selftests/drivers/net/lib/py/ep_netns.py
> new file mode 100644
> index 000000000000..f5c588bb31f0
> --- /dev/null
> +++ b/tools/testing/selftests/drivers/net/lib/py/ep_netns.py
> @@ -0,0 +1,15 @@
> +# SPDX-License-Identifier: GPL-2.0
> +
> +from lib.py import cmd
> +
> +
> +class Endpoint:
> +    def __init__(self, name):
> +        self.name = name
> +
> +    def cmd(self, *args):
> +        c = cmd(*args, ns=self.name)
> +        return c.stdout, c.stderr, c.ret
> +
> +    def deploy(self, what):
> +        return what
> diff --git a/tools/testing/selftests/drivers/net/lib/py/ep_ssh.py b/tools/testing/selftests/drivers/net/lib/py/ep_ssh.py
> new file mode 100644
> index 000000000000..620df0dd8c07
> --- /dev/null
> +++ b/tools/testing/selftests/drivers/net/lib/py/ep_ssh.py
> @@ -0,0 +1,34 @@
> +# SPDX-License-Identifier: GPL-2.0
> +
> +import os
> +import shlex
> +import string
> +import random
> +
> +from lib.py import cmd
> +
> +
> +class Endpoint:
> +    def __init__(self, name):
> +        self.name = name
> +        self._tmpdir = None
> +
> +    def __del__(self):
> +        if self._tmpdir:
> +            self.cmd("rm -rf " + self._tmpdir)
> +            self._tmpdir = None
> +
> +    def cmd(self, comm, *args):
> +        c = cmd("ssh " + self.name + " " + shlex.quote(comm), *args)
> +        return c.stdout, c.stderr, c.ret
> +
> +    def _mktmp(self):
> +        return ''.join(random.choice(string.ascii_lowercase) for _ in range(8))
> +
> +    def deploy(self, what):
> +        if not self._tmpdir:
> +            self._tmpdir = "/tmp/" + self._mktmp()
> +            self.cmd("mkdir " + self._tmpdir)
> +        file_name = self._tmpdir + "/" + self._mktmp() + os.path.basename(what)
> +        cmd(f"scp {what} {self.name}:{file_name}")
> +        return file_name
> diff --git a/tools/testing/selftests/net/lib/py/utils.py b/tools/testing/selftests/net/lib/py/utils.py
> index f0d425731fd4..eff50ddd9a9d 100644
> --- a/tools/testing/selftests/net/lib/py/utils.py
> +++ b/tools/testing/selftests/net/lib/py/utils.py
> @@ -4,10 +4,8 @@ import json as _json
>  import subprocess
>  
>  class cmd:
> -    def __init__(self, comm, shell=True, fail=True, ns=None, background=False):
> +    def __init__(self, comm, shell=True, fail=True, ns=None, background=False, host=None):
>          if ns:
> -            if isinstance(ns, NetNS):
> -                ns = ns.name
>              comm = f'ip netns exec {ns} ' + comm
>  
>          self.stdout = None
> @@ -15,10 +13,13 @@ import subprocess
>          self.ret = None
>  
>          self.comm = comm
> -        self.proc = subprocess.Popen(comm, shell=shell, stdout=subprocess.PIPE,
> -                                     stderr=subprocess.PIPE)
> -        if not background:
> -            self.process(terminate=False, fail=fail)
> +        if host:
> +            self.stdout, self.stderr, self.ret = host.cmd(comm)
> +        else:
> +            self.proc = subprocess.Popen(comm, shell=shell, stdout=subprocess.PIPE,
> +                                         stderr=subprocess.PIPE)
> +            if not background:
> +                self.process(terminate=False, fail=fail)
>  
>      def process(self, terminate=True, fail=None):
>          if terminate:

Perhaps superfluous / putting the cart before the horse, but a few
thorny issues I've repeatedly run into with similar infra are:

1. Cleaning up remote state in all conditions, including timeout/kill.

   Some tests require a setup phase before the test, and a matching
   cleanup phase. If any of the configured state is variable (even
   just a randomized filepath) this needs to be communicated to the
   cleanup phase. The remote filepath is handled well here. But if
   a test needs per-test setup? Say, change MTU or an Ethtool feature.
   Multiple related tests may want to share a setup/cleanup.

   Related: some tests may benefit from a lightweight stateless
   check phase to detect preconditions before committing to any setup.
   Again, say an Ethtool feature like rx-gro-hw, or AF_XDP metadata rx.

2. Synchronizing peers. Often both peers need to be started at the
   same time, but then the client may need to wait until the server
   is listening. Paolo added a nice local script to detect a listening
   socket with sockstat. Less of a problem with TCP tests than UDP or
   raw packet tests.
Paolo Abeni April 15, 2024, 8:57 a.m. UTC | #2
On Fri, 2024-04-12 at 16:37 -0700, Jakub Kicinski wrote:
> +class Endpoint:
> +    def __init__(self, name):
> +        self.name = name
> +        self._tmpdir = None
> +
> +    def __del__(self):
> +        if self._tmpdir:
> +            self.cmd("rm -rf " + self._tmpdir)
> +            self._tmpdir = None
> +
> +    def cmd(self, comm, *args):
> +        c = cmd("ssh " + self.name + " " + shlex.quote(comm), *args)
> +        return c.stdout, c.stderr, c.ret

If I read correctly the above will do a full ssh handshake for each
command. If the test script/setup is complex, I think/fear the overhead
could become a bit cumbersome.

Would using something like Fabric to create a single connection at
endpoint instantiation time and re-using it for all the commands be too
much?


Thanks,

Paolo
Jakub Kicinski April 15, 2024, 2:16 p.m. UTC | #3
On Sun, 14 Apr 2024 13:04:46 -0400 Willem de Bruijn wrote:
> 1. Cleaning up remote state in all conditions, including timeout/kill.
> 
>    Some tests require a setup phase before the test, and a matching
>    cleanup phase. If any of the configured state is variable (even
>    just a randomized filepath) this needs to be communicated to the
>    cleanup phase. The remote filepath is handled well here. But if
>    a test needs per-test setup? Say, change MTU or an Ethtool feature.
>    Multiple related tests may want to share a setup/cleanup.
> 
>    Related: some tests may need benefit from a lightweight stateless
>    check phase to detect preconditions before committing to any setup.
>    Again, say an Ethtool feature like rx-gro-hw, or AF_XDP metadata rx.

I think this falls into the "frameworking debate" we were having with
Petr. The consensus seems to be to keep things as simple as possible.
If we see that tests are poorly written and would benefit from extra
structure we should try to impose some, but every local custom is
something people will have to learn.

timeout/kill is provided to us already by the kselftest harness.

> 2. Synchronizing peers. Often both peers need to be started at the
>    same time, but then the client may need to wait until the server
>    is listening. Paolo added a nice local script to detect a listening
>    socket with sockstat. Less of a problem with TCP tests than UDP or
>    raw packet tests.

Yes, definitely. We should probably add that with the first test that
needs it.
Jakub Kicinski April 15, 2024, 2:19 p.m. UTC | #4
On Mon, 15 Apr 2024 10:57:31 +0200 Paolo Abeni wrote:
> If I read correctly the above will do a full ssh handshake for each
> command. If the test script/setup is complex, I think/fear the overhead
> could become a bit cumbersome.

Connection reuse. I wasn't sure if I should add a hint to the README,
let me do so.

> Would using something alike Fabric to create a single connection at
> endpoint instantiation time and re-using it for all the command be too
> much? 

IDK what "Fabric" is; if it's commonly used we can add the option
in tree. If less commonly - I hope the dynamic loading scheme
will allow users to very easily drop in their own class that 
integrates with Fabric, without dirtying the tree? :)
Willem de Bruijn April 15, 2024, 3:23 p.m. UTC | #5
Jakub Kicinski wrote:
> On Sun, 14 Apr 2024 13:04:46 -0400 Willem de Bruijn wrote:
> > 1. Cleaning up remote state in all conditions, including timeout/kill.
> > 
> >    Some tests require a setup phase before the test, and a matching
> >    cleanup phase. If any of the configured state is variable (even
> >    just a randomized filepath) this needs to be communicated to the
> >    cleanup phase. The remote filepath is handled well here. But if
> >    a test needs per-test setup? Say, change MTU or an Ethtool feature.
> >    Multiple related tests may want to share a setup/cleanup.
> > 
> >    Related: some tests may need benefit from a lightweight stateless
> >    check phase to detect preconditions before committing to any setup.
> >    Again, say an Ethtool feature like rx-gro-hw, or AF_XDP metadata rx.
> 
> I think this falls into the "frameworking debate" we were having with
> Petr. The consensus seems to be to keep things as simple as possible.

Makes sense. We can find the sticking points as we go along.

tools/testing/selftests/net already has a couple of hardware feature
tests, that probably see little use now that they require manual
testing (csum, gro, toeplitz, ..). Really excited to include them in
this infra to hopefully see more regular testing across more hardware.

> If we see that tests are poorly written and would benefit from extra
> structure we should try impose some, but every local custom is
> something people will have to learn.

The above were just observations from embedding tests like those
mentioned in our internal custom test framework. Especially with
heterogenous hardware, a lot of it is "can we run this test on this
platform", or "disable this feature as it interacts with the tested
feature" (e.g., HW-GRO and csum.c).

> timeout/kill is provided to us already by the kselftest harness.
> 
> > 2. Synchronizing peers. Often both peers need to be started at the
> >    same time, but then the client may need to wait until the server
> >    is listening. Paolo added a nice local script to detect a listening
> >    socket with sockstat. Less of a problem with TCP tests than UDP or
> >    raw packet tests.
> 
> Yes, definitely. We should probably add that with the first test that
> needs it.
Paolo Abeni April 15, 2024, 4:02 p.m. UTC | #6
On Mon, 2024-04-15 at 07:19 -0700, Jakub Kicinski wrote:
> On Mon, 15 Apr 2024 10:57:31 +0200 Paolo Abeni wrote:
> > If I read correctly the above will do a full ssh handshake for each
> > command. If the test script/setup is complex, I think/fear the overhead
> > could become a bit cumbersome.
> 
> Connection reuse. I wasn't sure if I should add a hint to the README,
> let me do so.
> 
> > Would using something alike Fabric to create a single connection at
> > endpoint instantiation time and re-using it for all the command be too
> > much? 
> 
> IDK what "Fabric" is, if its commonly used we can add the option
> in tree. If less commonly - I hope the dynamic loading scheme
> will allow users to very easily drop in their own class that 
> integrates with Fabric, without dirtying the tree? :)

I'm really not a Python expert. 'Fabric' is a Python library to execute
commands over ssh:

https://www.fabfile.org/

No idea how common it is.

I'm fine with ssh connection sharing.

Thanks,

Paolo
Paolo Abeni April 15, 2024, 4:11 p.m. UTC | #7
On Mon, 2024-04-15 at 07:19 -0700, Jakub Kicinski wrote:
> On Mon, 15 Apr 2024 10:57:31 +0200 Paolo Abeni wrote:
> > If I read correctly the above will do a full ssh handshake for each
> > command. If the test script/setup is complex, I think/fear the overhead
> > could become a bit cumbersome.
> 
> Connection reuse. I wasn't sure if I should add a hint to the README,
> let me do so.

I'm sorry for the multiple, incremental rounds of feedback. I think such a
hint in the README will definitely be useful, thanks!

Paolo
Petr Machata April 15, 2024, 7:39 p.m. UTC | #8
Willem de Bruijn <willemdebruijn.kernel@gmail.com> writes:

> 1. Cleaning up remote state in all conditions, including timeout/kill.
>
>    Some tests require a setup phase before the test, and a matching
>    cleanup phase. If any of the configured state is variable (even
>    just a randomized filepath) this needs to be communicated to the
>    cleanup phase. The remote filepath is handled well here. But if
>    a test needs per-test setup? Say, change MTU or an Ethtool feature.
>    Multiple related tests may want to share a setup/cleanup.

Personally I like to wrap responsibilities of this sort in context
managers, e.g. something along these lines:

    class changed_mtu:
        def __init__(self, dev, mtu):
            self.dev = dev
            self.mtu = mtu

        def __enter__(self):
            js = cmd(f"ip -j link show dev {self.dev}", json=True)
            self.orig_mtu = something_something(js)
            cmd(f"ip link set dev {self.dev} mtu {self.mtu}")

        def __exit__(self, type, value, traceback):
            cmd(f"ip link set dev {self.dev} mtu {self.orig_mtu}")

    with changed_mtu(swp1, 10000):
       # MTU is 10K here
    # and back to 1500

A lot of this can be made generic, where some object is given a setup /
cleanup commands and just invokes those. But things like MTU, ethtool
speed, sysctls and what have you that need to save a previous state and
revert back to it will probably need a custom handler. Like we have them
in lib.sh as well.
diff mbox series

Patch

diff --git a/tools/testing/selftests/drivers/net/lib/py/__init__.py b/tools/testing/selftests/drivers/net/lib/py/__init__.py
index 4653dffcd962..0d71ec83135b 100644
--- a/tools/testing/selftests/drivers/net/lib/py/__init__.py
+++ b/tools/testing/selftests/drivers/net/lib/py/__init__.py
@@ -15,3 +15,4 @@  KSFT_DIR = (Path(__file__).parent / "../../../..").resolve()
     sys.exit(4)
 
 from .env import *
+from .endpoint import Endpoint
diff --git a/tools/testing/selftests/drivers/net/lib/py/endpoint.py b/tools/testing/selftests/drivers/net/lib/py/endpoint.py
new file mode 100644
index 000000000000..9272fdc47a06
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/lib/py/endpoint.py
@@ -0,0 +1,13 @@ 
+# SPDX-License-Identifier: GPL-2.0
+
+import importlib
+
+_modules = {}
+
+def Endpoint(ep_type, ep_args):
+    global _modules
+
+    if ep_type not in _modules:
+        _modules[ep_type] = importlib.import_module("..ep_" + ep_type, __name__)
+
+    return getattr(_modules[ep_type], "Endpoint")(ep_args)
diff --git a/tools/testing/selftests/drivers/net/lib/py/ep_netns.py b/tools/testing/selftests/drivers/net/lib/py/ep_netns.py
new file mode 100644
index 000000000000..f5c588bb31f0
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/lib/py/ep_netns.py
@@ -0,0 +1,15 @@ 
+# SPDX-License-Identifier: GPL-2.0
+
+from lib.py import cmd
+
+
+class Endpoint:
+    def __init__(self, name):
+        self.name = name
+
+    def cmd(self, *args):
+        c = cmd(*args, ns=self.name)
+        return c.stdout, c.stderr, c.ret
+
+    def deploy(self, what):
+        return what
diff --git a/tools/testing/selftests/drivers/net/lib/py/ep_ssh.py b/tools/testing/selftests/drivers/net/lib/py/ep_ssh.py
new file mode 100644
index 000000000000..620df0dd8c07
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/lib/py/ep_ssh.py
@@ -0,0 +1,34 @@ 
+# SPDX-License-Identifier: GPL-2.0
+
+import os
+import shlex
+import string
+import random
+
+from lib.py import cmd
+
+
+class Endpoint:
+    def __init__(self, name):
+        self.name = name
+        self._tmpdir = None
+
+    def __del__(self):
+        if self._tmpdir:
+            self.cmd("rm -rf " + self._tmpdir)
+            self._tmpdir = None
+
+    def cmd(self, comm, *args):
+        c = cmd("ssh " + self.name + " " + shlex.quote(comm), *args)
+        return c.stdout, c.stderr, c.ret
+
+    def _mktmp(self):
+        return ''.join(random.choice(string.ascii_lowercase) for _ in range(8))
+
+    def deploy(self, what):
+        if not self._tmpdir:
+            self._tmpdir = "/tmp/" + self._mktmp()
+            self.cmd("mkdir " + self._tmpdir)
+        file_name = self._tmpdir + "/" + self._mktmp() + os.path.basename(what)
+        cmd(f"scp {what} {self.name}:{file_name}")
+        return file_name
diff --git a/tools/testing/selftests/net/lib/py/utils.py b/tools/testing/selftests/net/lib/py/utils.py
index f0d425731fd4..eff50ddd9a9d 100644
--- a/tools/testing/selftests/net/lib/py/utils.py
+++ b/tools/testing/selftests/net/lib/py/utils.py
@@ -4,10 +4,8 @@  import json as _json
 import subprocess
 
 class cmd:
-    def __init__(self, comm, shell=True, fail=True, ns=None, background=False):
+    def __init__(self, comm, shell=True, fail=True, ns=None, background=False, host=None):
         if ns:
-            if isinstance(ns, NetNS):
-                ns = ns.name
             comm = f'ip netns exec {ns} ' + comm
 
         self.stdout = None
@@ -15,10 +13,13 @@  import subprocess
         self.ret = None
 
         self.comm = comm
-        self.proc = subprocess.Popen(comm, shell=shell, stdout=subprocess.PIPE,
-                                     stderr=subprocess.PIPE)
-        if not background:
-            self.process(terminate=False, fail=fail)
+        if host:
+            self.stdout, self.stderr, self.ret = host.cmd(comm)
+        else:
+            self.proc = subprocess.Popen(comm, shell=shell, stdout=subprocess.PIPE,
+                                         stderr=subprocess.PIPE)
+            if not background:
+                self.process(terminate=False, fail=fail)
 
     def process(self, terminate=True, fail=None):
         if terminate:
@@ -36,12 +37,12 @@  import subprocess
             raise Exception("Command failed: %s\n%s" % (self.proc.args, stderr))
 
 
-def ip(args, json=None, ns=None):
+def ip(args, json=None, ns=None, host=None):
     cmd_str = "ip "
     if json:
         cmd_str += '-j '
     cmd_str += args
-    cmd_obj = cmd(cmd_str, ns=ns)
+    cmd_obj = cmd(cmd_str, ns=ns, host=host)
     if json:
         return _json.loads(cmd_obj.stdout)
     return cmd_obj