Message ID | 20240412233705.1066444-2-kuba@kernel.org (mailing list archive) |
---|---|
State | Superseded |
Delegated to: | Netdev Maintainers |
Headers | show |
Series | selftests: drv-net: support testing with a remote system | expand |
Jakub Kicinski wrote: > Define the endpoint "model". To execute most meaningful device > driver tests we need to be able to communicate with a remote system, > and have it send traffic to the device under test. > > Various test environments will have different requirements. > > 0) "Local" netdevsim-based testing can simply use net namespaces. > netdevsim supports connecting two devices now, to form a veth-like > construct. > > 1) Similarly on hosts with multiple NICs, the NICs may be connected > together with a loopback cable or internal device loopback. > One interface may be placed into separate netns, and tests > would proceed much like in the netdevsim case. Note that > the loopback config or the moving of one interface > into a netns is not expected to be part of selftest code. > > 2) Some systems may need to communicate with the endpoint via SSH. > > 3) Last but not least environment may have its own custom communication > method. > > Fundamentally we only need two operations: > - run a command remotely > - deploy a binary (if some tool we need is built as part of kselftests) > > Wrap these two in a class. Use dynamic loading to load the Endpoint > class. This will allow very easy definition of other communication > methods without bothering upstream code base. > > Stick to the "simple" / "no unnecessary abstractions" model for > referring to the endpoints. The host / endpoint object are passed > as an argument to the usual cmd() or ip() invocation. 
For example: > > ip("link show", json=True, host=endpoint) > > Signed-off-by: Jakub Kicinski <kuba@kernel.org> > --- > .../selftests/drivers/net/lib/py/__init__.py | 1 + > .../selftests/drivers/net/lib/py/endpoint.py | 13 +++++++ > .../selftests/drivers/net/lib/py/ep_netns.py | 15 ++++++++ > .../selftests/drivers/net/lib/py/ep_ssh.py | 34 +++++++++++++++++++ > tools/testing/selftests/net/lib/py/utils.py | 19 ++++++----- > 5 files changed, 73 insertions(+), 9 deletions(-) > create mode 100644 tools/testing/selftests/drivers/net/lib/py/endpoint.py > create mode 100644 tools/testing/selftests/drivers/net/lib/py/ep_netns.py > create mode 100644 tools/testing/selftests/drivers/net/lib/py/ep_ssh.py > > diff --git a/tools/testing/selftests/drivers/net/lib/py/__init__.py b/tools/testing/selftests/drivers/net/lib/py/__init__.py > index 4653dffcd962..0d71ec83135b 100644 > --- a/tools/testing/selftests/drivers/net/lib/py/__init__.py > +++ b/tools/testing/selftests/drivers/net/lib/py/__init__.py > @@ -15,3 +15,4 @@ KSFT_DIR = (Path(__file__).parent / "../../../..").resolve() > sys.exit(4) > > from .env import * > +from .endpoint import Endpoint > diff --git a/tools/testing/selftests/drivers/net/lib/py/endpoint.py b/tools/testing/selftests/drivers/net/lib/py/endpoint.py > new file mode 100644 > index 000000000000..9272fdc47a06 > --- /dev/null > +++ b/tools/testing/selftests/drivers/net/lib/py/endpoint.py > @@ -0,0 +1,13 @@ > +# SPDX-License-Identifier: GPL-2.0 > + > +import importlib > + > +_modules = {} > + > +def Endpoint(ep_type, ep_args): > + global _modules > + > + if ep_type not in _modules: > + _modules[ep_type] = importlib.import_module("..ep_" + ep_type, __name__) > + > + return getattr(_modules[ep_type], "Endpoint")(ep_args) > diff --git a/tools/testing/selftests/drivers/net/lib/py/ep_netns.py b/tools/testing/selftests/drivers/net/lib/py/ep_netns.py > new file mode 100644 > index 000000000000..f5c588bb31f0 > --- /dev/null > +++ 
b/tools/testing/selftests/drivers/net/lib/py/ep_netns.py > @@ -0,0 +1,15 @@ > +# SPDX-License-Identifier: GPL-2.0 > + > +from lib.py import cmd > + > + > +class Endpoint: > + def __init__(self, name): > + self.name = name > + > + def cmd(self, *args): > + c = cmd(*args, ns=self.name) > + return c.stdout, c.stderr, c.ret > + > + def deploy(self, what): > + return what > diff --git a/tools/testing/selftests/drivers/net/lib/py/ep_ssh.py b/tools/testing/selftests/drivers/net/lib/py/ep_ssh.py > new file mode 100644 > index 000000000000..620df0dd8c07 > --- /dev/null > +++ b/tools/testing/selftests/drivers/net/lib/py/ep_ssh.py > @@ -0,0 +1,34 @@ > +# SPDX-License-Identifier: GPL-2.0 > + > +import os > +import shlex > +import string > +import random > + > +from lib.py import cmd > + > + > +class Endpoint: > + def __init__(self, name): > + self.name = name > + self._tmpdir = None > + > + def __del__(self): > + if self._tmpdir: > + self.cmd("rm -rf " + self._tmpdir) > + self._tmpdir = None > + > + def cmd(self, comm, *args): > + c = cmd("ssh " + self.name + " " + shlex.quote(comm), *args) > + return c.stdout, c.stderr, c.ret > + > + def _mktmp(self): > + return ''.join(random.choice(string.ascii_lowercase) for _ in range(8)) > + > + def deploy(self, what): > + if not self._tmpdir: > + self._tmpdir = "/tmp/" + self._mktmp() > + self.cmd("mkdir " + self._tmpdir) > + file_name = self._tmpdir + "/" + self._mktmp() + os.path.basename(what) > + cmd(f"scp {what} {self.name}:{file_name}") > + return file_name > diff --git a/tools/testing/selftests/net/lib/py/utils.py b/tools/testing/selftests/net/lib/py/utils.py > index f0d425731fd4..eff50ddd9a9d 100644 > --- a/tools/testing/selftests/net/lib/py/utils.py > +++ b/tools/testing/selftests/net/lib/py/utils.py > @@ -4,10 +4,8 @@ import json as _json > import subprocess > > class cmd: > - def __init__(self, comm, shell=True, fail=True, ns=None, background=False): > + def __init__(self, comm, shell=True, fail=True, ns=None, 
background=False, host=None): > if ns: > - if isinstance(ns, NetNS): > - ns = ns.name > comm = f'ip netns exec {ns} ' + comm > > self.stdout = None > @@ -15,10 +13,13 @@ import subprocess > self.ret = None > > self.comm = comm > - self.proc = subprocess.Popen(comm, shell=shell, stdout=subprocess.PIPE, > - stderr=subprocess.PIPE) > - if not background: > - self.process(terminate=False, fail=fail) > + if host: > + self.stdout, self.stderr, self.ret = host.cmd(comm) > + else: > + self.proc = subprocess.Popen(comm, shell=shell, stdout=subprocess.PIPE, > + stderr=subprocess.PIPE) > + if not background: > + self.process(terminate=False, fail=fail) > > def process(self, terminate=True, fail=None): > if terminate: Perhaps superfluous / putting the cart before the horse, but a few thorny issues I've repeatedly run into with similar infra is 1. Cleaning up remote state in all conditions, including timeout/kill. Some tests require a setup phase before the test, and a matching cleanup phase. If any of the configured state is variable (even just a randomized filepath) this needs to be communicated to the cleanup phase. The remote filepath is handled well here. But if a test needs per-test setup? Say, change MTU or an Ethtool feature. Multiple related tests may want to share a setup/cleanup. Related: some tests may need benefit from a lightweight stateless check phase to detect preconditions before committing to any setup. Again, say an Ethtool feature like rx-gro-hw, or AF_XDP metadata rx. 2. Synchronizing peers. Often both peers need to be started at the same time, but then the client may need to wait until the server is listening. Paolo added a nice local script to detect a listening socket with sockstat. Less of a problem with TCP tests than UDP or raw packet tests.
On Fri, 2024-04-12 at 16:37 -0700, Jakub Kicinski wrote: > +class Endpoint: > + def __init__(self, name): > + self.name = name > + self._tmpdir = None > + > + def __del__(self): > + if self._tmpdir: > + self.cmd("rm -rf " + self._tmpdir) > + self._tmpdir = None > + > + def cmd(self, comm, *args): > + c = cmd("ssh " + self.name + " " + shlex.quote(comm), *args) > + return c.stdout, c.stderr, c.ret If I read correctly the above will do a full ssh handshake for each command. If the test script/setup is complex, I think/fear the overhead could become a bit cumbersome. Would using something alike Fabric to create a single connection at endpoint instantiation time and re-using it for all the command be too much? Thanks, Paolo
On Sun, 14 Apr 2024 13:04:46 -0400 Willem de Bruijn wrote: > 1. Cleaning up remote state in all conditions, including timeout/kill. > > Some tests require a setup phase before the test, and a matching > cleanup phase. If any of the configured state is variable (even > just a randomized filepath) this needs to be communicated to the > cleanup phase. The remote filepath is handled well here. But if > a test needs per-test setup? Say, change MTU or an Ethtool feature. > Multiple related tests may want to share a setup/cleanup. > > Related: some tests may need benefit from a lightweight stateless > check phase to detect preconditions before committing to any setup. > Again, say an Ethtool feature like rx-gro-hw, or AF_XDP metadata rx. I think this falls into the "frameworking debate" we were having with Petr. The consensus seems to be to keep things as simple as possible. If we see that tests are poorly written and would benefit from extra structure we should try impose some, but every local custom is something people will have to learn. timeout/kill is provided to us already by the kselftest harness. > 2. Synchronizing peers. Often both peers need to be started at the > same time, but then the client may need to wait until the server > is listening. Paolo added a nice local script to detect a listening > socket with sockstat. Less of a problem with TCP tests than UDP or > raw packet tests. Yes, definitely. We should probably add that with the first test that needs it.
On Mon, 15 Apr 2024 10:57:31 +0200 Paolo Abeni wrote: > If I read correctly the above will do a full ssh handshake for each > command. If the test script/setup is complex, I think/fear the overhead > could become a bit cumbersome. Connection reuse. I wasn't sure if I should add a hint to the README, let me do so. > Would using something alike Fabric to create a single connection at > endpoint instantiation time and re-using it for all the command be too > much? IDK what "Fabric" is, if its commonly used we can add the option in tree. If less commonly - I hope the dynamic loading scheme will allow users to very easily drop in their own class that integrates with Fabric, without dirtying the tree? :)
Jakub Kicinski wrote: > On Sun, 14 Apr 2024 13:04:46 -0400 Willem de Bruijn wrote: > > 1. Cleaning up remote state in all conditions, including timeout/kill. > > > > Some tests require a setup phase before the test, and a matching > > cleanup phase. If any of the configured state is variable (even > > just a randomized filepath) this needs to be communicated to the > > cleanup phase. The remote filepath is handled well here. But if > > a test needs per-test setup? Say, change MTU or an Ethtool feature. > > Multiple related tests may want to share a setup/cleanup. > > > > Related: some tests may need benefit from a lightweight stateless > > check phase to detect preconditions before committing to any setup. > > Again, say an Ethtool feature like rx-gro-hw, or AF_XDP metadata rx. > > I think this falls into the "frameworking debate" we were having with > Petr. The consensus seems to be to keep things as simple as possible. Makes sense. We can find the sticking points as we go along. tools/testing/selftests/net already has a couple of hardware feature tests, that probably see little use now that they require manual testing (csum, gro, toeplitz, ..). Really excited to include them in this infra to hopefully see more regular testing across more hardware. > If we see that tests are poorly written and would benefit from extra > structure we should try impose some, but every local custom is > something people will have to learn. The above were just observations from embedding tests like those mentioned in our internal custom test framework. Especially with heterogenous hardware, a lot of it is "can we run this test on this platform", or "disable this feature as it interacts with the tested feature" (e.g., HW-GRO and csum.c). > timeout/kill is provided to us already by the kselftest harness. > > > 2. Synchronizing peers. Often both peers need to be started at the > > same time, but then the client may need to wait until the server > > is listening. 
Paolo added a nice local script to detect a listening > > socket with sockstat. Less of a problem with TCP tests than UDP or > > raw packet tests. > > Yes, definitely. We should probably add that with the first test that > needs it.
On Mon, 2024-04-15 at 07:19 -0700, Jakub Kicinski wrote: > On Mon, 15 Apr 2024 10:57:31 +0200 Paolo Abeni wrote: > > If I read correctly the above will do a full ssh handshake for each > > command. If the test script/setup is complex, I think/fear the overhead > > could become a bit cumbersome. > > Connection reuse. I wasn't sure if I should add a hint to the README, > let me do so. > > > Would using something alike Fabric to create a single connection at > > endpoint instantiation time and re-using it for all the command be too > > much? > > IDK what "Fabric" is, if its commonly used we can add the option > in tree. If less commonly - I hope the dynamic loading scheme > will allow users to very easily drop in their own class that > integrates with Fabric, without dirtying the tree? :) I'm not really a python expert. 'Fabric' is a python library to execute commands over ssh: https://www.fabfile.org/ > No idea how common it is. I'm fine with ssh connection sharing. Thanks, Paolo
On Mon, 2024-04-15 at 07:19 -0700, Jakub Kicinski wrote: > On Mon, 15 Apr 2024 10:57:31 +0200 Paolo Abeni wrote: > > If I read correctly the above will do a full ssh handshake for each > > command. If the test script/setup is complex, I think/fear the overhead > > could become a bit cumbersome. > > Connection reuse. I wasn't sure if I should add a hint to the README, > let me do so. I'm sorry for the multiple, incremental feedback. I think such a hint to the readme will definitely be useful, thanks! Paolo
Willem de Bruijn <willemdebruijn.kernel@gmail.com> writes: > 1. Cleaning up remote state in all conditions, including timeout/kill. > > Some tests require a setup phase before the test, and a matching > cleanup phase. If any of the configured state is variable (even > just a randomized filepath) this needs to be communicated to the > cleanup phase. The remote filepath is handled well here. But if > a test needs per-test setup? Say, change MTU or an Ethtool feature. > Multiple related tests may want to share a setup/cleanup. Personally I like to wrap responsibilities of this sort in context managers, e.g. something along these lines: class changed_mtu: def __init__(self, dev, mtu): self.dev = dev self.mtu = mtu def __enter__(self): js = cmd(f"ip -j link show dev {self.dev}", json=True) self.orig_mtu = something_something(js) cmd(f"ip link set dev {self.dev} mtu {self.mtu}") def __exit__(self, type, value, traceback): cmd(f"ip link set dev {self.dev} mtu {self.orig_mtu}") with changed_mtu(swp1, 10000): # MTU is 10K here # and back to 1500 A lot of this can be made generic, where some object is given a setup / cleanup commands and just invokes those. But things like MTU, ethtool speed, sysctls and what have you that need to save a previous state and revert back to it will probably need a custom handler. Like we have them in lib.sh as well.
diff --git a/tools/testing/selftests/drivers/net/lib/py/__init__.py b/tools/testing/selftests/drivers/net/lib/py/__init__.py index 4653dffcd962..0d71ec83135b 100644 --- a/tools/testing/selftests/drivers/net/lib/py/__init__.py +++ b/tools/testing/selftests/drivers/net/lib/py/__init__.py @@ -15,3 +15,4 @@ KSFT_DIR = (Path(__file__).parent / "../../../..").resolve() sys.exit(4) from .env import * +from .endpoint import Endpoint diff --git a/tools/testing/selftests/drivers/net/lib/py/endpoint.py b/tools/testing/selftests/drivers/net/lib/py/endpoint.py new file mode 100644 index 000000000000..9272fdc47a06 --- /dev/null +++ b/tools/testing/selftests/drivers/net/lib/py/endpoint.py @@ -0,0 +1,13 @@ +# SPDX-License-Identifier: GPL-2.0 + +import importlib + +_modules = {} + +def Endpoint(ep_type, ep_args): + global _modules + + if ep_type not in _modules: + _modules[ep_type] = importlib.import_module("..ep_" + ep_type, __name__) + + return getattr(_modules[ep_type], "Endpoint")(ep_args) diff --git a/tools/testing/selftests/drivers/net/lib/py/ep_netns.py b/tools/testing/selftests/drivers/net/lib/py/ep_netns.py new file mode 100644 index 000000000000..f5c588bb31f0 --- /dev/null +++ b/tools/testing/selftests/drivers/net/lib/py/ep_netns.py @@ -0,0 +1,15 @@ +# SPDX-License-Identifier: GPL-2.0 + +from lib.py import cmd + + +class Endpoint: + def __init__(self, name): + self.name = name + + def cmd(self, *args): + c = cmd(*args, ns=self.name) + return c.stdout, c.stderr, c.ret + + def deploy(self, what): + return what diff --git a/tools/testing/selftests/drivers/net/lib/py/ep_ssh.py b/tools/testing/selftests/drivers/net/lib/py/ep_ssh.py new file mode 100644 index 000000000000..620df0dd8c07 --- /dev/null +++ b/tools/testing/selftests/drivers/net/lib/py/ep_ssh.py @@ -0,0 +1,34 @@ +# SPDX-License-Identifier: GPL-2.0 + +import os +import shlex +import string +import random + +from lib.py import cmd + + +class Endpoint: + def __init__(self, name): + self.name = name + self._tmpdir = 
None + + def __del__(self): + if self._tmpdir: + self.cmd("rm -rf " + self._tmpdir) + self._tmpdir = None + + def cmd(self, comm, *args): + c = cmd("ssh " + self.name + " " + shlex.quote(comm), *args) + return c.stdout, c.stderr, c.ret + + def _mktmp(self): + return ''.join(random.choice(string.ascii_lowercase) for _ in range(8)) + + def deploy(self, what): + if not self._tmpdir: + self._tmpdir = "/tmp/" + self._mktmp() + self.cmd("mkdir " + self._tmpdir) + file_name = self._tmpdir + "/" + self._mktmp() + os.path.basename(what) + cmd(f"scp {what} {self.name}:{file_name}") + return file_name diff --git a/tools/testing/selftests/net/lib/py/utils.py b/tools/testing/selftests/net/lib/py/utils.py index f0d425731fd4..eff50ddd9a9d 100644 --- a/tools/testing/selftests/net/lib/py/utils.py +++ b/tools/testing/selftests/net/lib/py/utils.py @@ -4,10 +4,8 @@ import json as _json import subprocess class cmd: - def __init__(self, comm, shell=True, fail=True, ns=None, background=False): + def __init__(self, comm, shell=True, fail=True, ns=None, background=False, host=None): if ns: - if isinstance(ns, NetNS): - ns = ns.name comm = f'ip netns exec {ns} ' + comm self.stdout = None @@ -15,10 +13,13 @@ import subprocess self.ret = None self.comm = comm - self.proc = subprocess.Popen(comm, shell=shell, stdout=subprocess.PIPE, - stderr=subprocess.PIPE) - if not background: - self.process(terminate=False, fail=fail) + if host: + self.stdout, self.stderr, self.ret = host.cmd(comm) + else: + self.proc = subprocess.Popen(comm, shell=shell, stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + if not background: + self.process(terminate=False, fail=fail) def process(self, terminate=True, fail=None): if terminate: @@ -36,12 +37,12 @@ import subprocess raise Exception("Command failed: %s\n%s" % (self.proc.args, stderr)) -def ip(args, json=None, ns=None): +def ip(args, json=None, ns=None, host=None): cmd_str = "ip " if json: cmd_str += '-j ' cmd_str += args - cmd_obj = cmd(cmd_str, ns=ns) + 
cmd_obj = cmd(cmd_str, ns=ns, host=host) if json: return _json.loads(cmd_obj.stdout) return cmd_obj
Define the endpoint "model". To execute most meaningful device driver tests we need to be able to communicate with a remote system, and have it send traffic to the device under test. Various test environments will have different requirements. 0) "Local" netdevsim-based testing can simply use net namespaces. netdevsim supports connecting two devices now, to form a veth-like construct. 1) Similarly on hosts with multiple NICs, the NICs may be connected together with a loopback cable or internal device loopback. One interface may be placed into separate netns, and tests would proceed much like in the netdevsim case. Note that the loopback config or the moving of one interface into a netns is not expected to be part of selftest code. 2) Some systems may need to communicate with the endpoint via SSH. 3) Last but not least environment may have its own custom communication method. Fundamentally we only need two operations: - run a command remotely - deploy a binary (if some tool we need is built as part of kselftests) Wrap these two in a class. Use dynamic loading to load the Endpoint class. This will allow very easy definition of other communication methods without bothering upstream code base. Stick to the "simple" / "no unnecessary abstractions" model for referring to the endpoints. The host / endpoint object are passed as an argument to the usual cmd() or ip() invocation. 
For example: ip("link show", json=True, host=endpoint) Signed-off-by: Jakub Kicinski <kuba@kernel.org> --- .../selftests/drivers/net/lib/py/__init__.py | 1 + .../selftests/drivers/net/lib/py/endpoint.py | 13 +++++++ .../selftests/drivers/net/lib/py/ep_netns.py | 15 ++++++++ .../selftests/drivers/net/lib/py/ep_ssh.py | 34 +++++++++++++++++++ tools/testing/selftests/net/lib/py/utils.py | 19 ++++++----- 5 files changed, 73 insertions(+), 9 deletions(-) create mode 100644 tools/testing/selftests/drivers/net/lib/py/endpoint.py create mode 100644 tools/testing/selftests/drivers/net/lib/py/ep_netns.py create mode 100644 tools/testing/selftests/drivers/net/lib/py/ep_ssh.py