diff mbox series

[net-next,7/7] testing: net-drv: add a driver test for stats reporting

Message ID 20240402010520.1209517-8-kuba@kernel.org (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Headers show
Series selftests: net: groundwork for YNL-based tests | expand

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for net-next, async
netdev/ynl fail Generated files up to date; build failed; build has 6 warnings/errors; GEN HAS DIFF 2 files changed, 15 insertions(+);
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 949 this patch: 949
netdev/build_tools success Errors and warnings before: 0 this patch: 0
netdev/cc_maintainers success CCed 3 of 3 maintainers
netdev/build_clang success Errors and warnings before: 955 this patch: 955
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 961 this patch: 961
netdev/checkpatch warning WARNING: added, moved or deleted file(s), does MAINTAINERS need updating?
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
netdev/contest fail net-next-2024-04-02--12-00 (tests: 947)

Commit Message

Jakub Kicinski April 2, 2024, 1:05 a.m. UTC
Add a very simple test to make sure drivers report expected
stats. Drivers which implement FEC or pause configuration
should report relevant stats. Qstats must be reported,
at least packet and byte counts, and they must match
total device stats.

Tested with netdevsim, bnxt, in-tree and installed.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/drivers/net/stats.py | 85 ++++++++++++++++++++
 1 file changed, 85 insertions(+)
 create mode 100755 tools/testing/selftests/drivers/net/stats.py

Comments

Petr Machata April 2, 2024, 4:37 p.m. UTC | #1
Jakub Kicinski <kuba@kernel.org> writes:

> Add a very simple test to make sure drivers report expected
> stats. Drivers which implement FEC or pause configuration
> should report relevant stats. Qstats must be reported,
> at least packet and byte counts, and they must match
> total device stats.
>
> Tested with netdevsim, bnxt, in-tree and installed.
>
> Signed-off-by: Jakub Kicinski <kuba@kernel.org>
> ---
>  tools/testing/selftests/drivers/net/stats.py | 85 ++++++++++++++++++++
>  1 file changed, 85 insertions(+)
>  create mode 100755 tools/testing/selftests/drivers/net/stats.py
>
> diff --git a/tools/testing/selftests/drivers/net/stats.py b/tools/testing/selftests/drivers/net/stats.py
> new file mode 100755
> index 000000000000..751cca2869b8
> --- /dev/null
> +++ b/tools/testing/selftests/drivers/net/stats.py
> @@ -0,0 +1,85 @@
> +#!/usr/bin/env python3
> +# SPDX-License-Identifier: GPL-2.0
> +
> +from lib.py import ksft_run, ksft_in, ksft_true, KsftSkipEx, KsftXfailEx
> +from lib.py import EthtoolFamily, NetdevFamily, RtnlFamily, NlError
> +from lib.py import NetDrvEnv
> +
> +cfg = None
> +ethnl = EthtoolFamily()
> +netfam = NetdevFamily()
> +rtnl = RtnlFamily()
> +
> +
> +def check_pause() -> None:
> +    global cfg, ethnl
> +
> +    try:
> +        ethnl.pause_get({"header": {"dev-index": cfg.ifindex}})
> +    except NlError as e:
> +        if e.error == 95:
> +            raise KsftXfailEx("pause not supported by the device")
> +        raise
> +
> +    data = ethnl.pause_get({"header": {"dev-index": cfg.ifindex,
> +                                       "flags": {'stats'}}})
> +    ksft_true(data['stats'], "driver does not report stats")
> +
> +
> +def check_fec() -> None:
> +    global ethnl
> +
> +    try:
> +        ethnl.fec_get({"header": {"dev-index": cfg.ifindex}})
> +    except NlError as e:
> +        if e.error == 95:
> +            raise KsftXfailEx("FEC not supported by the device")
> +        raise
> +
> +    data = ethnl.fec_get({"header": {"dev-index": cfg.ifindex,
> +                                     "flags": {'stats'}}})
> +    ksft_true(data['stats'], "driver does not report stats")
> +
> +
> +def pkt_byte_sum() -> None:
> +    global cfg, netfam, rtnl
> +
> +    def get_qstat(test):
> +        global netfam
> +        stats = netfam.qstats_get({}, dump=True)
> +        if stats:
> +            for qs in stats:
> +                if qs["ifindex"]== test.ifindex:
> +                    return qs
> +
> +    qstat = get_qstat(cfg)
> +    if qstat is None:
> +        raise KsftSkipEx("qstats not supported by the device")
> +
> +    for key in ['tx-packets', 'tx-bytes', 'rx-packets', 'rx-bytes']:
> +        ksft_in(key, qstat, "Drivers should always report basic keys")
> +
> +    # Compare stats, rtnl stats and qstats must match,
> +    # but the interface may be up, so do a series of dumps
> +    # each time the more "recent" stats must be higher or same.
> +    def stat_cmp(rstat, qstat):
> +        for key in ['tx-packets', 'tx-bytes', 'rx-packets', 'rx-bytes']:
> +            if rstat[key] != qstat[key]:
> +                return rstat[key] - qstat[key]
> +        return 0
> +
> +    for _ in range(10):
> +        rtstat = rtnl.getlink({"ifi-index": cfg.ifindex})['stats']
> +        if stat_cmp(rtstat, qstat) < 0:
> +            raise Exception("RTNL stats are lower, fetched later")
> +        qstat = get_qstat(cfg)
> +        if stat_cmp(rtstat, qstat) > 0:
> +            raise Exception("Qstats are lower, fetched later")
> +
> +
> +if __name__ == "__main__":
> +    cfg = NetDrvEnv(__file__)
> +    try:
> +        ksft_run([check_pause, check_fec, pkt_byte_sum])
> +    finally:
> +        del cfg

Yeah, this would be usually done through context managers, as I mention
in the other e-mail. But then cfg would be lexically scoped, which IMHO
is a good thing, but then it needs to be passed around as an argument,
and that makes the ksft_run() invocation a bit messy:

    with NetDrvEnv(__file__) as cfg:
        ksft_run([lambda: check_pause(cfg),
                  lambda: check_fec(cfg),
                  lambda: pkt_byte_sum(cfg)])

Dunno, maybe it could forward *args **kwargs to the cases? But then it
loses some of the readability again.
Jakub Kicinski April 2, 2024, 5:31 p.m. UTC | #2
On Tue, 2 Apr 2024 18:37:44 +0200 Petr Machata wrote:
> Yeah, this would be usually done through context managers, as I mention
> in the other e-mail. But then cfg would be lexically scoped, which IMHO
> is a good thing, but then it needs to be passed around as an argument,
> and that makes the ksft_run() invocation a bit messy:
> 
>     with NetDrvEnv(__file__) as cfg:
>         ksft_run([lambda: check_pause(cfg),
>                   lambda: check_fec(cfg),
>                   lambda: pkt_byte_sum(cfg)])
> 
> Dunno, maybe it could forward *args **kwargs to the cases? But then it
> loses some of the readability again.

Yes, I was wondering about that. It must be doable, IIRC 
the multi-threading API "injects" args from a tuple.
I was thinking something along the lines of:

    with NetDrvEnv(__file__) as cfg:
        ksft_run([check_pause, check_fec, pkt_byte_sum],
                 args=(cfg, ))

I got lazy, let me take a closer look. Another benefit
will be that once we pass in "env" / cfg - we can "register" 
objects in there for auto-cleanup (in the future, current
tests don't need cleanup)
Jakub Kicinski April 2, 2024, 5:44 p.m. UTC | #3
On Tue, 2 Apr 2024 10:31:11 -0700 Jakub Kicinski wrote:
> Yes, I was wondering about that. It must be doable, IIRC 
> the multi-threading API "injects" args from a tuple.
> I was thinking something along the lines of:
> 
>     with NetDrvEnv(__file__) as cfg:
>         ksft_run([check_pause, check_fec, pkt_byte_sum],
>                  args=(cfg, ))

seems to work, is this good?

diff --git a/tools/testing/selftests/net/lib/py/ksft.py b/tools/testing/selftests/net/lib/py/ksft.py
index 7c296fe5e438..c7210525981c 100644
--- a/tools/testing/selftests/net/lib/py/ksft.py
+++ b/tools/testing/selftests/net/lib/py/ksft.py
@@ -60,7 +60,7 @@ KSFT_RESULT = None
     print(res)
 
 
-def ksft_run(cases):
+def ksft_run(cases, args=()):
     totals = {"pass": 0, "fail": 0, "skip": 0, "xfail": 0}
 
     print("KTAP version 1")
@@ -72,7 +72,7 @@ KSFT_RESULT = None
         KSFT_RESULT = True
         cnt += 1
         try:
-            case()
+            case(*args)
         except KsftSkipEx as e:
             ktap_result(True, cnt, case, comment="SKIP " + str(e))
             totals['skip'] += 1
Petr Machata April 2, 2024, 10:02 p.m. UTC | #4
Jakub Kicinski <kuba@kernel.org> writes:

> On Tue, 2 Apr 2024 10:31:11 -0700 Jakub Kicinski wrote:
>> Yes, I was wondering about that. It must be doable, IIRC 
>> the multi-threading API "injects" args from a tuple.
>> I was thinking something along the lines of:
>> 
>>     with NetDrvEnv(__file__) as cfg:
>>         ksft_run([check_pause, check_fec, pkt_byte_sum],
>>                  args=(cfg, ))
>
> seems to work, is this good?
>
> diff --git a/tools/testing/selftests/net/lib/py/ksft.py b/tools/testing/selftests/net/lib/py/ksft.py
> index 7c296fe5e438..c7210525981c 100644
> --- a/tools/testing/selftests/net/lib/py/ksft.py
> +++ b/tools/testing/selftests/net/lib/py/ksft.py
> @@ -60,7 +60,7 @@ KSFT_RESULT = None
>      print(res)
>  
>  
> -def ksft_run(cases):
> +def ksft_run(cases, args=()):
>      totals = {"pass": 0, "fail": 0, "skip": 0, "xfail": 0}
>  
>      print("KTAP version 1")
> @@ -72,7 +72,7 @@ KSFT_RESULT = None
>          KSFT_RESULT = True
>          cnt += 1
>          try:
> -            case()
> +            case(*args)
>          except KsftSkipEx as e:
>              ktap_result(True, cnt, case, comment="SKIP " + str(e))
>              totals['skip'] += 1

Yep, looks good.
Petr Machata April 2, 2024, 10:04 p.m. UTC | #5
Jakub Kicinski <kuba@kernel.org> writes:

> On Tue, 2 Apr 2024 18:37:44 +0200 Petr Machata wrote:
>> Yeah, this would be usually done through context managers, as I mention
>> in the other e-mail. But then cfg would be lexically scoped, which IMHO
>> is a good thing, but then it needs to be passed around as an argument,
>> and that makes the ksft_run() invocation a bit messy:
>> 
>>     with NetDrvEnv(__file__) as cfg:
>>         ksft_run([lambda: check_pause(cfg),
>>                   lambda: check_fec(cfg),
>>                   lambda: pkt_byte_sum(cfg)])
>> 
>> Dunno, maybe it could forward *args **kwargs to the cases? But then it
>> loses some of the readability again.
>
> Yes, I was wondering about that. It must be doable, IIRC 
> the multi-threading API "injects" args from a tuple.
> I was thinking something along the lines of:
>
>     with NetDrvEnv(__file__) as cfg:
>         ksft_run([check_pause, check_fec, pkt_byte_sum],
>                  args=(cfg, ))
>
> I got lazy, let me take a closer look. Another benefit
> will be that once we pass in "env" / cfg - we can "register" 
> objects in there for auto-cleanup (in the future, current
> tests don't need cleanup)

Yeah, though some of those should probably just be their own context
managers IMHO, not necessarily hooked to cfg. I'm thinking something
fairly general, so that the support boilerplate doesn't end up costing
an arm and leg:

    with build("ip route add 192.0.2.1/28 nexthop via 192.0.2.17",
               "ip route del 192.0.2.1/28"),
         build("ip link set dev %s master %s" % (swp1, h1),
               "ip link set dev %s nomaster" % swp1):
        le_test()

Dunno. I guess it makes sense to have some of the common stuff
predefined, e.g. "with vrf() as h1". And then the stuff that's typically
in lib.sh's setup() and cleanup(), can be losslessly hooked up to cfg.

This is what I ended up gravitating towards after writing a handful of
LNST tests anyway. The scoping makes it clear where the object exists,
lifetime is taken care of, it's all ponies rainbows basically. At least
as long as your object lifetimes can be cleanly nested, which admittedly
is not always.
Jakub Kicinski April 2, 2024, 11:36 p.m. UTC | #6
On Wed, 3 Apr 2024 00:04:14 +0200 Petr Machata wrote:
> > Yes, I was wondering about that. It must be doable, IIRC 
> > the multi-threading API "injects" args from a tuple.
> > I was thinking something along the lines of:
> >
> >     with NetDrvEnv(__file__) as cfg:
> >         ksft_run([check_pause, check_fec, pkt_byte_sum],
> >                  args=(cfg, ))
> >
> > I got lazy, let me take a closer look. Another benefit
> > will be that once we pass in "env" / cfg - we can "register" 
> > objects in there for auto-cleanup (in the future, current
> > tests don't need cleanup)  
> 
> Yeah, though some of those should probably just be their own context
> managers IMHO, not necessarily hooked to cfg. I'm thinking something
> fairly general, so that the support boilerplate doesn't end up costing
> an arm and leg:
> 
>     with build("ip route add 192.0.2.1/28 nexthop via 192.0.2.17",
>                "ip route del 192.0.2.1/28"),
>          build("ip link set dev %s master %s" % (swp1, h1),
>                "ip link set dev %s nomaster" % swp1):
>         le_test()
>
> Dunno. I guess it makes sense to have some of the common stuff
> predefined, e.g. "with vrf() as h1". And then the stuff that's typically
> in lib.sh's setup() and cleanup(), can be losslessly hooked up to cfg.

I was thinking of something along the lines of:

def test_abc(cfg):
    cfg.build("ip route add 192.0.2.1/28 nexthop via 192.0.2.17",
              "ip route del 192.0.2.1/28")
    cfg.build("ip link set dev %s master %s" % (swp1, h1),
              "ip link set dev %s nomaster" % swp1)

optionally we could then also:

     thing = cfg.build("ip link set dev %s master %s" % (swp1, h1),
                       "ip link set dev %s nomaster" % swp1)

     # ... some code which may raise ...

     # unlink to do something else with the device
     del thing
     # ... more code ... 

cfg may not be best here, could be cleaner to create a "test" object,
always pass it in as the first param, and destroy it after each test.

> This is what I ended up gravitating towards after writing a handful of
> LNST tests anyway. The scoping makes it clear where the object exists,
> lifetime is taken care of, it's all ponies rainbows basically. At least
> as long as your object lifetimes can be cleanly nested, which admittedly
> is not always.

Should be fairly easy to support all cases - "with", "recording on
cfg/test" and del.  Unfortunately in the two tests I came up with
quickly for this series cleanup is only needed for the env itself.
It's a bit awkward to add the lifetime helpers without any users.
David Wei April 3, 2024, 3:09 a.m. UTC | #7
On 2024-04-01 18:05, Jakub Kicinski wrote:
> Add a very simple test to make sure drivers report expected
> stats. Drivers which implement FEC or pause configuration
> should report relevant stats. Qstats must be reported,
> at least packet and byte counts, and they must match
> total device stats.
> 
> Tested with netdevsim, bnxt, in-tree and installed.
> 
> Signed-off-by: Jakub Kicinski <kuba@kernel.org>
> ---
>  tools/testing/selftests/drivers/net/stats.py | 85 ++++++++++++++++++++
>  1 file changed, 85 insertions(+)
>  create mode 100755 tools/testing/selftests/drivers/net/stats.py
> 
> diff --git a/tools/testing/selftests/drivers/net/stats.py b/tools/testing/selftests/drivers/net/stats.py
> new file mode 100755
> index 000000000000..751cca2869b8
> --- /dev/null
> +++ b/tools/testing/selftests/drivers/net/stats.py
> @@ -0,0 +1,85 @@
> +#!/usr/bin/env python3
> +# SPDX-License-Identifier: GPL-2.0
> +
> +from lib.py import ksft_run, ksft_in, ksft_true, KsftSkipEx, KsftXfailEx
> +from lib.py import EthtoolFamily, NetdevFamily, RtnlFamily, NlError
> +from lib.py import NetDrvEnv
> +
> +cfg = None
> +ethnl = EthtoolFamily()
> +netfam = NetdevFamily()
> +rtnl = RtnlFamily()
> +
> +
> +def check_pause() -> None:
> +    global cfg, ethnl
> +
> +    try:
> +        ethnl.pause_get({"header": {"dev-index": cfg.ifindex}})
> +    except NlError as e:
> +        if e.error == 95:
> +            raise KsftXfailEx("pause not supported by the device")
> +        raise
> +
> +    data = ethnl.pause_get({"header": {"dev-index": cfg.ifindex,
> +                                       "flags": {'stats'}}})
> +    ksft_true(data['stats'], "driver does not report stats")
> +
> +
> +def check_fec() -> None:
> +    global ethnl
> +
> +    try:
> +        ethnl.fec_get({"header": {"dev-index": cfg.ifindex}})
> +    except NlError as e:
> +        if e.error == 95:
> +            raise KsftXfailEx("FEC not supported by the device")
> +        raise
> +
> +    data = ethnl.fec_get({"header": {"dev-index": cfg.ifindex,
> +                                     "flags": {'stats'}}})
> +    ksft_true(data['stats'], "driver does not report stats")
> +
> +
> +def pkt_byte_sum() -> None:
> +    global cfg, netfam, rtnl
> +
> +    def get_qstat(test):
> +        global netfam
> +        stats = netfam.qstats_get({}, dump=True)
> +        if stats:
> +            for qs in stats:
> +                if qs["ifindex"]== test.ifindex:
> +                    return qs
> +
> +    qstat = get_qstat(cfg)
> +    if qstat is None:
> +        raise KsftSkipEx("qstats not supported by the device")
> +
> +    for key in ['tx-packets', 'tx-bytes', 'rx-packets', 'rx-bytes']:
> +        ksft_in(key, qstat, "Drivers should always report basic keys")
> +
> +    # Compare stats, rtnl stats and qstats must match,
> +    # but the interface may be up, so do a series of dumps
> +    # each time the more "recent" stats must be higher or same.
> +    def stat_cmp(rstat, qstat):
> +        for key in ['tx-packets', 'tx-bytes', 'rx-packets', 'rx-bytes']:
> +            if rstat[key] != qstat[key]:
> +                return rstat[key] - qstat[key]
> +        return 0
> +
> +    for _ in range(10):
> +        rtstat = rtnl.getlink({"ifi-index": cfg.ifindex})['stats']
> +        if stat_cmp(rtstat, qstat) < 0:
> +            raise Exception("RTNL stats are lower, fetched later")
> +        qstat = get_qstat(cfg)
> +        if stat_cmp(rtstat, qstat) > 0:
> +            raise Exception("Qstats are lower, fetched later")
> +
> +
> +if __name__ == "__main__":
> +    cfg = NetDrvEnv(__file__)
> +    try:
> +        ksft_run([check_pause, check_fec, pkt_byte_sum])

Would there ever be a case where you don't want to run every test case
in a suite?

> +    finally:
> +        del cfg
David Wei April 3, 2024, 3:15 a.m. UTC | #8
On 2024-04-02 10:31, Jakub Kicinski wrote:
> On Tue, 2 Apr 2024 18:37:44 +0200 Petr Machata wrote:
>> Yeah, this would be usually done through context managers, as I mention
>> in the other e-mail. But then cfg would be lexically scoped, which IMHO
>> is a good thing, but then it needs to be passed around as an argument,
>> and that makes the ksft_run() invocation a bit messy:
>>
>>     with NetDrvEnv(__file__) as cfg:
>>         ksft_run([lambda: check_pause(cfg),
>>                   lambda: check_fec(cfg),
>>                   lambda: pkt_byte_sum(cfg)])
>>
>> Dunno, maybe it could forward *args **kwargs to the cases? But then it
>> loses some of the readability again.
> 
> Yes, I was wondering about that. It must be doable, IIRC 
> the multi-threading API "injects" args from a tuple.
> I was thinking something along the lines of:
> 
>     with NetDrvEnv(__file__) as cfg:
>         ksft_run([check_pause, check_fec, pkt_byte_sum],
>                  args=(cfg, ))
> 
> I got lazy, let me take a closer look. Another benefit
> will be that once we pass in "env" / cfg - we can "register" 
> objects in there for auto-cleanup (in the future, current
> tests don't need cleanup)

How about a TestSuite class as a context manager and individual tests
being methods? Then running the test suite runs all test cases and you
won't need to add each test case manually to ksft_run().
Petr Machata April 3, 2024, 8:58 a.m. UTC | #9
Jakub Kicinski <kuba@kernel.org> writes:

> On Wed, 3 Apr 2024 00:04:14 +0200 Petr Machata wrote:
>> > Yes, I was wondering about that. It must be doable, IIRC 
>> > the multi-threading API "injects" args from a tuple.
>> > I was thinking something along the lines of:
>> >
>> >     with NetDrvEnv(__file__) as cfg:
>> >         ksft_run([check_pause, check_fec, pkt_byte_sum],
>> >                  args=(cfg, ))
>> >
>> > I got lazy, let me take a closer look. Another benefit
>> > will be that once we pass in "env" / cfg - we can "register" 
>> > objects in there for auto-cleanup (in the future, current
>> > tests don't need cleanup)  
>> 
>> Yeah, though some of those should probably just be their own context
>> managers IMHO, not necessarily hooked to cfg. I'm thinking something
>> fairly general, so that the support boilerplate doesn't end up costing
>> an arm and leg:
>> 
>>     with build("ip route add 192.0.2.1/28 nexthop via 192.0.2.17",
>>                "ip route del 192.0.2.1/28"),
>>          build("ip link set dev %s master %s" % (swp1, h1),
>>                "ip link set dev %s nomaster" % swp1):
>>         le_test()
>>
>> Dunno. I guess it makes sense to have some of the common stuff
>> predefined, e.g. "with vrf() as h1". And then the stuff that's typically
>> in lib.sh's setup() and cleanup(), can be losslessly hooked up to cfg.
>
> I was thinking of something along the lines of:
>
> def test_abc(cfg):
>     cfg.build("ip route add 192.0.2.1/28 nexthop via 192.0.2.17",
>               "ip route del 192.0.2.1/28")
>     cfg.build("ip link set dev %s master %s" % (swp1, h1),
>               "ip link set dev %s nomaster" % swp1)
>
> optionally we could then also:
>
>      thing = cfg.build("ip link set dev %s master %s" % (swp1, h1),
>                        "ip link set dev %s nomaster" % swp1)
>
>      # ... some code which may raise ...
>
>      # unlink to do something else with the device
>      del thing
>      # ... more code ... 
>
> cfg may not be best here, could be cleaner to create a "test" object,
> always pass it in as the first param, and destroy it after each test.

I assume above you mean that cfg inherits the thing, but cfg lifetime
currently looks like it spreads across several test cases. ksft_run()
would need to know about it and call something to issue the postponed
cleanups between cases.

Also, it's not clear what "del thing" should do in that context, because
if cfg also keeps a reference, __del__ won't get called. There could be
a direct method, like thing.exit() or whatever, but then you need
bookkeeping so as not to clean up the second time through cfg. It's the
less straightforward way of going about it IMHO.

I know that I must sound like a broken record at this point, but look:

    with build("ip link set dev %s master %s" % (swp1, h1),
               "ip link set dev %s nomaster" % swp1) as thing:
        ... some code which may rise ...
    ... more code, interface detached, `thing' gone ...

It's just as concise, makes it very clear where the device is part of
the bridge and where not anymore, and does away with the intricacies of
lifetime management.

If lifetimes don't nest, I think it's just going to be ugly either way.
But I don't think this comes up often.

I don't really see stuff that you could just throw at cfg to keep track
of, apart from the suite configuration (i.e. topology set up). But I
suppose if it comes up, we can do something like:

    thing = cfg.retain(build(..., ...))

Or maybe have a dedicated retainer object, or whatever, it doesn't
necessarily need to be cfg itself.

>> This is what I ended up gravitating towards after writing a handful of
>> LNST tests anyway. The scoping makes it clear where the object exists,
>> lifetime is taken care of, it's all ponies rainbows basically. At least
>> as long as your object lifetimes can be cleanly nested, which admittedly
>> is not always.
>
> Should be fairly easy to support all cases - "with", "recording on
> cfg/test" and del.  Unfortunately in the two tests I came up with

Yup.

> quickly for this series cleanup is only needed for the env itself.
> It's a bit awkward to add the lifetime helpers without any users.

Yeah. I'm basically delving in this now to kinda try and steer future
expectations.
Jakub Kicinski April 3, 2024, 1:55 p.m. UTC | #10
On Wed, 3 Apr 2024 10:58:19 +0200 Petr Machata wrote:
> Also, it's not clear what "del thing" should do in that context, because
> if cfg also keeps a reference, __del__ won't get called. There could be
> a direct method, like thing.exit() or whatever, but then you need
> bookkeeping so as not to clean up the second time through cfg. It's the
> less straightforward way of going about it IMHO.

I see, having read up on what del actually does - "del thing" would
indeed not work here.

> I know that I must sound like a broken record at this point, but look:
> 
>     with build("ip link set dev %s master %s" % (swp1, h1),
>                "ip link set dev %s nomaster" % swp1) as thing:
>         ... some code which may rise ...
>     ... more code, interface detached, `thing' gone ...
> 
> It's just as concise, makes it very clear where the device is part of
> the bridge and where not anymore, and does away with the intricacies of
> lifetime management.

My experience [1] is that with "with" we often end up writing tests
like this:

	def test():
		with a() as bunch,
		     of() as things:
			... entire body of the test indented ...

[1] https://github.com/kuba-moo/linux/blob/psp/tools/net/ynl/psp.py

Nothing wrong with that. I guess the question in my mind is whether
we're aiming for making the tests "pythonic" (in which case "with"
definitely wins), or more of a "bash with classes" style trying to
avoid any constructs people may have to google. I'm on the fence on
that one, as the del example proves my python expertise is not high.
OTOH people who prefer bash will continue to write bash tests,
so maybe we don't have to worry about non-experts too much. Dunno.
Petr Machata April 3, 2024, 4:52 p.m. UTC | #11
Jakub Kicinski <kuba@kernel.org> writes:

> On Wed, 3 Apr 2024 10:58:19 +0200 Petr Machata wrote:
>> Also, it's not clear what "del thing" should do in that context, because
>> if cfg also keeps a reference, __del__ won't get called. There could be
>> a direct method, like thing.exit() or whatever, but then you need
>> bookkeeping so as not to clean up the second time through cfg. It's the
>> less straightforward way of going about it IMHO.
>
> I see, having read up on what del actually does - "del thing" would
> indeed not work here.
>
>> I know that I must sound like a broken record at this point, but look:
>> 
>>     with build("ip link set dev %s master %s" % (swp1, h1),
>>                "ip link set dev %s nomaster" % swp1) as thing:
>>         ... some code which may rise ...
>>     ... more code, interface detached, `thing' gone ...
>> 
>> It's just as concise, makes it very clear where the device is part of
>> the bridge and where not anymore, and does away with the intricacies of
>> lifetime management.
>
> My experience [1] is that with "with" we often end up writing tests
> like this:
>
> 	def test():
> 		with a() as bunch,
> 		     of() as things:
> 			... entire body of the test indented ...
>
> [1] https://github.com/kuba-moo/linux/blob/psp/tools/net/ynl/psp.py

Yeah, that does end up happening. I think there are a couple places
where you could have folded several withs in one, but it is going to be
indented, yeah.

But you end up indenting for try: finally: to make the del work reliably
anyway, so it's kinda lose/lose in that regard.

> Nothing wrong with that. I guess the question in my mind is whether
> we're aiming for making the tests "pythonic" (in which case "with"
> definitely wins), or more of a "bash with classes" style trying to
> avoid any constructs people may have to google. I'm on the fence on
> that one, as the del example proves my python expertise is not high.
> OTOH people who prefer bash will continue to write bash tests,
> so maybe we don't have to worry about non-experts too much. Dunno.

What I'm saying is, bash is currently a bit of a mess when it comes to
cleanups. It's hard to get right, annoying to review, and sometimes
individual cases add state that they don't unwind in cleanup() but only
later in the function, so when you C-c half-way through such case, stuff
stays behind.

Python has tools to just magic all this away.
Jakub Kicinski April 3, 2024, 9:48 p.m. UTC | #12
On Wed, 3 Apr 2024 18:52:50 +0200 Petr Machata wrote:
> > Nothing wrong with that. I guess the question in my mind is whether
> > we're aiming for making the tests "pythonic" (in which case "with"
> > definitely wins), or more of a "bash with classes" style trying to
> > avoid any constructs people may have to google. I'm on the fence on
> > that one, as the del example proves my python expertise is not high.
> > OTOH people who prefer bash will continue to write bash tests,
> > so maybe we don't have to worry about non-experts too much. Dunno.  
> 
> What I'm saying is, bash is currently a bit of a mess when it comes to
> cleanups. It's hard to get right, annoying to review, and sometimes
> individual cases add state that they don't unwind in cleanup() but only
> later in the function, so when you C-c half-way through such case, stuff
> stays behind.
> 
> Python has tools to just magic all this away.

Understood, just to be clear what I was saying is that +/- bugs 
in my example it is possible to "attach" the lifetime of things
to a test object or such. Maybe people would be less likely to remember
to do that than use "with"? Dunno. In any case, IIUC we don't have to
decide now, so I went ahead with the v2 last night.
diff mbox series

Patch

diff --git a/tools/testing/selftests/drivers/net/stats.py b/tools/testing/selftests/drivers/net/stats.py
new file mode 100755
index 000000000000..751cca2869b8
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/stats.py
@@ -0,0 +1,85 @@ 
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+from lib.py import ksft_run, ksft_in, ksft_true, KsftSkipEx, KsftXfailEx
+from lib.py import EthtoolFamily, NetdevFamily, RtnlFamily, NlError
+from lib.py import NetDrvEnv
+
+cfg = None
+ethnl = EthtoolFamily()
+netfam = NetdevFamily()
+rtnl = RtnlFamily()
+
+
+def check_pause() -> None:
+    global cfg, ethnl
+
+    try:
+        ethnl.pause_get({"header": {"dev-index": cfg.ifindex}})
+    except NlError as e:
+        if e.error == 95:
+            raise KsftXfailEx("pause not supported by the device")
+        raise
+
+    data = ethnl.pause_get({"header": {"dev-index": cfg.ifindex,
+                                       "flags": {'stats'}}})
+    ksft_true(data['stats'], "driver does not report stats")
+
+
+def check_fec() -> None:
+    global ethnl
+
+    try:
+        ethnl.fec_get({"header": {"dev-index": cfg.ifindex}})
+    except NlError as e:
+        if e.error == 95:
+            raise KsftXfailEx("FEC not supported by the device")
+        raise
+
+    data = ethnl.fec_get({"header": {"dev-index": cfg.ifindex,
+                                     "flags": {'stats'}}})
+    ksft_true(data['stats'], "driver does not report stats")
+
+
+def pkt_byte_sum() -> None:
+    global cfg, netfam, rtnl
+
+    def get_qstat(test):
+        global netfam
+        stats = netfam.qstats_get({}, dump=True)
+        if stats:
+            for qs in stats:
+                if qs["ifindex"]== test.ifindex:
+                    return qs
+
+    qstat = get_qstat(cfg)
+    if qstat is None:
+        raise KsftSkipEx("qstats not supported by the device")
+
+    for key in ['tx-packets', 'tx-bytes', 'rx-packets', 'rx-bytes']:
+        ksft_in(key, qstat, "Drivers should always report basic keys")
+
+    # Compare stats, rtnl stats and qstats must match,
+    # but the interface may be up, so do a series of dumps
+    # each time the more "recent" stats must be higher or same.
+    def stat_cmp(rstat, qstat):
+        for key in ['tx-packets', 'tx-bytes', 'rx-packets', 'rx-bytes']:
+            if rstat[key] != qstat[key]:
+                return rstat[key] - qstat[key]
+        return 0
+
+    for _ in range(10):
+        rtstat = rtnl.getlink({"ifi-index": cfg.ifindex})['stats']
+        if stat_cmp(rtstat, qstat) < 0:
+            raise Exception("RTNL stats are lower, fetched later")
+        qstat = get_qstat(cfg)
+        if stat_cmp(rtstat, qstat) > 0:
+            raise Exception("Qstats are lower, fetched later")
+
+
+if __name__ == "__main__":
+    cfg = NetDrvEnv(__file__)
+    try:
+        ksft_run([check_pause, check_fec, pkt_byte_sum])
+    finally:
+        del cfg