diff mbox series

[net-next,02/13] selftests: ncdevmem: Remove validation

Message ID 20240912171251.937743-3-sdf@fomichev.me (mailing list archive)
State Changes Requested
Delegated to: Netdev Maintainers
Headers show
Series selftests: ncdevmem: Add ncdevmem to ksft | expand

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for net-next
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 7 this patch: 7
netdev/build_tools success Errors and warnings before: 0 this patch: 0
netdev/cc_maintainers warning 2 maintainers not CCed: shuah@kernel.org linux-kselftest@vger.kernel.org
netdev/build_clang success Errors and warnings before: 7 this patch: 7
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 7 this patch: 7
netdev/checkpatch warning CHECK: Lines should not end with a '('
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
netdev/contest warning net-next-2024-09-13--00-00 (tests: 536)

Commit Message

Stanislav Fomichev Sept. 12, 2024, 5:12 p.m. UTC
ncdevmem should (see next patches) print the payload on the stdout.
The validation can and should be done by the callers:

$ ncdevmem -l ... > file
$ sha256sum file

Cc: Mina Almasry <almasrymina@google.com>
Signed-off-by: Stanislav Fomichev <sdf@fomichev.me>
---
 tools/testing/selftests/net/ncdevmem.c | 56 +++-----------------------
 1 file changed, 6 insertions(+), 50 deletions(-)

Comments

Mina Almasry Sept. 12, 2024, 8:36 p.m. UTC | #1
On Thu, Sep 12, 2024 at 10:12 AM Stanislav Fomichev <sdf@fomichev.me> wrote:
>
> ncdevmem should (see next patches) print the payload on the stdout.
> The validation can and should be done by the callers:
>
> $ ncdevmem -l ... > file
> $ sha256sum file
>
> Cc: Mina Almasry <almasrymina@google.com>
> Signed-off-by: Stanislav Fomichev <sdf@fomichev.me>
> ---
>  tools/testing/selftests/net/ncdevmem.c | 56 +++-----------------------
>  1 file changed, 6 insertions(+), 50 deletions(-)
>
> diff --git a/tools/testing/selftests/net/ncdevmem.c b/tools/testing/selftests/net/ncdevmem.c
> index 352dba211fb0..3712296d997b 100644
> --- a/tools/testing/selftests/net/ncdevmem.c
> +++ b/tools/testing/selftests/net/ncdevmem.c
> @@ -64,24 +64,13 @@
>  static char *server_ip = "192.168.1.4";
>  static char *client_ip = "192.168.1.2";
>  static char *port = "5201";
> -static size_t do_validation;
>  static int start_queue = 8;
>  static int num_queues = 8;
>  static char *ifname = "eth1";
>  static unsigned int ifindex;
>  static unsigned int dmabuf_id;
>
> -void print_bytes(void *ptr, size_t size)
> -{
> -       unsigned char *p = ptr;
> -       int i;
> -
> -       for (i = 0; i < size; i++)
> -               printf("%02hhX ", p[i]);
> -       printf("\n");
> -}
> -
> -void print_nonzero_bytes(void *ptr, size_t size)
> +static void print_nonzero_bytes(void *ptr, size_t size)
>  {
>         unsigned char *p = ptr;
>         unsigned int i;
> @@ -91,30 +80,6 @@ void print_nonzero_bytes(void *ptr, size_t size)
>         printf("\n");
>  }
>
> -void validate_buffer(void *line, size_t size)
> -{
> -       static unsigned char seed = 1;
> -       unsigned char *ptr = line;
> -       int errors = 0;
> -       size_t i;
> -
> -       for (i = 0; i < size; i++) {
> -               if (ptr[i] != seed) {
> -                       fprintf(stderr,
> -                               "Failed validation: expected=%u, actual=%u, index=%lu\n",
> -                               seed, ptr[i], i);
> -                       errors++;

FWIW the index at where the validation started to fail often gives
critical clues about where the bug is, along with this line, which I'm
glad is not removed:

printf("received frag_page=%llu, in_page_offset=%llu,
frag_offset=%llu, frag_size=%u, token=%u, total_received=%lu,
dmabuf_id=%u\n",

I think we can ensure that what is doing the validation above ncdevmem
prints enough context about the error. Although, just to understand
your thinking a bit, why not have this binary do the validation
itself?

--
Thanks,
Mina
Stanislav Fomichev Sept. 12, 2024, 9:57 p.m. UTC | #2
On 09/12, Mina Almasry wrote:
> On Thu, Sep 12, 2024 at 10:12 AM Stanislav Fomichev <sdf@fomichev.me> wrote:
> >
> > ncdevmem should (see next patches) print the payload on the stdout.
> > The validation can and should be done by the callers:
> >
> > $ ncdevmem -l ... > file
> > $ sha256sum file
> >
> > Cc: Mina Almasry <almasrymina@google.com>
> > Signed-off-by: Stanislav Fomichev <sdf@fomichev.me>
> > ---
> >  tools/testing/selftests/net/ncdevmem.c | 56 +++-----------------------
> >  1 file changed, 6 insertions(+), 50 deletions(-)
> >
> > diff --git a/tools/testing/selftests/net/ncdevmem.c b/tools/testing/selftests/net/ncdevmem.c
> > index 352dba211fb0..3712296d997b 100644
> > --- a/tools/testing/selftests/net/ncdevmem.c
> > +++ b/tools/testing/selftests/net/ncdevmem.c
> > @@ -64,24 +64,13 @@
> >  static char *server_ip = "192.168.1.4";
> >  static char *client_ip = "192.168.1.2";
> >  static char *port = "5201";
> > -static size_t do_validation;
> >  static int start_queue = 8;
> >  static int num_queues = 8;
> >  static char *ifname = "eth1";
> >  static unsigned int ifindex;
> >  static unsigned int dmabuf_id;
> >
> > -void print_bytes(void *ptr, size_t size)
> > -{
> > -       unsigned char *p = ptr;
> > -       int i;
> > -
> > -       for (i = 0; i < size; i++)
> > -               printf("%02hhX ", p[i]);
> > -       printf("\n");
> > -}
> > -
> > -void print_nonzero_bytes(void *ptr, size_t size)
> > +static void print_nonzero_bytes(void *ptr, size_t size)
> >  {
> >         unsigned char *p = ptr;
> >         unsigned int i;
> > @@ -91,30 +80,6 @@ void print_nonzero_bytes(void *ptr, size_t size)
> >         printf("\n");
> >  }
> >
> > -void validate_buffer(void *line, size_t size)
> > -{
> > -       static unsigned char seed = 1;
> > -       unsigned char *ptr = line;
> > -       int errors = 0;
> > -       size_t i;
> > -
> > -       for (i = 0; i < size; i++) {
> > -               if (ptr[i] != seed) {
> > -                       fprintf(stderr,
> > -                               "Failed validation: expected=%u, actual=%u, index=%lu\n",
> > -                               seed, ptr[i], i);
> > -                       errors++;
> 
> FWIW the index at where the validation started to fail often gives
> critical clues about where the bug is, along with this line, which I'm
> glad is not removed:
> 
> printf("received frag_page=%llu, in_page_offset=%llu,
> frag_offset=%llu, frag_size=%u, token=%u, total_received=%lu,
> dmabuf_id=%u\n",
> 
> I think we can ensure that what is doing the validation above ncdevmem
> prints enough context about the error. Although, just to understand
> your thinking a bit, why not have this binary do the validation
> itself?

Right, the debugging stuff will still be there to track the offending
part. And the caller can print out the idx of data where the validation
failed.

The reason I removed it from the tool is to be able to do the validation
with regular nc on the other side. I just do:

- echo "expected payload" | nc ..
- ncdevmem -s .. > expected_payload.txt
- [ "expected payload" != $(cat expected_payload.txt) ] && fail

If I were to use the validation on the ncdevmem side, I'd need to bother
with generating the payload that it expects which seems like
an unnecessary complication? For the (to be posted) txrx test I basically
do the following (sha256sum to validate a bunch of random bytes):

def check_txrx(cfg) -> None:
    cfg.require_v6()
    require_devmem(cfg)

    cmd(f"cat /dev/urandom | tr -dc '[:print:]' | head -c 1M > random_file.txt", host=cfg.remote, shell=True)
    want_sha = cmd(f"sha256sum random_file.txt", host=cfg.remote, shell=True).stdout.strip()

    port = rand_port()
    listen_cmd = f"./ncdevmem -l -f {cfg.ifname} -s {cfg.v6} -p {port} | tee random_file.txt | sha256sum -"

    pwd = cmd(f"pwd").stdout.strip()
    with bkg(listen_cmd, exit_wait=True) as nc:
        wait_port_listen(port)
        cmd(f"cat random_file.txt | {pwd}/ncdevmem -f {cfg.ifname} -s {cfg.v6} -p {port}", host=cfg.remote, shell=True)

    ksft_eq(nc.stdout.strip().split(" ")[0], want_sha.split(" ")[0])
Mina Almasry Sept. 13, 2024, 3:30 p.m. UTC | #3
On Thu, Sep 12, 2024 at 2:57 PM Stanislav Fomichev <stfomichev@gmail.com> wrote:
>
> On 09/12, Mina Almasry wrote:
> > On Thu, Sep 12, 2024 at 10:12 AM Stanislav Fomichev <sdf@fomichev.me> wrote:
> > >
> > > ncdevmem should (see next patches) print the payload on the stdout.
> > > The validation can and should be done by the callers:
> > >
> > > $ ncdevmem -l ... > file
> > > $ sha256sum file
> > >
> > > Cc: Mina Almasry <almasrymina@google.com>
> > > Signed-off-by: Stanislav Fomichev <sdf@fomichev.me>
> > > ---
> > >  tools/testing/selftests/net/ncdevmem.c | 56 +++-----------------------
> > >  1 file changed, 6 insertions(+), 50 deletions(-)
> > >
> > > diff --git a/tools/testing/selftests/net/ncdevmem.c b/tools/testing/selftests/net/ncdevmem.c
> > > index 352dba211fb0..3712296d997b 100644
> > > --- a/tools/testing/selftests/net/ncdevmem.c
> > > +++ b/tools/testing/selftests/net/ncdevmem.c
> > > @@ -64,24 +64,13 @@
> > >  static char *server_ip = "192.168.1.4";
> > >  static char *client_ip = "192.168.1.2";
> > >  static char *port = "5201";
> > > -static size_t do_validation;
> > >  static int start_queue = 8;
> > >  static int num_queues = 8;
> > >  static char *ifname = "eth1";
> > >  static unsigned int ifindex;
> > >  static unsigned int dmabuf_id;
> > >
> > > -void print_bytes(void *ptr, size_t size)
> > > -{
> > > -       unsigned char *p = ptr;
> > > -       int i;
> > > -
> > > -       for (i = 0; i < size; i++)
> > > -               printf("%02hhX ", p[i]);
> > > -       printf("\n");
> > > -}
> > > -
> > > -void print_nonzero_bytes(void *ptr, size_t size)
> > > +static void print_nonzero_bytes(void *ptr, size_t size)
> > >  {
> > >         unsigned char *p = ptr;
> > >         unsigned int i;
> > > @@ -91,30 +80,6 @@ void print_nonzero_bytes(void *ptr, size_t size)
> > >         printf("\n");
> > >  }
> > >
> > > -void validate_buffer(void *line, size_t size)
> > > -{
> > > -       static unsigned char seed = 1;
> > > -       unsigned char *ptr = line;
> > > -       int errors = 0;
> > > -       size_t i;
> > > -
> > > -       for (i = 0; i < size; i++) {
> > > -               if (ptr[i] != seed) {
> > > -                       fprintf(stderr,
> > > -                               "Failed validation: expected=%u, actual=%u, index=%lu\n",
> > > -                               seed, ptr[i], i);
> > > -                       errors++;
> >
> > FWIW the index at where the validation started to fail often gives
> > critical clues about where the bug is, along with this line, which I'm
> > glad is not removed:
> >
> > printf("received frag_page=%llu, in_page_offset=%llu,
> > frag_offset=%llu, frag_size=%u, token=%u, total_received=%lu,
> > dmabuf_id=%u\n",
> >
> > I think we can ensure that what is doing the validation above ncdevmem
> > prints enough context about the error. Although, just to understand
> > your thinking a bit, why not have this binary do the validation
> > itself?
>
> Right, the debugging stuff will still be there to track the offending
> part. And the caller can print out the idx of data where the validation
> failed.
>

Sorry to harp on this, but on second thought I don't think just
printing out the idx is good enough. In many cases all the context
printed by ncdevmem validation (page_frag/offset/dmabuf_id/etc) is
useful, and it's useful to have it inline with where the check failed.

IIUC after your changes the frag_page/offset/dmabuf_id will go to
stderr output of ncdevmem, but the validation check fail will go to a
different log by the parent checker. Matching the failure in the 2
logs in megs of frag output will be annoying.

> The reason I removed it from the tool is to be able to do the validation
> with regular nc on the other side. I just do:
>

To be honest I don't think the ncdevmem validation gets in the way of that.

To test rx, we can set up regular nc on the client side, and have
ncdevmem do validation on rx.
To test tx, we can have ncdevmem do tx (no validation), and have a
script on top of nc do validation on the rx side.

I guess you may find the lack of symmetry annoying, but IMO it's less
annoying than losing some debuggability when the test fails.

I think probably there are 3 hopefully agreeable things we can do here:

1. Keep the validation as is and do the rx/tx test as I described above.
2. keep the ncdevmem validation, dump it to stderr, and ignore it in
the test. Leave it be for folks wanting to run the test manually to
debug it.
3. Move the validation to a parent process of ncdevmem, but that
parent process needs to print full context of the failure in 1 log.

I prefer #1 TBH.
Stanislav Fomichev Sept. 13, 2024, 5:15 p.m. UTC | #4
On 09/13, Mina Almasry wrote:
> On Thu, Sep 12, 2024 at 2:57 PM Stanislav Fomichev <stfomichev@gmail.com> wrote:
> >
> > On 09/12, Mina Almasry wrote:
> > > On Thu, Sep 12, 2024 at 10:12 AM Stanislav Fomichev <sdf@fomichev.me> wrote:
> > > >
> > > > ncdevmem should (see next patches) print the payload on the stdout.
> > > > The validation can and should be done by the callers:
> > > >
> > > > $ ncdevmem -l ... > file
> > > > $ sha256sum file
> > > >
> > > > Cc: Mina Almasry <almasrymina@google.com>
> > > > Signed-off-by: Stanislav Fomichev <sdf@fomichev.me>
> > > > ---
> > > >  tools/testing/selftests/net/ncdevmem.c | 56 +++-----------------------
> > > >  1 file changed, 6 insertions(+), 50 deletions(-)
> > > >
> > > > diff --git a/tools/testing/selftests/net/ncdevmem.c b/tools/testing/selftests/net/ncdevmem.c
> > > > index 352dba211fb0..3712296d997b 100644
> > > > --- a/tools/testing/selftests/net/ncdevmem.c
> > > > +++ b/tools/testing/selftests/net/ncdevmem.c
> > > > @@ -64,24 +64,13 @@
> > > >  static char *server_ip = "192.168.1.4";
> > > >  static char *client_ip = "192.168.1.2";
> > > >  static char *port = "5201";
> > > > -static size_t do_validation;
> > > >  static int start_queue = 8;
> > > >  static int num_queues = 8;
> > > >  static char *ifname = "eth1";
> > > >  static unsigned int ifindex;
> > > >  static unsigned int dmabuf_id;
> > > >
> > > > -void print_bytes(void *ptr, size_t size)
> > > > -{
> > > > -       unsigned char *p = ptr;
> > > > -       int i;
> > > > -
> > > > -       for (i = 0; i < size; i++)
> > > > -               printf("%02hhX ", p[i]);
> > > > -       printf("\n");
> > > > -}
> > > > -
> > > > -void print_nonzero_bytes(void *ptr, size_t size)
> > > > +static void print_nonzero_bytes(void *ptr, size_t size)
> > > >  {
> > > >         unsigned char *p = ptr;
> > > >         unsigned int i;
> > > > @@ -91,30 +80,6 @@ void print_nonzero_bytes(void *ptr, size_t size)
> > > >         printf("\n");
> > > >  }
> > > >
> > > > -void validate_buffer(void *line, size_t size)
> > > > -{
> > > > -       static unsigned char seed = 1;
> > > > -       unsigned char *ptr = line;
> > > > -       int errors = 0;
> > > > -       size_t i;
> > > > -
> > > > -       for (i = 0; i < size; i++) {
> > > > -               if (ptr[i] != seed) {
> > > > -                       fprintf(stderr,
> > > > -                               "Failed validation: expected=%u, actual=%u, index=%lu\n",
> > > > -                               seed, ptr[i], i);
> > > > -                       errors++;
> > >
> > > FWIW the index at where the validation started to fail often gives
> > > critical clues about where the bug is, along with this line, which I'm
> > > glad is not removed:
> > >
> > > printf("received frag_page=%llu, in_page_offset=%llu,
> > > frag_offset=%llu, frag_size=%u, token=%u, total_received=%lu,
> > > dmabuf_id=%u\n",
> > >
> > > I think we can ensure that what is doing the validation above ncdevmem
> > > prints enough context about the error. Although, just to understand
> > > your thinking a bit, why not have this binary do the validation
> > > itself?
> >
> > Right, the debugging stuff will still be there to track the offending
> > part. And the caller can print out the idx of data where the validation
> > failed.
> >
> 
> Sorry to harp on this, but on second thought I don't think just
> printing out the idx is good enough. In many cases all the context
> printed by ncdevmem validation (page_frag/offset/dmabuf_id/etc) is
> useful, and it's useful to have it inline with where the check failed.
> 
> IIUC after your changes the frag_page/offset/dmabuf_id will go to
> stderr output of ncdevmem, but the validation check fail will go to a
> different log by the parent checker. Matching the failure in the 2
> logs in megs of frag output will be annoying.

Yes, it will definitely be more annoying to piece those two things
together. But I don't expect us to debug the payload validation issues
from the nipa dashboard logs. Even if you get a clear message of
"byte at position X is not expected" plus all the chunk info logs,
what do you really do with this info (especially if it's flaky)?

For development we can have some script to put those two things together
for debugging.

> > The reason I removed it from the tool is to be able to do the validation
> > with regular nc on the other side. I just do:
> >
> 
> To be honest I don't think the ncdevmem validation gets in the way of that.
> 
> To test rx, we can set up regular nc on the client side, and have
> ncdevmem do validation on rx.
> To test tx, we can have ncdevmem do tx (no validation), and have a
> script on top of nc do validation on the rx side.
> 
> I guess you may find the lack of symmetry annoying, but IMO it's less
> annoying than losing some debuggability when the test fails.
> 
> I think probably there are 3 hopefully agreeable things we can do here:
> 
> 1. Keep the validation as is and do the rx/tx test as I described above.
> 2. keep the ncdevmem validation, dump it to stderr, and ignore it in
> the test. Leave it be for folks wanting to run the test manually to
> debug it.
> 3. Move the validation to a parent process of ncdevmem, but that
> parent process needs to print full context of the failure in 1 log.
> 
> I prefer #1 TBH.

TBH, I find the existing ncdevmem validation scheme a bit lacking. It is
basically the same payload over and over (or maybe I misread how it
works). Maybe we should implement a PRNG-like sequence for validation
if you prefer to keep it internal?

How about we start with what I have (simple 'hello\nworld') and once
you do tx path, we can add the internal validation back? Both sides,
with a proper selftest this time. For the time being, we can use the existing
rx selftest as a smoke test.

Or I can just drop this patch from the series and let you follow up
on the validation in the selftests (i.e., convert from 'hello\nworld'
to whatever you prefer)
Mina Almasry Sept. 13, 2024, 11:42 p.m. UTC | #5
On Fri, Sep 13, 2024 at 10:15 AM Stanislav Fomichev
<stfomichev@gmail.com> wrote:
>
> On 09/13, Mina Almasry wrote:
> > On Thu, Sep 12, 2024 at 2:57 PM Stanislav Fomichev <stfomichev@gmail.com> wrote:
> > >
> > > On 09/12, Mina Almasry wrote:
> > > > On Thu, Sep 12, 2024 at 10:12 AM Stanislav Fomichev <sdf@fomichev.me> wrote:
> > > > >
> > > > > ncdevmem should (see next patches) print the payload on the stdout.
> > > > > The validation can and should be done by the callers:
> > > > >
> > > > > $ ncdevmem -l ... > file
> > > > > $ sha256sum file
> > > > >
> > > > > Cc: Mina Almasry <almasrymina@google.com>
> > > > > Signed-off-by: Stanislav Fomichev <sdf@fomichev.me>
> > > > > ---
> > > > >  tools/testing/selftests/net/ncdevmem.c | 56 +++-----------------------
> > > > >  1 file changed, 6 insertions(+), 50 deletions(-)
> > > > >
> > > > > diff --git a/tools/testing/selftests/net/ncdevmem.c b/tools/testing/selftests/net/ncdevmem.c
> > > > > index 352dba211fb0..3712296d997b 100644
> > > > > --- a/tools/testing/selftests/net/ncdevmem.c
> > > > > +++ b/tools/testing/selftests/net/ncdevmem.c
> > > > > @@ -64,24 +64,13 @@
> > > > >  static char *server_ip = "192.168.1.4";
> > > > >  static char *client_ip = "192.168.1.2";
> > > > >  static char *port = "5201";
> > > > > -static size_t do_validation;
> > > > >  static int start_queue = 8;
> > > > >  static int num_queues = 8;
> > > > >  static char *ifname = "eth1";
> > > > >  static unsigned int ifindex;
> > > > >  static unsigned int dmabuf_id;
> > > > >
> > > > > -void print_bytes(void *ptr, size_t size)
> > > > > -{
> > > > > -       unsigned char *p = ptr;
> > > > > -       int i;
> > > > > -
> > > > > -       for (i = 0; i < size; i++)
> > > > > -               printf("%02hhX ", p[i]);
> > > > > -       printf("\n");
> > > > > -}
> > > > > -
> > > > > -void print_nonzero_bytes(void *ptr, size_t size)
> > > > > +static void print_nonzero_bytes(void *ptr, size_t size)
> > > > >  {
> > > > >         unsigned char *p = ptr;
> > > > >         unsigned int i;
> > > > > @@ -91,30 +80,6 @@ void print_nonzero_bytes(void *ptr, size_t size)
> > > > >         printf("\n");
> > > > >  }
> > > > >
> > > > > -void validate_buffer(void *line, size_t size)
> > > > > -{
> > > > > -       static unsigned char seed = 1;
> > > > > -       unsigned char *ptr = line;
> > > > > -       int errors = 0;
> > > > > -       size_t i;
> > > > > -
> > > > > -       for (i = 0; i < size; i++) {
> > > > > -               if (ptr[i] != seed) {
> > > > > -                       fprintf(stderr,
> > > > > -                               "Failed validation: expected=%u, actual=%u, index=%lu\n",
> > > > > -                               seed, ptr[i], i);
> > > > > -                       errors++;
> > > >
> > > > FWIW the index at where the validation started to fail often gives
> > > > critical clues about where the bug is, along with this line, which I'm
> > > > glad is not removed:
> > > >
> > > > printf("received frag_page=%llu, in_page_offset=%llu,
> > > > frag_offset=%llu, frag_size=%u, token=%u, total_received=%lu,
> > > > dmabuf_id=%u\n",
> > > >
> > > > I think we can ensure that what is doing the validation above ncdevmem
> > > > prints enough context about the error. Although, just to understand
> > > > your thinking a bit, why not have this binary do the validation
> > > > itself?
> > >
> > > Right, the debugging stuff will still be there to track the offending
> > > part. And the caller can print out the idx of data where the validation
> > > failed.
> > >
> >
> > Sorry to harp on this, but on second thought I don't think just
> > printing out the idx is good enough. In many cases all the context
> > printed by ncdevmem validation (page_frag/offset/dmabuf_id/etc) is
> > useful, and it's useful to have it inline with where the check failed.
> >
> > IIUC after your changes the frag_page/offset/dmabuf_id will go to
> > stderr output of ncdevmem, but the validation check fail will go to a
> > different log by the parent checker. Matching the failure in the 2
> > logs in megs of frag output will be annoying.
>
> Yes, it will definitely be more annoying to piece those two things
> together. But I don't expect us to debug the payload validation issues
> from the nipa dashboard logs. Even if you get a clear message of
> "byte at position X is not expected" plus all the chunk info logs,
> what do you really do with this info (especially if it's flaky)?
>

Oh, they provide important clues, even for flakes. See an example of
Taehee using these clues to arrive at a root cause here:
https://lore.kernel.org/netdev/CAMArcTUXm13xJO9XqcT=0uQAn_ZQOQ=Y49EPpHqV+jkkhihMcw@mail.gmail.com/

> For development we can have some script to put those two things together
> for debugging.
>
> > > The reason I removed it from the tool is to be able to do the validation
> > > with regular nc on the other side. I just do:
> > >
> >
> > To be honest I don't think the ncdevmem validation gets in the way of that.
> >
> > To test rx, we can set up regular nc on the client side, and have
> > ncdevmem do validation on rx.
> > To test tx, we can have ncdevmem do tx (no validation), and have a
> > script on top of nc do validation on the rx side.
> >
> > I guess you may find the lack of symmetry annoying, but IMO it's less
> > annoying than losing some debuggability when the test fails.
> >
> > I think probably there are 3 hopefully agreeable things we can do here:
> >
> > 1. Keep the validation as is and do the rx/tx test as I described above.
> > 2. keep the ncdevmem validation, dump it to stderr, and ignore it in
> > the test. Leave it be for folks wanting to run the test manually to
> > debug it.
> > 3. Move the validation to a parent process of ncdevmem, but that
> > parent process needs to print full context of the failure in 1 log.
> >
> > I prefer #1 TBH.
>
> TBH, I find the existing ncdevmem validation scheme a bit lacking. It is
> basically the same payload over and over (or maybe I misread how it
> works). Maybe we should implement a PRNG-like sequence for validation
> if you prefer to keep it internal?
>

It's just a repeating sequence that's easy to implement. Technically
it would not detect if we drop an N iterations in the sequence, but in
practice it's hard to be that unlucky repeatedly, especially if the
number of repeating bytes is not a power of 2. Improving it to a more
robust sequence sounds fine to me.

> How about we start with what I have (simple 'hello\nworld') and once
> you do tx path, we can add the internal validation back? Both sides,
> with a proper selftest this time. For the time being, we can use the existing
> rx selftest as a smoke test.
>
> Or I can just drop this patch from the series and let you follow up
> on the validation in the selftests (i.e., convert from 'hello\nworld'
> to whatever you prefer)

To be honest the validation has been too important in the testing so
far in root causing issues that I would not like it temporarily
removed. Dropping this patch and letting me add validation to NIPA
later on sounds good to me.
diff mbox series

Patch

diff --git a/tools/testing/selftests/net/ncdevmem.c b/tools/testing/selftests/net/ncdevmem.c
index 352dba211fb0..3712296d997b 100644
--- a/tools/testing/selftests/net/ncdevmem.c
+++ b/tools/testing/selftests/net/ncdevmem.c
@@ -64,24 +64,13 @@ 
 static char *server_ip = "192.168.1.4";
 static char *client_ip = "192.168.1.2";
 static char *port = "5201";
-static size_t do_validation;
 static int start_queue = 8;
 static int num_queues = 8;
 static char *ifname = "eth1";
 static unsigned int ifindex;
 static unsigned int dmabuf_id;
 
-void print_bytes(void *ptr, size_t size)
-{
-	unsigned char *p = ptr;
-	int i;
-
-	for (i = 0; i < size; i++)
-		printf("%02hhX ", p[i]);
-	printf("\n");
-}
-
-void print_nonzero_bytes(void *ptr, size_t size)
+static void print_nonzero_bytes(void *ptr, size_t size)
 {
 	unsigned char *p = ptr;
 	unsigned int i;
@@ -91,30 +80,6 @@  void print_nonzero_bytes(void *ptr, size_t size)
 	printf("\n");
 }
 
-void validate_buffer(void *line, size_t size)
-{
-	static unsigned char seed = 1;
-	unsigned char *ptr = line;
-	int errors = 0;
-	size_t i;
-
-	for (i = 0; i < size; i++) {
-		if (ptr[i] != seed) {
-			fprintf(stderr,
-				"Failed validation: expected=%u, actual=%u, index=%lu\n",
-				seed, ptr[i], i);
-			errors++;
-			if (errors > 20)
-				error(1, 0, "validation failed.");
-		}
-		seed++;
-		if (seed == do_validation)
-			seed = 0;
-	}
-
-	fprintf(stdout, "Validated buffer\n");
-}
-
 #define run_command(cmd, ...)                                           \
 	({                                                              \
 		char command[256];                                      \
@@ -414,16 +379,10 @@  int do_server(void)
 			sync.flags = DMA_BUF_SYNC_READ | DMA_BUF_SYNC_START;
 			ioctl(buf, DMA_BUF_IOCTL_SYNC, &sync);
 
-			if (do_validation)
-				validate_buffer(
-					((unsigned char *)buf_mem) +
-						dmabuf_cmsg->frag_offset,
-					dmabuf_cmsg->frag_size);
-			else
-				print_nonzero_bytes(
-					((unsigned char *)buf_mem) +
-						dmabuf_cmsg->frag_offset,
-					dmabuf_cmsg->frag_size);
+			print_nonzero_bytes(
+				((unsigned char *)buf_mem) +
+					dmabuf_cmsg->frag_offset,
+				dmabuf_cmsg->frag_size);
 
 			sync.flags = DMA_BUF_SYNC_READ | DMA_BUF_SYNC_END;
 			ioctl(buf, DMA_BUF_IOCTL_SYNC, &sync);
@@ -525,7 +484,7 @@  int main(int argc, char *argv[])
 	int is_server = 0, opt;
 	int probe = 0;
 
-	while ((opt = getopt(argc, argv, "ls:c:p:v:q:t:f:P")) != -1) {
+	while ((opt = getopt(argc, argv, "ls:c:p:q:t:f:P")) != -1) {
 		switch (opt) {
 		case 'l':
 			is_server = 1;
@@ -539,9 +498,6 @@  int main(int argc, char *argv[])
 		case 'p':
 			port = optarg;
 			break;
-		case 'v':
-			do_validation = atoll(optarg);
-			break;
 		case 'q':
 			num_queues = atoi(optarg);
 			break;