diff mbox series

[bpf-next,v5,6/6] selftests/bpf: Add a series of tests for bpf_snprintf

Message ID 20210419155243.1632274-7-revest@chromium.org (mailing list archive)
State Accepted
Delegated to: BPF
Headers show
Series Add a snprintf eBPF helper | expand

Checks

Context Check Description
netdev/cover_letter success Link
netdev/fixes_present success Link
netdev/patch_count success Link
netdev/tree_selection success Clearly marked for bpf-next
netdev/subject_prefix success Link
netdev/cc_maintainers warning 6 maintainers not CCed: linux-kselftest@vger.kernel.org netdev@vger.kernel.org kafai@fb.com john.fastabend@gmail.com songliubraving@fb.com shuah@kernel.org
netdev/source_inline success Was 0 now: 0
netdev/verify_signedoff success Link
netdev/module_param success Was 0 now: 0
netdev/build_32bit success Errors and warnings before: 0 this patch: 0
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/verify_fixes success Link
netdev/checkpatch warning WARNING: Using vsprintf specifier '%px' potentially exposes the kernel memory layout, if you don't really need the address please consider using '%p'. WARNING: added, moved or deleted file(s), does MAINTAINERS need updating? WARNING: externs should be avoided in .c files
netdev/build_allmodconfig_warn success Errors and warnings before: 0 this patch: 0
netdev/header_inline success Link

Commit Message

Florent Revest April 19, 2021, 3:52 p.m. UTC
The "positive" part tests all format specifiers when things go well.

The "negative" part makes sure that incorrect format strings fail at
load time.

Signed-off-by: Florent Revest <revest@chromium.org>
---
 .../selftests/bpf/prog_tests/snprintf.c       | 125 ++++++++++++++++++
 .../selftests/bpf/progs/test_snprintf.c       |  73 ++++++++++
 .../bpf/progs/test_snprintf_single.c          |  20 +++
 3 files changed, 218 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/snprintf.c
 create mode 100644 tools/testing/selftests/bpf/progs/test_snprintf.c
 create mode 100644 tools/testing/selftests/bpf/progs/test_snprintf_single.c

Comments

Andrii Nakryiko April 23, 2021, 10:38 p.m. UTC | #1
On Mon, Apr 19, 2021 at 8:52 AM Florent Revest <revest@chromium.org> wrote:
>
> The "positive" part tests all format specifiers when things go well.
>
> The "negative" part makes sure that incorrect format strings fail at
> load time.
>
> Signed-off-by: Florent Revest <revest@chromium.org>
> ---
>  .../selftests/bpf/prog_tests/snprintf.c       | 125 ++++++++++++++++++
>  .../selftests/bpf/progs/test_snprintf.c       |  73 ++++++++++
>  .../bpf/progs/test_snprintf_single.c          |  20 +++
>  3 files changed, 218 insertions(+)
>  create mode 100644 tools/testing/selftests/bpf/prog_tests/snprintf.c
>  create mode 100644 tools/testing/selftests/bpf/progs/test_snprintf.c
>  create mode 100644 tools/testing/selftests/bpf/progs/test_snprintf_single.c
>
> diff --git a/tools/testing/selftests/bpf/prog_tests/snprintf.c b/tools/testing/selftests/bpf/prog_tests/snprintf.c
> new file mode 100644
> index 000000000000..a958c22aec75
> --- /dev/null
> +++ b/tools/testing/selftests/bpf/prog_tests/snprintf.c
> @@ -0,0 +1,125 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/* Copyright (c) 2021 Google LLC. */
> +
> +#include <test_progs.h>
> +#include "test_snprintf.skel.h"
> +#include "test_snprintf_single.skel.h"
> +
> +#define EXP_NUM_OUT  "-8 9 96 -424242 1337 DABBAD00"
> +#define EXP_NUM_RET  sizeof(EXP_NUM_OUT)
> +
> +#define EXP_IP_OUT   "127.000.000.001 0000:0000:0000:0000:0000:0000:0000:0001"
> +#define EXP_IP_RET   sizeof(EXP_IP_OUT)
> +
> +/* The third specifier, %pB, depends on compiler inlining so don't check it */
> +#define EXP_SYM_OUT  "schedule schedule+0x0/"
> +#define MIN_SYM_RET  sizeof(EXP_SYM_OUT)
> +
> +/* The third specifier, %p, is a hashed pointer which changes on every reboot */
> +#define EXP_ADDR_OUT "0000000000000000 ffff00000add4e55 "
> +#define EXP_ADDR_RET sizeof(EXP_ADDR_OUT "unknownhashedptr")
> +
> +#define EXP_STR_OUT  "str1 longstr"
> +#define EXP_STR_RET  sizeof(EXP_STR_OUT)
> +
> +#define EXP_OVER_OUT "%over"
> +#define EXP_OVER_RET 10
> +
> +#define EXP_PAD_OUT "    4 000"

Roughly 50% of the time I get failure for this test case:

test_snprintf_positive:FAIL:pad_out unexpected pad_out: actual '    4
0000' != expected '    4 000'

Re-running this test case immediately passes. Running again most
probably fails. Please take a look.

> +#define EXP_PAD_RET 900007
> +
> +#define EXP_NO_ARG_OUT "simple case"
> +#define EXP_NO_ARG_RET 12
> +
> +#define EXP_NO_BUF_RET 29
> +

[...]
Florent Revest April 26, 2021, 10:10 a.m. UTC | #2
On Sat, Apr 24, 2021 at 12:38 AM Andrii Nakryiko
<andrii.nakryiko@gmail.com> wrote:
>
> On Mon, Apr 19, 2021 at 8:52 AM Florent Revest <revest@chromium.org> wrote:
> >
> > The "positive" part tests all format specifiers when things go well.
> >
> > The "negative" part makes sure that incorrect format strings fail at
> > load time.
> >
> > Signed-off-by: Florent Revest <revest@chromium.org>
> > ---
> >  .../selftests/bpf/prog_tests/snprintf.c       | 125 ++++++++++++++++++
> >  .../selftests/bpf/progs/test_snprintf.c       |  73 ++++++++++
> >  .../bpf/progs/test_snprintf_single.c          |  20 +++
> >  3 files changed, 218 insertions(+)
> >  create mode 100644 tools/testing/selftests/bpf/prog_tests/snprintf.c
> >  create mode 100644 tools/testing/selftests/bpf/progs/test_snprintf.c
> >  create mode 100644 tools/testing/selftests/bpf/progs/test_snprintf_single.c
> >
> > diff --git a/tools/testing/selftests/bpf/prog_tests/snprintf.c b/tools/testing/selftests/bpf/prog_tests/snprintf.c
> > new file mode 100644
> > index 000000000000..a958c22aec75
> > --- /dev/null
> > +++ b/tools/testing/selftests/bpf/prog_tests/snprintf.c
> > @@ -0,0 +1,125 @@
> > +// SPDX-License-Identifier: GPL-2.0
> > +/* Copyright (c) 2021 Google LLC. */
> > +
> > +#include <test_progs.h>
> > +#include "test_snprintf.skel.h"
> > +#include "test_snprintf_single.skel.h"
> > +
> > +#define EXP_NUM_OUT  "-8 9 96 -424242 1337 DABBAD00"
> > +#define EXP_NUM_RET  sizeof(EXP_NUM_OUT)
> > +
> > +#define EXP_IP_OUT   "127.000.000.001 0000:0000:0000:0000:0000:0000:0000:0001"
> > +#define EXP_IP_RET   sizeof(EXP_IP_OUT)
> > +
> > +/* The third specifier, %pB, depends on compiler inlining so don't check it */
> > +#define EXP_SYM_OUT  "schedule schedule+0x0/"
> > +#define MIN_SYM_RET  sizeof(EXP_SYM_OUT)
> > +
> > +/* The third specifier, %p, is a hashed pointer which changes on every reboot */
> > +#define EXP_ADDR_OUT "0000000000000000 ffff00000add4e55 "
> > +#define EXP_ADDR_RET sizeof(EXP_ADDR_OUT "unknownhashedptr")
> > +
> > +#define EXP_STR_OUT  "str1 longstr"
> > +#define EXP_STR_RET  sizeof(EXP_STR_OUT)
> > +
> > +#define EXP_OVER_OUT "%over"
> > +#define EXP_OVER_RET 10
> > +
> > +#define EXP_PAD_OUT "    4 000"
>
> Roughly 50% of the time I get failure for this test case:
>
> test_snprintf_positive:FAIL:pad_out unexpected pad_out: actual '    4
> 0000' != expected '    4 000'
>
> Re-running this test case immediately passes. Running again most
> probably fails. Please take a look.

Do you have more information on how to reproduce this ?
I spinned up a VM at 87bd9e602 with ./vmtest -s and then run this script:

#!/bin/sh
for i in `seq 1000`
do
  ./test_progs -t snprintf
  if [ $? -ne 0 ];
  then
    echo FAILURE
    exit 1
  fi
done

The thousand executions passed.

This is a bit concerning because your unexpected_pad_out seems to have
an extra '0' so it ends up with strlen(pad_out)=11 but
sizeof(pad_out)=10. The actual string writing is not really done by
our helper code but by the snprintf implementation (str and str_size
are only given to snprintf()) so I'd expect the truncation to work
well there. I'm a bit puzzled
Andrii Nakryiko April 26, 2021, 4:19 p.m. UTC | #3
On Mon, Apr 26, 2021 at 3:10 AM Florent Revest <revest@chromium.org> wrote:
>
> On Sat, Apr 24, 2021 at 12:38 AM Andrii Nakryiko
> <andrii.nakryiko@gmail.com> wrote:
> >
> > On Mon, Apr 19, 2021 at 8:52 AM Florent Revest <revest@chromium.org> wrote:
> > >
> > > The "positive" part tests all format specifiers when things go well.
> > >
> > > The "negative" part makes sure that incorrect format strings fail at
> > > load time.
> > >
> > > Signed-off-by: Florent Revest <revest@chromium.org>
> > > ---
> > >  .../selftests/bpf/prog_tests/snprintf.c       | 125 ++++++++++++++++++
> > >  .../selftests/bpf/progs/test_snprintf.c       |  73 ++++++++++
> > >  .../bpf/progs/test_snprintf_single.c          |  20 +++
> > >  3 files changed, 218 insertions(+)
> > >  create mode 100644 tools/testing/selftests/bpf/prog_tests/snprintf.c
> > >  create mode 100644 tools/testing/selftests/bpf/progs/test_snprintf.c
> > >  create mode 100644 tools/testing/selftests/bpf/progs/test_snprintf_single.c
> > >
> > > diff --git a/tools/testing/selftests/bpf/prog_tests/snprintf.c b/tools/testing/selftests/bpf/prog_tests/snprintf.c
> > > new file mode 100644
> > > index 000000000000..a958c22aec75
> > > --- /dev/null
> > > +++ b/tools/testing/selftests/bpf/prog_tests/snprintf.c
> > > @@ -0,0 +1,125 @@
> > > +// SPDX-License-Identifier: GPL-2.0
> > > +/* Copyright (c) 2021 Google LLC. */
> > > +
> > > +#include <test_progs.h>
> > > +#include "test_snprintf.skel.h"
> > > +#include "test_snprintf_single.skel.h"
> > > +
> > > +#define EXP_NUM_OUT  "-8 9 96 -424242 1337 DABBAD00"
> > > +#define EXP_NUM_RET  sizeof(EXP_NUM_OUT)
> > > +
> > > +#define EXP_IP_OUT   "127.000.000.001 0000:0000:0000:0000:0000:0000:0000:0001"
> > > +#define EXP_IP_RET   sizeof(EXP_IP_OUT)
> > > +
> > > +/* The third specifier, %pB, depends on compiler inlining so don't check it */
> > > +#define EXP_SYM_OUT  "schedule schedule+0x0/"
> > > +#define MIN_SYM_RET  sizeof(EXP_SYM_OUT)
> > > +
> > > +/* The third specifier, %p, is a hashed pointer which changes on every reboot */
> > > +#define EXP_ADDR_OUT "0000000000000000 ffff00000add4e55 "
> > > +#define EXP_ADDR_RET sizeof(EXP_ADDR_OUT "unknownhashedptr")
> > > +
> > > +#define EXP_STR_OUT  "str1 longstr"
> > > +#define EXP_STR_RET  sizeof(EXP_STR_OUT)
> > > +
> > > +#define EXP_OVER_OUT "%over"
> > > +#define EXP_OVER_RET 10
> > > +
> > > +#define EXP_PAD_OUT "    4 000"
> >
> > Roughly 50% of the time I get failure for this test case:
> >
> > test_snprintf_positive:FAIL:pad_out unexpected pad_out: actual '    4
> > 0000' != expected '    4 000'
> >
> > Re-running this test case immediately passes. Running again most
> > probably fails. Please take a look.
>
> Do you have more information on how to reproduce this ?
> I spinned up a VM at 87bd9e602 with ./vmtest -s and then run this script:
>
> #!/bin/sh
> for i in `seq 1000`
> do
>   ./test_progs -t snprintf
>   if [ $? -ne 0 ];
>   then
>     echo FAILURE
>     exit 1
>   fi
> done
>
> The thousand executions passed.
>
> This is a bit concerning because your unexpected_pad_out seems to have
> an extra '0' so it ends up with strlen(pad_out)=11 but
> sizeof(pad_out)=10. The actual string writing is not really done by
> our helper code but by the snprintf implementation (str and str_size
> are only given to snprintf()) so I'd expect the truncation to work
> well there. I'm a bit puzzled

I'm puzzled too, have no idea. I also can't repro this with vmtest.sh.
But I can quite reliably reproduce with my local ArchLinux-based qemu
image with different config (see [0] for config itself). So please try
with my config and see if that helps to repro. If not, I'll have to
debug it on my own later.

  [0] https://gist.github.com/anakryiko/4b6ae21680842bdeacca8fa99d378048
Florent Revest April 26, 2021, 9:08 p.m. UTC | #4
On Mon, Apr 26, 2021 at 6:19 PM Andrii Nakryiko
<andrii.nakryiko@gmail.com> wrote:
>
> On Mon, Apr 26, 2021 at 3:10 AM Florent Revest <revest@chromium.org> wrote:
> >
> > On Sat, Apr 24, 2021 at 12:38 AM Andrii Nakryiko
> > <andrii.nakryiko@gmail.com> wrote:
> > >
> > > On Mon, Apr 19, 2021 at 8:52 AM Florent Revest <revest@chromium.org> wrote:
> > > >
> > > > The "positive" part tests all format specifiers when things go well.
> > > >
> > > > The "negative" part makes sure that incorrect format strings fail at
> > > > load time.
> > > >
> > > > Signed-off-by: Florent Revest <revest@chromium.org>
> > > > ---
> > > >  .../selftests/bpf/prog_tests/snprintf.c       | 125 ++++++++++++++++++
> > > >  .../selftests/bpf/progs/test_snprintf.c       |  73 ++++++++++
> > > >  .../bpf/progs/test_snprintf_single.c          |  20 +++
> > > >  3 files changed, 218 insertions(+)
> > > >  create mode 100644 tools/testing/selftests/bpf/prog_tests/snprintf.c
> > > >  create mode 100644 tools/testing/selftests/bpf/progs/test_snprintf.c
> > > >  create mode 100644 tools/testing/selftests/bpf/progs/test_snprintf_single.c
> > > >
> > > > diff --git a/tools/testing/selftests/bpf/prog_tests/snprintf.c b/tools/testing/selftests/bpf/prog_tests/snprintf.c
> > > > new file mode 100644
> > > > index 000000000000..a958c22aec75
> > > > --- /dev/null
> > > > +++ b/tools/testing/selftests/bpf/prog_tests/snprintf.c
> > > > @@ -0,0 +1,125 @@
> > > > +// SPDX-License-Identifier: GPL-2.0
> > > > +/* Copyright (c) 2021 Google LLC. */
> > > > +
> > > > +#include <test_progs.h>
> > > > +#include "test_snprintf.skel.h"
> > > > +#include "test_snprintf_single.skel.h"
> > > > +
> > > > +#define EXP_NUM_OUT  "-8 9 96 -424242 1337 DABBAD00"
> > > > +#define EXP_NUM_RET  sizeof(EXP_NUM_OUT)
> > > > +
> > > > +#define EXP_IP_OUT   "127.000.000.001 0000:0000:0000:0000:0000:0000:0000:0001"
> > > > +#define EXP_IP_RET   sizeof(EXP_IP_OUT)
> > > > +
> > > > +/* The third specifier, %pB, depends on compiler inlining so don't check it */
> > > > +#define EXP_SYM_OUT  "schedule schedule+0x0/"
> > > > +#define MIN_SYM_RET  sizeof(EXP_SYM_OUT)
> > > > +
> > > > +/* The third specifier, %p, is a hashed pointer which changes on every reboot */
> > > > +#define EXP_ADDR_OUT "0000000000000000 ffff00000add4e55 "
> > > > +#define EXP_ADDR_RET sizeof(EXP_ADDR_OUT "unknownhashedptr")
> > > > +
> > > > +#define EXP_STR_OUT  "str1 longstr"
> > > > +#define EXP_STR_RET  sizeof(EXP_STR_OUT)
> > > > +
> > > > +#define EXP_OVER_OUT "%over"
> > > > +#define EXP_OVER_RET 10
> > > > +
> > > > +#define EXP_PAD_OUT "    4 000"
> > >
> > > Roughly 50% of the time I get failure for this test case:
> > >
> > > test_snprintf_positive:FAIL:pad_out unexpected pad_out: actual '    4
> > > 0000' != expected '    4 000'
> > >
> > > Re-running this test case immediately passes. Running again most
> > > probably fails. Please take a look.
> >
> > Do you have more information on how to reproduce this ?
> > I spinned up a VM at 87bd9e602 with ./vmtest -s and then run this script:
> >
> > #!/bin/sh
> > for i in `seq 1000`
> > do
> >   ./test_progs -t snprintf
> >   if [ $? -ne 0 ];
> >   then
> >     echo FAILURE
> >     exit 1
> >   fi
> > done
> >
> > The thousand executions passed.
> >
> > This is a bit concerning because your unexpected_pad_out seems to have
> > an extra '0' so it ends up with strlen(pad_out)=11 but
> > sizeof(pad_out)=10. The actual string writing is not really done by
> > our helper code but by the snprintf implementation (str and str_size
> > are only given to snprintf()) so I'd expect the truncation to work
> > well there. I'm a bit puzzled
>
> I'm puzzled too, have no idea. I also can't repro this with vmtest.sh.
> But I can quite reliably reproduce with my local ArchLinux-based qemu
> image with different config (see [0] for config itself). So please try
> with my config and see if that helps to repro. If not, I'll have to
> debug it on my own later.
>
>   [0] https://gist.github.com/anakryiko/4b6ae21680842bdeacca8fa99d378048

I tried that config on the same commit 87bd9e602 (bpf-next/master)
with my debian-based qemu image and I still can't reproduce the issue
:| If I can be of any help let me know, I'd be happy to help
Rasmus Villemoes April 27, 2021, 6:35 a.m. UTC | #5
On 26/04/2021 23.08, Florent Revest wrote:
> On Mon, Apr 26, 2021 at 6:19 PM Andrii Nakryiko
> <andrii.nakryiko@gmail.com> wrote:
>>
>> On Mon, Apr 26, 2021 at 3:10 AM Florent Revest <revest@chromium.org> wrote:
>>>
>>> On Sat, Apr 24, 2021 at 12:38 AM Andrii Nakryiko
>>> <andrii.nakryiko@gmail.com> wrote:
>>>>
>>>> On Mon, Apr 19, 2021 at 8:52 AM Florent Revest <revest@chromium.org> wrote:
>>>>>
>>>>> The "positive" part tests all format specifiers when things go well.
>>>>>
>>>>> The "negative" part makes sure that incorrect format strings fail at
>>>>> load time.
>>>>>
>>>>> Signed-off-by: Florent Revest <revest@chromium.org>
>>>>> ---
>>>>>  .../selftests/bpf/prog_tests/snprintf.c       | 125 ++++++++++++++++++
>>>>>  .../selftests/bpf/progs/test_snprintf.c       |  73 ++++++++++
>>>>>  .../bpf/progs/test_snprintf_single.c          |  20 +++
>>>>>  3 files changed, 218 insertions(+)
>>>>>  create mode 100644 tools/testing/selftests/bpf/prog_tests/snprintf.c
>>>>>  create mode 100644 tools/testing/selftests/bpf/progs/test_snprintf.c
>>>>>  create mode 100644 tools/testing/selftests/bpf/progs/test_snprintf_single.c
>>>>>
>>>>> diff --git a/tools/testing/selftests/bpf/prog_tests/snprintf.c b/tools/testing/selftests/bpf/prog_tests/snprintf.c
>>>>> new file mode 100644
>>>>> index 000000000000..a958c22aec75
>>>>> --- /dev/null
>>>>> +++ b/tools/testing/selftests/bpf/prog_tests/snprintf.c
>>>>> @@ -0,0 +1,125 @@
>>>>> +// SPDX-License-Identifier: GPL-2.0
>>>>> +/* Copyright (c) 2021 Google LLC. */
>>>>> +
>>>>> +#include <test_progs.h>
>>>>> +#include "test_snprintf.skel.h"
>>>>> +#include "test_snprintf_single.skel.h"
>>>>> +
>>>>> +#define EXP_NUM_OUT  "-8 9 96 -424242 1337 DABBAD00"
>>>>> +#define EXP_NUM_RET  sizeof(EXP_NUM_OUT)
>>>>> +
>>>>> +#define EXP_IP_OUT   "127.000.000.001 0000:0000:0000:0000:0000:0000:0000:0001"
>>>>> +#define EXP_IP_RET   sizeof(EXP_IP_OUT)
>>>>> +
>>>>> +/* The third specifier, %pB, depends on compiler inlining so don't check it */
>>>>> +#define EXP_SYM_OUT  "schedule schedule+0x0/"
>>>>> +#define MIN_SYM_RET  sizeof(EXP_SYM_OUT)
>>>>> +
>>>>> +/* The third specifier, %p, is a hashed pointer which changes on every reboot */
>>>>> +#define EXP_ADDR_OUT "0000000000000000 ffff00000add4e55 "
>>>>> +#define EXP_ADDR_RET sizeof(EXP_ADDR_OUT "unknownhashedptr")
>>>>> +
>>>>> +#define EXP_STR_OUT  "str1 longstr"
>>>>> +#define EXP_STR_RET  sizeof(EXP_STR_OUT)
>>>>> +
>>>>> +#define EXP_OVER_OUT "%over"
>>>>> +#define EXP_OVER_RET 10
>>>>> +
>>>>> +#define EXP_PAD_OUT "    4 000"
>>>>
>>>> Roughly 50% of the time I get failure for this test case:
>>>>
>>>> test_snprintf_positive:FAIL:pad_out unexpected pad_out: actual '    4
>>>> 0000' != expected '    4 000'
>>>>
>>>> Re-running this test case immediately passes. Running again most
>>>> probably fails. Please take a look.
>>>
>>> Do you have more information on how to reproduce this ?
>>> I spinned up a VM at 87bd9e602 with ./vmtest -s and then run this script:
>>>
>>> #!/bin/sh
>>> for i in `seq 1000`
>>> do
>>>   ./test_progs -t snprintf
>>>   if [ $? -ne 0 ];
>>>   then
>>>     echo FAILURE
>>>     exit 1
>>>   fi
>>> done
>>>
>>> The thousand executions passed.
>>>
>>> This is a bit concerning because your unexpected_pad_out seems to have
>>> an extra '0' so it ends up with strlen(pad_out)=11 but
>>> sizeof(pad_out)=10. The actual string writing is not really done by
>>> our helper code but by the snprintf implementation (str and str_size
>>> are only given to snprintf()) so I'd expect the truncation to work
>>> well there. I'm a bit puzzled
>>
>> I'm puzzled too, have no idea. I also can't repro this with vmtest.sh.
>> But I can quite reliably reproduce with my local ArchLinux-based qemu
>> image with different config (see [0] for config itself). So please try
>> with my config and see if that helps to repro. If not, I'll have to
>> debug it on my own later.
>>
>>   [0] https://gist.github.com/anakryiko/4b6ae21680842bdeacca8fa99d378048
> 
> I tried that config on the same commit 87bd9e602 (bpf-next/master)
> with my debian-based qemu image and I still can't reproduce the issue
> :| If I can be of any help let me know, I'd be happy to help
> 

It's not really clear to me if this is before or after the rewrite to
use bprintf, but regardless, in those two patches this caught my attention:

 	u64 args[MAX_TRACE_PRINTK_VARARGS] = { arg1, arg2, arg3 };
-	enum bpf_printf_mod_type mod[MAX_TRACE_PRINTK_VARARGS];
+	u32 *bin_args;
 	static char buf[BPF_TRACE_PRINTK_SIZE];
 	unsigned long flags;
 	int ret;

-	ret = bpf_printf_prepare(fmt, fmt_size, args, args, mod,
-				 MAX_TRACE_PRINTK_VARARGS);
+	ret = bpf_bprintf_prepare(fmt, fmt_size, args, &bin_args,
+				  MAX_TRACE_PRINTK_VARARGS);
 	if (ret < 0)
 		return ret;

-	ret = snprintf(buf, sizeof(buf), fmt, BPF_CAST_FMT_ARG(0, args, mod),
-		BPF_CAST_FMT_ARG(1, args, mod), BPF_CAST_FMT_ARG(2, args, mod));
-	/* snprintf() will not append null for zero-length strings */
-	if (ret == 0)
-		buf[0] = '\0';
+	ret = bstr_printf(buf, sizeof(buf), fmt, bin_args);

 	raw_spin_lock_irqsave(&trace_printk_lock, flags);
 	trace_bpf_trace_printk(buf);
 	raw_spin_unlock_irqrestore(&trace_printk_lock, flags);

Why isn't the write to buf[] protected by that spinlock? Or put another
way, what protects buf[] from concurrent writes?

Probably the test cases are not run in parallel, but this is the kind of
thing that would give those symptoms.

Rasmus
Florent Revest April 27, 2021, 9:50 a.m. UTC | #6
On Tue, Apr 27, 2021 at 8:35 AM Rasmus Villemoes
<linux@rasmusvillemoes.dk> wrote:
>
> On 26/04/2021 23.08, Florent Revest wrote:
> > On Mon, Apr 26, 2021 at 6:19 PM Andrii Nakryiko
> > <andrii.nakryiko@gmail.com> wrote:
> >>
> >> On Mon, Apr 26, 2021 at 3:10 AM Florent Revest <revest@chromium.org> wrote:
> >>>
> >>> On Sat, Apr 24, 2021 at 12:38 AM Andrii Nakryiko
> >>> <andrii.nakryiko@gmail.com> wrote:
> >>>>
> >>>> On Mon, Apr 19, 2021 at 8:52 AM Florent Revest <revest@chromium.org> wrote:
> >>>>>
> >>>>> The "positive" part tests all format specifiers when things go well.
> >>>>>
> >>>>> The "negative" part makes sure that incorrect format strings fail at
> >>>>> load time.
> >>>>>
> >>>>> Signed-off-by: Florent Revest <revest@chromium.org>
> >>>>> ---
> >>>>>  .../selftests/bpf/prog_tests/snprintf.c       | 125 ++++++++++++++++++
> >>>>>  .../selftests/bpf/progs/test_snprintf.c       |  73 ++++++++++
> >>>>>  .../bpf/progs/test_snprintf_single.c          |  20 +++
> >>>>>  3 files changed, 218 insertions(+)
> >>>>>  create mode 100644 tools/testing/selftests/bpf/prog_tests/snprintf.c
> >>>>>  create mode 100644 tools/testing/selftests/bpf/progs/test_snprintf.c
> >>>>>  create mode 100644 tools/testing/selftests/bpf/progs/test_snprintf_single.c
> >>>>>
> >>>>> diff --git a/tools/testing/selftests/bpf/prog_tests/snprintf.c b/tools/testing/selftests/bpf/prog_tests/snprintf.c
> >>>>> new file mode 100644
> >>>>> index 000000000000..a958c22aec75
> >>>>> --- /dev/null
> >>>>> +++ b/tools/testing/selftests/bpf/prog_tests/snprintf.c
> >>>>> @@ -0,0 +1,125 @@
> >>>>> +// SPDX-License-Identifier: GPL-2.0
> >>>>> +/* Copyright (c) 2021 Google LLC. */
> >>>>> +
> >>>>> +#include <test_progs.h>
> >>>>> +#include "test_snprintf.skel.h"
> >>>>> +#include "test_snprintf_single.skel.h"
> >>>>> +
> >>>>> +#define EXP_NUM_OUT  "-8 9 96 -424242 1337 DABBAD00"
> >>>>> +#define EXP_NUM_RET  sizeof(EXP_NUM_OUT)
> >>>>> +
> >>>>> +#define EXP_IP_OUT   "127.000.000.001 0000:0000:0000:0000:0000:0000:0000:0001"
> >>>>> +#define EXP_IP_RET   sizeof(EXP_IP_OUT)
> >>>>> +
> >>>>> +/* The third specifier, %pB, depends on compiler inlining so don't check it */
> >>>>> +#define EXP_SYM_OUT  "schedule schedule+0x0/"
> >>>>> +#define MIN_SYM_RET  sizeof(EXP_SYM_OUT)
> >>>>> +
> >>>>> +/* The third specifier, %p, is a hashed pointer which changes on every reboot */
> >>>>> +#define EXP_ADDR_OUT "0000000000000000 ffff00000add4e55 "
> >>>>> +#define EXP_ADDR_RET sizeof(EXP_ADDR_OUT "unknownhashedptr")
> >>>>> +
> >>>>> +#define EXP_STR_OUT  "str1 longstr"
> >>>>> +#define EXP_STR_RET  sizeof(EXP_STR_OUT)
> >>>>> +
> >>>>> +#define EXP_OVER_OUT "%over"
> >>>>> +#define EXP_OVER_RET 10
> >>>>> +
> >>>>> +#define EXP_PAD_OUT "    4 000"
> >>>>
> >>>> Roughly 50% of the time I get failure for this test case:
> >>>>
> >>>> test_snprintf_positive:FAIL:pad_out unexpected pad_out: actual '    4
> >>>> 0000' != expected '    4 000'
> >>>>
> >>>> Re-running this test case immediately passes. Running again most
> >>>> probably fails. Please take a look.
> >>>
> >>> Do you have more information on how to reproduce this ?
> >>> I spinned up a VM at 87bd9e602 with ./vmtest -s and then run this script:
> >>>
> >>> #!/bin/sh
> >>> for i in `seq 1000`
> >>> do
> >>>   ./test_progs -t snprintf
> >>>   if [ $? -ne 0 ];
> >>>   then
> >>>     echo FAILURE
> >>>     exit 1
> >>>   fi
> >>> done
> >>>
> >>> The thousand executions passed.
> >>>
> >>> This is a bit concerning because your unexpected_pad_out seems to have
> >>> an extra '0' so it ends up with strlen(pad_out)=11 but
> >>> sizeof(pad_out)=10. The actual string writing is not really done by
> >>> our helper code but by the snprintf implementation (str and str_size
> >>> are only given to snprintf()) so I'd expect the truncation to work
> >>> well there. I'm a bit puzzled
> >>
> >> I'm puzzled too, have no idea. I also can't repro this with vmtest.sh.
> >> But I can quite reliably reproduce with my local ArchLinux-based qemu
> >> image with different config (see [0] for config itself). So please try
> >> with my config and see if that helps to repro. If not, I'll have to
> >> debug it on my own later.
> >>
> >>   [0] https://gist.github.com/anakryiko/4b6ae21680842bdeacca8fa99d378048
> >
> > I tried that config on the same commit 87bd9e602 (bpf-next/master)
> > with my debian-based qemu image and I still can't reproduce the issue
> > :| If I can be of any help let me know, I'd be happy to help
> >
>
> It's not really clear to me if this is before or after the rewrite to
> use bprintf, but regardless, in those two patches this caught my attention:

I tried to reproduce Andrii's bug both before and after the bprintf
rewrite but I think he meant before.

>         u64 args[MAX_TRACE_PRINTK_VARARGS] = { arg1, arg2, arg3 };
> -       enum bpf_printf_mod_type mod[MAX_TRACE_PRINTK_VARARGS];
> +       u32 *bin_args;
>         static char buf[BPF_TRACE_PRINTK_SIZE];
>         unsigned long flags;
>         int ret;
>
> -       ret = bpf_printf_prepare(fmt, fmt_size, args, args, mod,
> -                                MAX_TRACE_PRINTK_VARARGS);
> +       ret = bpf_bprintf_prepare(fmt, fmt_size, args, &bin_args,
> +                                 MAX_TRACE_PRINTK_VARARGS);
>         if (ret < 0)
>                 return ret;
>
> -       ret = snprintf(buf, sizeof(buf), fmt, BPF_CAST_FMT_ARG(0, args, mod),
> -               BPF_CAST_FMT_ARG(1, args, mod), BPF_CAST_FMT_ARG(2, args, mod));
> -       /* snprintf() will not append null for zero-length strings */
> -       if (ret == 0)
> -               buf[0] = '\0';
> +       ret = bstr_printf(buf, sizeof(buf), fmt, bin_args);
>
>         raw_spin_lock_irqsave(&trace_printk_lock, flags);
>         trace_bpf_trace_printk(buf);
>         raw_spin_unlock_irqrestore(&trace_printk_lock, flags);
>
> Why isn't the write to buf[] protected by that spinlock? Or put another
> way, what protects buf[] from concurrent writes?

You're right, that is a bug, I missed that buf was static and thought
it was just on the stack. That snprintf call should be after the
raw_spin_lock_irqsave. I'll send a patch. Thank you Rasmus. (before my
snprintf series, there was a vsprintf after the raw_spin_lock_irqsave)

> Probably the test cases are not run in parallel, but this is the kind of
> thing that would give those symptoms.

I think it's a separate issue from what Andrii reported though because
the flaky test exercises the bpf_snprintf helper and this buf spinlock
bug you just found only affects the bpf_trace_printk helper.

That being said, it does smell a little bit like a concurrency issue
too, indeed. The bpf_snprintf test program is a raw_tp/sys_enter so it
attaches to all syscall entries and most likely gets executed many
more times than necessary and probably on parallel CPUs. The "pad_out"
buffer they write to is unique and not locked so maybe the test's
userspace reads pad_out while another CPU is writing on it and if the
string output goes through a stage where it is "    4 0000" before
being "    4 000", we might read at the wrong time. That being said, I
would find it weird that this happens as much as 50% of the time and
always specifically on that test case.

Andrii could you maybe try changing the prog type to
"tp/syscalls/sys_enter_nanosleep" on the machine where you can
reproduce this bug ?

> Rasmus
Andrii Nakryiko April 27, 2021, 6:03 p.m. UTC | #7
On Tue, Apr 27, 2021 at 2:51 AM Florent Revest <revest@chromium.org> wrote:
>
> On Tue, Apr 27, 2021 at 8:35 AM Rasmus Villemoes
> <linux@rasmusvillemoes.dk> wrote:
> >
> > On 26/04/2021 23.08, Florent Revest wrote:
> > > On Mon, Apr 26, 2021 at 6:19 PM Andrii Nakryiko
> > > <andrii.nakryiko@gmail.com> wrote:
> > >>
> > >> On Mon, Apr 26, 2021 at 3:10 AM Florent Revest <revest@chromium.org> wrote:
> > >>>
> > >>> On Sat, Apr 24, 2021 at 12:38 AM Andrii Nakryiko
> > >>> <andrii.nakryiko@gmail.com> wrote:
> > >>>>
> > >>>> On Mon, Apr 19, 2021 at 8:52 AM Florent Revest <revest@chromium.org> wrote:
> > >>>>>
> > >>>>> The "positive" part tests all format specifiers when things go well.
> > >>>>>
> > >>>>> The "negative" part makes sure that incorrect format strings fail at
> > >>>>> load time.
> > >>>>>
> > >>>>> Signed-off-by: Florent Revest <revest@chromium.org>
> > >>>>> ---
> > >>>>>  .../selftests/bpf/prog_tests/snprintf.c       | 125 ++++++++++++++++++
> > >>>>>  .../selftests/bpf/progs/test_snprintf.c       |  73 ++++++++++
> > >>>>>  .../bpf/progs/test_snprintf_single.c          |  20 +++
> > >>>>>  3 files changed, 218 insertions(+)
> > >>>>>  create mode 100644 tools/testing/selftests/bpf/prog_tests/snprintf.c
> > >>>>>  create mode 100644 tools/testing/selftests/bpf/progs/test_snprintf.c
> > >>>>>  create mode 100644 tools/testing/selftests/bpf/progs/test_snprintf_single.c
> > >>>>>
> > >>>>> diff --git a/tools/testing/selftests/bpf/prog_tests/snprintf.c b/tools/testing/selftests/bpf/prog_tests/snprintf.c
> > >>>>> new file mode 100644
> > >>>>> index 000000000000..a958c22aec75
> > >>>>> --- /dev/null
> > >>>>> +++ b/tools/testing/selftests/bpf/prog_tests/snprintf.c
> > >>>>> @@ -0,0 +1,125 @@
> > >>>>> +// SPDX-License-Identifier: GPL-2.0
> > >>>>> +/* Copyright (c) 2021 Google LLC. */
> > >>>>> +
> > >>>>> +#include <test_progs.h>
> > >>>>> +#include "test_snprintf.skel.h"
> > >>>>> +#include "test_snprintf_single.skel.h"
> > >>>>> +
> > >>>>> +#define EXP_NUM_OUT  "-8 9 96 -424242 1337 DABBAD00"
> > >>>>> +#define EXP_NUM_RET  sizeof(EXP_NUM_OUT)
> > >>>>> +
> > >>>>> +#define EXP_IP_OUT   "127.000.000.001 0000:0000:0000:0000:0000:0000:0000:0001"
> > >>>>> +#define EXP_IP_RET   sizeof(EXP_IP_OUT)
> > >>>>> +
> > >>>>> +/* The third specifier, %pB, depends on compiler inlining so don't check it */
> > >>>>> +#define EXP_SYM_OUT  "schedule schedule+0x0/"
> > >>>>> +#define MIN_SYM_RET  sizeof(EXP_SYM_OUT)
> > >>>>> +
> > >>>>> +/* The third specifier, %p, is a hashed pointer which changes on every reboot */
> > >>>>> +#define EXP_ADDR_OUT "0000000000000000 ffff00000add4e55 "
> > >>>>> +#define EXP_ADDR_RET sizeof(EXP_ADDR_OUT "unknownhashedptr")
> > >>>>> +
> > >>>>> +#define EXP_STR_OUT  "str1 longstr"
> > >>>>> +#define EXP_STR_RET  sizeof(EXP_STR_OUT)
> > >>>>> +
> > >>>>> +#define EXP_OVER_OUT "%over"
> > >>>>> +#define EXP_OVER_RET 10
> > >>>>> +
> > >>>>> +#define EXP_PAD_OUT "    4 000"
> > >>>>
> > >>>> Roughly 50% of the time I get failure for this test case:
> > >>>>
> > >>>> test_snprintf_positive:FAIL:pad_out unexpected pad_out: actual '    4
> > >>>> 0000' != expected '    4 000'
> > >>>>
> > >>>> Re-running this test case immediately passes. Running again most
> > >>>> probably fails. Please take a look.
> > >>>
> > >>> Do you have more information on how to reproduce this ?
> > >>> I spinned up a VM at 87bd9e602 with ./vmtest -s and then run this script:
> > >>>
> > >>> #!/bin/sh
> > >>> for i in `seq 1000`
> > >>> do
> > >>>   ./test_progs -t snprintf
> > >>>   if [ $? -ne 0 ];
> > >>>   then
> > >>>     echo FAILURE
> > >>>     exit 1
> > >>>   fi
> > >>> done
> > >>>
> > >>> The thousand executions passed.
> > >>>
> > >>> This is a bit concerning because your unexpected_pad_out seems to have
> > >>> an extra '0' so it ends up with strlen(pad_out)=11 but
> > >>> sizeof(pad_out)=10. The actual string writing is not really done by
> > >>> our helper code but by the snprintf implementation (str and str_size
> > >>> are only given to snprintf()) so I'd expect the truncation to work
> > >>> well there. I'm a bit puzzled
> > >>
> > >> I'm puzzled too, have no idea. I also can't repro this with vmtest.sh.
> > >> But I can quite reliably reproduce with my local ArchLinux-based qemu
> > >> image with different config (see [0] for config itself). So please try
> > >> with my config and see if that helps to repro. If not, I'll have to
> > >> debug it on my own later.
> > >>
> > >>   [0] https://gist.github.com/anakryiko/4b6ae21680842bdeacca8fa99d378048
> > >
> > > I tried that config on the same commit 87bd9e602 (bpf-next/master)
> > > with my debian-based qemu image and I still can't reproduce the issue
> > > :| If I can be of any help let me know, I'd be happy to help
> > >
> >
> > It's not really clear to me if this is before or after the rewrite to
> > use bprintf, but regardless, in those two patches this caught my attention:
>
> I tried to reproduce Andrii's bug both before and after the bprintf
> rewrite but I think he meant before.

I'm running on the latest bpf-next master, but I don't think it's
related to bprintf change.

>
> >         u64 args[MAX_TRACE_PRINTK_VARARGS] = { arg1, arg2, arg3 };
> > -       enum bpf_printf_mod_type mod[MAX_TRACE_PRINTK_VARARGS];
> > +       u32 *bin_args;
> >         static char buf[BPF_TRACE_PRINTK_SIZE];
> >         unsigned long flags;
> >         int ret;
> >
> > -       ret = bpf_printf_prepare(fmt, fmt_size, args, args, mod,
> > -                                MAX_TRACE_PRINTK_VARARGS);
> > +       ret = bpf_bprintf_prepare(fmt, fmt_size, args, &bin_args,
> > +                                 MAX_TRACE_PRINTK_VARARGS);
> >         if (ret < 0)
> >                 return ret;
> >
> > -       ret = snprintf(buf, sizeof(buf), fmt, BPF_CAST_FMT_ARG(0, args, mod),
> > -               BPF_CAST_FMT_ARG(1, args, mod), BPF_CAST_FMT_ARG(2, args, mod));
> > -       /* snprintf() will not append null for zero-length strings */
> > -       if (ret == 0)
> > -               buf[0] = '\0';
> > +       ret = bstr_printf(buf, sizeof(buf), fmt, bin_args);
> >
> >         raw_spin_lock_irqsave(&trace_printk_lock, flags);
> >         trace_bpf_trace_printk(buf);
> >         raw_spin_unlock_irqrestore(&trace_printk_lock, flags);
> >
> > Why isn't the write to buf[] protected by that spinlock? Or put another
> > way, what protects buf[] from concurrent writes?
>
> You're right, that is a bug, I missed that buf was static and thought
> it was just on the stack. That snprintf call should be after the
> raw_spin_lock_irqsave. I'll send a patch. Thank you Rasmus. (before my
> snprintf series, there was a vsprintf after the raw_spin_lock_irqsave)

Can you please also clean up unnecessary ()s you added in at least a
few places. Thanks.

>
> > Probably the test cases are not run in parallel, but this is the kind of
> > thing that would give those symptoms.
>
> I think it's a separate issue from what Andrii reported though because
> the flaky test exercises the bpf_snprintf helper and this buf spinlock
> bug you just found only affects the bpf_trace_printk helper.
>
> That being said, it does smell a little bit like a concurrency issue
> too, indeed. The bpf_snprintf test program is a raw_tp/sys_enter so it
> attaches to all syscall entries and most likely gets executed many
> more times than necessary and probably on parallel CPUs. The "pad_out"
> buffer they write to is unique and not locked so maybe the test's
> userspace reads pad_out while another CPU is writing on it and if the
> string output goes through a stage where it is "    4 0000" before
> being "    4 000", we might read at the wrong time. That being said, I
> would find it weird that this happens as much as 50% of the time and
> always specifically on that test case.
>
> Andrii could you maybe try changing the prog type to
> "tp/syscalls/sys_enter_nanosleep" on the machine where you can
> reproduce this bug ?

Yes, it helps. I can't repro it easily anymore. I think the right fix,
though, should be to filter by tid, not change the tracepoint.

I think what's happening is we see the string right before bstr_printf
does zero-termination with end[-1] = '\0'; So in some cases we see
truncated string, in others we see untruncated one.

>
> > Rasmus
Florent Revest April 28, 2021, 2:59 p.m. UTC | #8
On Tue, Apr 27, 2021 at 8:03 PM Andrii Nakryiko
<andrii.nakryiko@gmail.com> wrote:
>
> On Tue, Apr 27, 2021 at 2:51 AM Florent Revest <revest@chromium.org> wrote:
> >
> > On Tue, Apr 27, 2021 at 8:35 AM Rasmus Villemoes
> > <linux@rasmusvillemoes.dk> wrote:
> > >         u64 args[MAX_TRACE_PRINTK_VARARGS] = { arg1, arg2, arg3 };
> > > -       enum bpf_printf_mod_type mod[MAX_TRACE_PRINTK_VARARGS];
> > > +       u32 *bin_args;
> > >         static char buf[BPF_TRACE_PRINTK_SIZE];
> > >         unsigned long flags;
> > >         int ret;
> > >
> > > -       ret = bpf_printf_prepare(fmt, fmt_size, args, args, mod,
> > > -                                MAX_TRACE_PRINTK_VARARGS);
> > > +       ret = bpf_bprintf_prepare(fmt, fmt_size, args, &bin_args,
> > > +                                 MAX_TRACE_PRINTK_VARARGS);
> > >         if (ret < 0)
> > >                 return ret;
> > >
> > > -       ret = snprintf(buf, sizeof(buf), fmt, BPF_CAST_FMT_ARG(0, args, mod),
> > > -               BPF_CAST_FMT_ARG(1, args, mod), BPF_CAST_FMT_ARG(2, args, mod));
> > > -       /* snprintf() will not append null for zero-length strings */
> > > -       if (ret == 0)
> > > -               buf[0] = '\0';
> > > +       ret = bstr_printf(buf, sizeof(buf), fmt, bin_args);
> > >
> > >         raw_spin_lock_irqsave(&trace_printk_lock, flags);
> > >         trace_bpf_trace_printk(buf);
> > >         raw_spin_unlock_irqrestore(&trace_printk_lock, flags);
> > >
> > > Why isn't the write to buf[] protected by that spinlock? Or put another
> > > way, what protects buf[] from concurrent writes?
> >
> > You're right, that is a bug, I missed that buf was static and thought
> > it was just on the stack. That snprintf call should be after the
> > raw_spin_lock_irqsave. I'll send a patch. Thank you Rasmus. (before my
> > snprintf series, there was a vsprintf after the raw_spin_lock_irqsave)

Solved now

> Can you please also clean up unnecessary ()s you added in at least a
> few places. Thanks.

Alexei said he took care of this .:)

> > > Probably the test cases are not run in parallel, but this is the kind of
> > > thing that would give those symptoms.
> >
> > I think it's a separate issue from what Andrii reported though because
> > the flaky test exercises the bpf_snprintf helper and this buf spinlock
> > bug you just found only affects the bpf_trace_printk helper.
> >
> > That being said, it does smell a little bit like a concurrency issue
> > too, indeed. The bpf_snprintf test program is a raw_tp/sys_enter so it
> > attaches to all syscall entries and most likely gets executed many
> > more times than necessary and probably on parallel CPUs. The "pad_out"
> > buffer they write to is unique and not locked so maybe the test's
> > userspace reads pad_out while another CPU is writing on it and if the
> > string output goes through a stage where it is "    4 0000" before
> > being "    4 000", we might read at the wrong time. That being said, I
> > would find it weird that this happens as much as 50% of the time and
> > always specifically on that test case.
> >
> > Andrii could you maybe try changing the prog type to
> > "tp/syscalls/sys_enter_nanosleep" on the machine where you can
> > reproduce this bug ?
>
> Yes, it helps. I can't repro it easily anymore.

Good, so it does sound like a concurrency issue indeed

> I think the right fix, though, should be to filter by tid, not change the tracepoint.

Agreed, I'll send a patch for that today. :)

> I think what's happening is we see the string right before bstr_printf
> does zero-termination with end[-1] = '\0'; So in some cases we see
> truncated string, in others we see untruncated one.

Makes sense but I still wonder why it happens so often (50% of the
time is really a lot) and why it is consistently that one test case
that fails and not the "overflow" case for example. But I'm confident
that this is not a bug in the helper now and that the tid filter will
fix the test.
Rasmus Villemoes May 5, 2021, 6:55 a.m. UTC | #9
On 28/04/2021 16.59, Florent Revest wrote:
> On Tue, Apr 27, 2021 at 8:03 PM Andrii Nakryiko
> <andrii.nakryiko@gmail.com> wrote:
>>
>> On Tue, Apr 27, 2021 at 2:51 AM Florent Revest <revest@chromium.org> wrote:
>>>
>>> On Tue, Apr 27, 2021 at 8:35 AM Rasmus Villemoes
>>> <linux@rasmusvillemoes.dk> wrote:
>>>>         u64 args[MAX_TRACE_PRINTK_VARARGS] = { arg1, arg2, arg3 };
>>>> -       enum bpf_printf_mod_type mod[MAX_TRACE_PRINTK_VARARGS];
>>>> +       u32 *bin_args;
>>>>         static char buf[BPF_TRACE_PRINTK_SIZE];
>>>>         unsigned long flags;
>>>>         int ret;
>>>>
>>>> -       ret = bpf_printf_prepare(fmt, fmt_size, args, args, mod,
>>>> -                                MAX_TRACE_PRINTK_VARARGS);
>>>> +       ret = bpf_bprintf_prepare(fmt, fmt_size, args, &bin_args,
>>>> +                                 MAX_TRACE_PRINTK_VARARGS);
>>>>         if (ret < 0)
>>>>                 return ret;
>>>>
>>>> -       ret = snprintf(buf, sizeof(buf), fmt, BPF_CAST_FMT_ARG(0, args, mod),
>>>> -               BPF_CAST_FMT_ARG(1, args, mod), BPF_CAST_FMT_ARG(2, args, mod));
>>>> -       /* snprintf() will not append null for zero-length strings */
>>>> -       if (ret == 0)
>>>> -               buf[0] = '\0';
>>>> +       ret = bstr_printf(buf, sizeof(buf), fmt, bin_args);
>>>>
>>>>         raw_spin_lock_irqsave(&trace_printk_lock, flags);
>>>>         trace_bpf_trace_printk(buf);
>>>>         raw_spin_unlock_irqrestore(&trace_printk_lock, flags);
>>>>
>>>> Why isn't the write to buf[] protected by that spinlock? Or put another
>>>> way, what protects buf[] from concurrent writes?
>>>
>>> You're right, that is a bug, I missed that buf was static and thought
>>> it was just on the stack. That snprintf call should be after the
>>> raw_spin_lock_irqsave. I'll send a patch. Thank you Rasmus. (before my
>>> snprintf series, there was a vsprintf after the raw_spin_lock_irqsave)
> 
> Solved now
> 
>> Can you please also clean up unnecessary ()s you added in at least a
>> few places. Thanks.
> 
> Alexei said he took care of this .:)
> 
>>>> Probably the test cases are not run in parallel, but this is the kind of
>>>> thing that would give those symptoms.
>>>
>>> I think it's a separate issue from what Andrii reported though because
>>> the flaky test exercises the bpf_snprintf helper and this buf spinlock
>>> bug you just found only affects the bpf_trace_printk helper.
>>>
>>> That being said, it does smell a little bit like a concurrency issue
>>> too, indeed. The bpf_snprintf test program is a raw_tp/sys_enter so it
>>> attaches to all syscall entries and most likely gets executed many
>>> more times than necessary and probably on parallel CPUs. The "pad_out"
>>> buffer they write to is unique and not locked so maybe the test's
>>> userspace reads pad_out while another CPU is writing on it and if the
>>> string output goes through a stage where it is "    4 0000" before
>>> being "    4 000", we might read at the wrong time. That being said, I
>>> would find it weird that this happens as much as 50% of the time and
>>> always specifically on that test case.
>>>
>>> Andrii could you maybe try changing the prog type to
>>> "tp/syscalls/sys_enter_nanosleep" on the machine where you can
>>> reproduce this bug ?
>>
>> Yes, it helps. I can't repro it easily anymore.
> 
> Good, so it does sound like a concurrency issue indeed
> 
>> I think the right fix, though, should be to filter by tid, not change the tracepoint.
> 
> Agreed, I'll send a patch for that today. :)
> 
>> I think what's happening is we see the string right before bstr_printf
>> does zero-termination with end[-1] = '\0'; So in some cases we see
>> truncated string, in others we see untruncated one.
> 
> Makes sense but I still wonder why it happens so often (50% of the
> time is really a lot) and why it is consistently that one test case
> that fails and not the "overflow" case for example. But I'm confident
> that this is not a bug in the helper now and that the tid filter will
> fix the test.
> 

If the caller, or one of its sibling threads, inspects the buffer before
(v)snprintf has returned it's very obviously a bug in the caller. As for
why that particular case exposes the race: It seems to be the only one
that "expects" an insanely long result, and it takes a very very long
time (several cycles per byte I'd assume) for the vsnprintf code to very
carefully go through the

  if (buf < end)
     *buf = /* '0' or ' ' or whatever it is it is emitting here */
  buf++;

900k times. So there's simply a very large window where the buffer
contents is "    4 0000" while number() is still 'emitting' 0s until
control returns to vsnprintf() which does that final end[-1] = '\0'.

Rasmus
Florent Revest May 5, 2021, 2:25 p.m. UTC | #10
On Wed, May 5, 2021 at 8:55 AM Rasmus Villemoes
<linux@rasmusvillemoes.dk> wrote:
>
> On 28/04/2021 16.59, Florent Revest wrote:
> > On Tue, Apr 27, 2021 at 8:03 PM Andrii Nakryiko
> > <andrii.nakryiko@gmail.com> wrote:
> >>
> >> On Tue, Apr 27, 2021 at 2:51 AM Florent Revest <revest@chromium.org> wrote:
> >>>
> >>> On Tue, Apr 27, 2021 at 8:35 AM Rasmus Villemoes
> >>> <linux@rasmusvillemoes.dk> wrote:
> >>>> Probably the test cases are not run in parallel, but this is the kind of
> >>>> thing that would give those symptoms.
> >>>
> >>> I think it's a separate issue from what Andrii reported though because
> >>> the flaky test exercises the bpf_snprintf helper and this buf spinlock
> >>> bug you just found only affects the bpf_trace_printk helper.
> >>>
> >>> That being said, it does smell a little bit like a concurrency issue
> >>> too, indeed. The bpf_snprintf test program is a raw_tp/sys_enter so it
> >>> attaches to all syscall entries and most likely gets executed many
> >>> more times than necessary and probably on parallel CPUs. The "pad_out"
> >>> buffer they write to is unique and not locked so maybe the test's
> >>> userspace reads pad_out while another CPU is writing on it and if the
> >>> string output goes through a stage where it is "    4 0000" before
> >>> being "    4 000", we might read at the wrong time. That being said, I
> >>> would find it weird that this happens as much as 50% of the time and
> >>> always specifically on that test case.
> >>>
> >>> Andrii could you maybe try changing the prog type to
> >>> "tp/syscalls/sys_enter_nanosleep" on the machine where you can
> >>> reproduce this bug ?
> >>
> >> Yes, it helps. I can't repro it easily anymore.
> >
> > Good, so it does sound like a concurrency issue indeed
> >
> >> I think the right fix, though, should be to filter by tid, not change the tracepoint.
> >
> > Agreed, I'll send a patch for that today. :)
> >
> >> I think what's happening is we see the string right before bstr_printf
> >> does zero-termination with end[-1] = '\0'; So in some cases we see
> >> truncated string, in others we see untruncated one.
> >
> > Makes sense but I still wonder why it happens so often (50% of the
> > time is really a lot) and why it is consistently that one test case
> > that fails and not the "overflow" case for example. But I'm confident
> > that this is not a bug in the helper now and that the tid filter will
> > fix the test.
> >
>
> If the caller, or one of its sibling threads, inspects the buffer before
> (v)snprintf has returned it's very obviously a bug in the caller. As for

Absolutely

> why that particular case exposes the race: It seems to be the only one
> that "expects" an insanely long result, and it takes a very very long
> time (several cycles per byte I'd assume) for the vsnprintf code to very
> carefully go through the
>
>   if (buf < end)
>      *buf = /* '0' or ' ' or whatever it is it is emitting here */
>   buf++;
>
> 900k times. So there's simply a very large window where the buffer
> contents is "    4 0000" while number() is still 'emitting' 0s until
> control returns to vsnprintf() which does that final end[-1] = '\0'.

Aaah, of course, it makes complete sense! :) Thank you Rasmus
diff mbox series

Patch

diff --git a/tools/testing/selftests/bpf/prog_tests/snprintf.c b/tools/testing/selftests/bpf/prog_tests/snprintf.c
new file mode 100644
index 000000000000..a958c22aec75
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/snprintf.c
@@ -0,0 +1,125 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Google LLC. */
+
+#include <test_progs.h>
+#include "test_snprintf.skel.h"
+#include "test_snprintf_single.skel.h"
+
+#define EXP_NUM_OUT  "-8 9 96 -424242 1337 DABBAD00"
+#define EXP_NUM_RET  sizeof(EXP_NUM_OUT)
+
+#define EXP_IP_OUT   "127.000.000.001 0000:0000:0000:0000:0000:0000:0000:0001"
+#define EXP_IP_RET   sizeof(EXP_IP_OUT)
+
+/* The third specifier, %pB, depends on compiler inlining so don't check it */
+#define EXP_SYM_OUT  "schedule schedule+0x0/"
+#define MIN_SYM_RET  sizeof(EXP_SYM_OUT)
+
+/* The third specifier, %p, is a hashed pointer which changes on every reboot */
+#define EXP_ADDR_OUT "0000000000000000 ffff00000add4e55 "
+#define EXP_ADDR_RET sizeof(EXP_ADDR_OUT "unknownhashedptr")
+
+#define EXP_STR_OUT  "str1 longstr"
+#define EXP_STR_RET  sizeof(EXP_STR_OUT)
+
+#define EXP_OVER_OUT "%over"
+#define EXP_OVER_RET 10
+
+#define EXP_PAD_OUT "    4 000"
+#define EXP_PAD_RET 900007
+
+#define EXP_NO_ARG_OUT "simple case"
+#define EXP_NO_ARG_RET 12
+
+#define EXP_NO_BUF_RET 29
+
+void test_snprintf_positive(void)
+{
+	char exp_addr_out[] = EXP_ADDR_OUT;
+	char exp_sym_out[]  = EXP_SYM_OUT;
+	struct test_snprintf *skel;
+
+	skel = test_snprintf__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "skel_open"))
+		return;
+
+	if (!ASSERT_OK(test_snprintf__attach(skel), "skel_attach"))
+		goto cleanup;
+
+	/* trigger tracepoint */
+	usleep(1);
+
+	ASSERT_STREQ(skel->bss->num_out, EXP_NUM_OUT, "num_out");
+	ASSERT_EQ(skel->bss->num_ret, EXP_NUM_RET, "num_ret");
+
+	ASSERT_STREQ(skel->bss->ip_out, EXP_IP_OUT, "ip_out");
+	ASSERT_EQ(skel->bss->ip_ret, EXP_IP_RET, "ip_ret");
+
+	ASSERT_OK(memcmp(skel->bss->sym_out, exp_sym_out,
+			 sizeof(exp_sym_out) - 1), "sym_out");
+	ASSERT_LT(MIN_SYM_RET, skel->bss->sym_ret, "sym_ret");
+
+	ASSERT_OK(memcmp(skel->bss->addr_out, exp_addr_out,
+			 sizeof(exp_addr_out) - 1), "addr_out");
+	ASSERT_EQ(skel->bss->addr_ret, EXP_ADDR_RET, "addr_ret");
+
+	ASSERT_STREQ(skel->bss->str_out, EXP_STR_OUT, "str_out");
+	ASSERT_EQ(skel->bss->str_ret, EXP_STR_RET, "str_ret");
+
+	ASSERT_STREQ(skel->bss->over_out, EXP_OVER_OUT, "over_out");
+	ASSERT_EQ(skel->bss->over_ret, EXP_OVER_RET, "over_ret");
+
+	ASSERT_STREQ(skel->bss->pad_out, EXP_PAD_OUT, "pad_out");
+	ASSERT_EQ(skel->bss->pad_ret, EXP_PAD_RET, "pad_ret");
+
+	ASSERT_STREQ(skel->bss->noarg_out, EXP_NO_ARG_OUT, "no_arg_out");
+	ASSERT_EQ(skel->bss->noarg_ret, EXP_NO_ARG_RET, "no_arg_ret");
+
+	ASSERT_EQ(skel->bss->nobuf_ret, EXP_NO_BUF_RET, "no_buf_ret");
+
+cleanup:
+	test_snprintf__destroy(skel);
+}
+
+#define min(a, b) ((a) < (b) ? (a) : (b))
+
+/* Loads an eBPF object calling bpf_snprintf with up to 10 characters of fmt */
+static int load_single_snprintf(char *fmt)
+{
+	struct test_snprintf_single *skel;
+	int ret;
+
+	skel = test_snprintf_single__open();
+	if (!skel)
+		return -EINVAL;
+
+	memcpy(skel->rodata->fmt, fmt, min(strlen(fmt) + 1, 10));
+
+	ret = test_snprintf_single__load(skel);
+	test_snprintf_single__destroy(skel);
+
+	return ret;
+}
+
+void test_snprintf_negative(void)
+{
+	ASSERT_OK(load_single_snprintf("valid %d"), "valid usage");
+
+	ASSERT_ERR(load_single_snprintf("0123456789"), "no terminating zero");
+	ASSERT_ERR(load_single_snprintf("%d %d"), "too many specifiers");
+	ASSERT_ERR(load_single_snprintf("%pi5"), "invalid specifier 1");
+	ASSERT_ERR(load_single_snprintf("%a"), "invalid specifier 2");
+	ASSERT_ERR(load_single_snprintf("%"), "invalid specifier 3");
+	ASSERT_ERR(load_single_snprintf("%12345678"), "invalid specifier 4");
+	ASSERT_ERR(load_single_snprintf("%--------"), "invalid specifier 5");
+	ASSERT_ERR(load_single_snprintf("\x80"), "non ascii character");
+	ASSERT_ERR(load_single_snprintf("\x1"), "non printable character");
+}
+
+void test_snprintf(void)
+{
+	if (test__start_subtest("snprintf_positive"))
+		test_snprintf_positive();
+	if (test__start_subtest("snprintf_negative"))
+		test_snprintf_negative();
+}
diff --git a/tools/testing/selftests/bpf/progs/test_snprintf.c b/tools/testing/selftests/bpf/progs/test_snprintf.c
new file mode 100644
index 000000000000..951a0301c553
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_snprintf.c
@@ -0,0 +1,73 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Google LLC. */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char num_out[64] = {};
+long num_ret = 0;
+
+char ip_out[64] = {};
+long ip_ret = 0;
+
+char sym_out[64] = {};
+long sym_ret = 0;
+
+char addr_out[64] = {};
+long addr_ret = 0;
+
+char str_out[64] = {};
+long str_ret = 0;
+
+char over_out[6] = {};
+long over_ret = 0;
+
+char pad_out[10] = {};
+long pad_ret = 0;
+
+char noarg_out[64] = {};
+long noarg_ret = 0;
+
+long nobuf_ret = 0;
+
+extern const void schedule __ksym;
+
+SEC("raw_tp/sys_enter")
+int handler(const void *ctx)
+{
+	/* Convenient values to pretty-print */
+	const __u8 ex_ipv4[] = {127, 0, 0, 1};
+	const __u8 ex_ipv6[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
+	static const char str1[] = "str1";
+	static const char longstr[] = "longstr";
+
+	/* Integer types */
+	num_ret  = BPF_SNPRINTF(num_out, sizeof(num_out),
+				"%d %u %x %li %llu %lX",
+				-8, 9, 150, -424242, 1337, 0xDABBAD00);
+	/* IP addresses */
+	ip_ret   = BPF_SNPRINTF(ip_out, sizeof(ip_out), "%pi4 %pI6",
+				&ex_ipv4, &ex_ipv6);
+	/* Symbol lookup formatting */
+	sym_ret  = BPF_SNPRINTF(sym_out,  sizeof(sym_out), "%ps %pS %pB",
+				&schedule, &schedule, &schedule);
+	/* Kernel pointers */
+	addr_ret = BPF_SNPRINTF(addr_out, sizeof(addr_out), "%pK %px %p",
+				0, 0xFFFF00000ADD4E55, 0xFFFF00000ADD4E55);
+	/* Strings embedding */
+	str_ret  = BPF_SNPRINTF(str_out, sizeof(str_out), "%s %+05s",
+				str1, longstr);
+	/* Overflow */
+	over_ret = BPF_SNPRINTF(over_out, sizeof(over_out), "%%overflow");
+	/* Padding of fixed width numbers */
+	pad_ret = BPF_SNPRINTF(pad_out, sizeof(pad_out), "%5d %0900000X", 4, 4);
+	/* No args */
+	noarg_ret = BPF_SNPRINTF(noarg_out, sizeof(noarg_out), "simple case");
+	/* No buffer */
+	nobuf_ret = BPF_SNPRINTF(NULL, 0, "only interested in length %d", 60);
+
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_snprintf_single.c b/tools/testing/selftests/bpf/progs/test_snprintf_single.c
new file mode 100644
index 000000000000..402adaf344f9
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_snprintf_single.c
@@ -0,0 +1,20 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Google LLC. */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+/* The format string is filled from the userspace such that loading fails */
+static const char fmt[10];
+
+SEC("raw_tp/sys_enter")
+int handler(const void *ctx)
+{
+	unsigned long long arg = 42;
+
+	bpf_snprintf(NULL, 0, fmt, &arg, sizeof(arg));
+
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";