diff mbox series

[v3,bpf-next,11/19] bpf: keep track of total log content size in both fixed and rolling modes

Message ID 20230404043659.2282536-12-andrii@kernel.org (mailing list archive)
State Superseded
Delegated to: BPF
Headers show
Series BPF verifier rotating log | expand

Checks

Context Check Description
bpf/vmtest-bpf-next-PR success PR summary
netdev/series_format fail Series longer than 15 patches (and no cover letter)
netdev/tree_selection success Clearly marked for bpf-next, async
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 86 this patch: 86
netdev/cc_maintainers warning 8 maintainers not CCed: song@kernel.org sdf@google.com haoluo@google.com yhs@fb.com john.fastabend@gmail.com kpsingh@kernel.org jolsa@kernel.org martin.lau@linux.dev
netdev/build_clang success Errors and warnings before: 30 this patch: 30
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 86 this patch: 86
netdev/checkpatch warning WARNING: line length of 81 exceeds 80 columns
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
bpf/vmtest-bpf-next-VM_Test-10 success Logs for test_maps on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-9 success Logs for test_maps on aarch64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-11 success Logs for test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-12 success Logs for test_maps on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-13 success Logs for test_progs on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-14 success Logs for test_progs on aarch64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-16 success Logs for test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-17 success Logs for test_progs on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-18 success Logs for test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-19 success Logs for test_progs_no_alu32 on aarch64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-21 success Logs for test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-22 success Logs for test_progs_no_alu32 on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-23 success Logs for test_progs_no_alu32_parallel on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-24 success Logs for test_progs_no_alu32_parallel on aarch64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-25 success Logs for test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-26 success Logs for test_progs_no_alu32_parallel on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-27 success Logs for test_progs_parallel on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-28 success Logs for test_progs_parallel on aarch64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-29 success Logs for test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-30 success Logs for test_progs_parallel on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-31 success Logs for test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-32 success Logs for test_verifier on aarch64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-34 success Logs for test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-35 success Logs for test_verifier on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-15 success Logs for test_progs on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-33 success Logs for test_verifier on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-20 success Logs for test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ${{ matrix.test }} on ${{ matrix.arch }} with ${{ matrix.toolchain_full }}
bpf/vmtest-bpf-next-VM_Test-2 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-3 success Logs for build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-4 success Logs for build for aarch64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-5 fail Logs for build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-6 success Logs for build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-7 success Logs for build for x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-8 success Logs for set-matrix

Commit Message

Andrii Nakryiko April 4, 2023, 4:36 a.m. UTC
Change how we do accounting in BPF_LOG_FIXED mode and adopt log->end_pos
as *logical* log position. This means that we can go beyond physical log
buffer size now and be able to tell what log buffer size should be to
fit entire log contents without -ENOSPC.

To do this for BPF_LOG_FIXED mode, we need to remove a short-circuiting
logic of not vsnprintf()'ing further log content once we filled up
user-provided buffer, which is done by bpf_verifier_log_needed() checks.
We modify these checks to always keep going if log->level is non-zero
(i.e., log is requested), even if log->ubuf was NULL'ed out due to
copying data to user-space, or if entire log buffer is physically full.
We adopt bpf_verifier_vlog() routine to work correctly with
log->ubuf == NULL condition, performing log formatting into temporary
kernel buffer, doing all the necessary accounting, but just avoiding
copying data out if buffer is full or NULL'ed out.

With these changes, it's now possible to do this sort of determination of
log contents size in both BPF_LOG_FIXED and default rolling log mode.
We need to keep in mind bpf_vlog_reset(), though, which shrinks log
contents after successful verification of a particular code path. This
log reset means that log->end_pos isn't always increasing, so to return
back to users what should be the log buffer size to fit all log content
without causing -ENOSPC even in the presence of log resetting, we need
to keep maximum over "lifetime" of logging. We do this accounting in
bpf_vlog_update_len_max() helper.

A related and subtle aspect is that with this logical log->end_pos even in
BPF_LOG_FIXED mode we could temporarily "overflow" buffer, but then reset
it back with bpf_vlog_reset() to a position inside user-supplied
log_buf. In such situation we still want to properly maintain
terminating zero. We will eventually return -ENOSPC even if final log
buffer is small (we detect this through log->len_max check). This
behavior is simpler to reason about and is consistent with current
behavior of verifier log. Handling of this required a small addition to
bpf_vlog_reset() logic to avoid doing put_user() beyond physical log
buffer dimensions.

Another issue to keep in mind is that we limit log buffer size to 32-bit
value and keep such log length as u32, but theoretically verifier could
produce huge log stretching beyond 4GB. Instead of keeping (and later
returning) 64-bit log length, we cap it at UINT_MAX. Current UAPI makes
it impossible to specify log buffer size bigger than 4GB anyways, so we
don't really lose anything here and keep everything consistently 32-bit
in UAPI. This property will be utilized in next patch.

Doing the same determination of maximum log buffer for rolling mode is
trivial, as log->end_pos and log->start_pos are already logical
positions, so there is nothing new there.

These changes do incidentally fix one small issue with previous logging
logic. Previously, if user provided log buffer of size N, and actual log
output was exactly N-1 bytes + terminating \0, kernel logic couldn't
distinguish this condition from log truncation scenario which would end
up with truncated log contents of N-1 bytes + terminating \0 as well.

But now with log->end_pos being logical position that could go beyond
actual log buffer size, we can distinguish these two conditions, which
we do in this patch. This plays nicely with returning log_size_actual
(implemented in UAPI in the next patch), as we can now guarantee that if
user takes such log_size_actual and provides log buffer of that exact
size, they will not get -ENOSPC in return.

All in all, all these changes do conceptually unify fixed and rolling
log modes much better, and allow a nice feature requested by users:
knowing what should be the size of the buffer to avoid -ENOSPC.

We'll plumb this through the UAPI and the code in the next patch.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
---
 include/linux/bpf_verifier.h | 12 ++-----
 kernel/bpf/log.c             | 68 +++++++++++++++++++++++++-----------
 2 files changed, 50 insertions(+), 30 deletions(-)

Comments

Lorenz Bauer April 5, 2023, 5:29 p.m. UTC | #1
On Tue, Apr 4, 2023 at 5:37 AM Andrii Nakryiko <andrii@kernel.org> wrote:
>
> With these changes, it's now possible to do this sort of determination of
> log contents size in both BPF_LOG_FIXED and default rolling log mode.
> We need to keep in mind bpf_vlog_reset(), though, which shrinks log
> contents after successful verification of a particular code path. This
> log reset means that log->end_pos isn't always increasing, so to return
> back to users what should be the log buffer size to fit all log content
> without causing -ENOSPC even in the presenec of log resetting, we need

Just for you :) Nit: presence

> to keep maximum over "lifetime" of logging. We do this accounting in
> bpf_vlog_update_len_max() helper.

Ah, this is interesting! The way I conceived of this working is that
the kernel gives me the buffer size required to avoid truncation at
the final copy out _given the same flags_. From a user space POV I
don't care about the intermediate log that was truncated away, since I
in a way asked the kernel to not give me this information. Can we drop
the len_max logic and simply use end_pos?

> Another issue to keep in mind is that we limit log buffer size to 32-bit
> value and keep such log length as u32, but theoretically verifier could
> produce huge log stretching beyond 4GB. Instead of keeping (and later
> returning) 64-bit log length, we cap it at UINT_MAX. Current UAPI makes
> it impossible to specify log buffer size bigger than 4GB anyways, so we
> don't really loose anything here and keep everything consistently 32-bit
> in UAPI. This property will be utilized in next patch.
> These changes do incidentally fix one small issue with previous logging
> logic. Previously, if use provided log buffer of size N, and actual log
> output was exactly N-1 bytes + terminating \0, kernel logic coun't
> distinguish this condition from log truncation scenario which would end
> up with truncated log contents of N-1 bytes + terminating \0 as well.
>
> But now with log->end_pos being logical position that could go beyond
> actual log buffer size, we can distinguish these two conditions, which
> we do in this patch. This plays nicely with returning log_size_actual
> (implemented in UAPI in the next patch), as we can now guarantee that if
> user takes such log_size_actual and provides log buffer of that exact
> size, they will not get -ENOSPC in return.
>
> All in all, all these changes do conceptually unify fixed and rolling
> log modes much better, and allow a nice feature requested by users:
> knowing what should be the size of the buffer to avoid -ENOSPC.
>
> We'll plumb this through the UAPI and the code in the next patch.
>
> Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
> ---
>  include/linux/bpf_verifier.h | 12 ++-----
>  kernel/bpf/log.c             | 68 +++++++++++++++++++++++++-----------
>  2 files changed, 50 insertions(+), 30 deletions(-)
>
> diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
> index 4c926227f612..98d2eb382dbb 100644
> --- a/include/linux/bpf_verifier.h
> +++ b/include/linux/bpf_verifier.h
> @@ -504,6 +504,7 @@ struct bpf_verifier_log {
>         char __user *ubuf;
>         u32 level;
>         u32 len_total;
> +       u32 len_max;
>         char kbuf[BPF_VERIFIER_TMP_LOG_SIZE];
>  };
>
> @@ -517,23 +518,16 @@ struct bpf_verifier_log {
>  #define BPF_LOG_MIN_ALIGNMENT 8U
>  #define BPF_LOG_ALIGNMENT 40U
>
> -static inline u32 bpf_log_used(const struct bpf_verifier_log *log)
> -{
> -       return log->end_pos - log->start_pos;
> -}
> -
>  static inline bool bpf_verifier_log_full(const struct bpf_verifier_log *log)
>  {
>         if (log->level & BPF_LOG_FIXED)
> -               return bpf_log_used(log) >= log->len_total - 1;
> +               return log->end_pos >= log->len_total;
>         return false;
>  }
>
>  static inline bool bpf_verifier_log_needed(const struct bpf_verifier_log *log)
>  {
> -       return log &&
> -               ((log->level && log->ubuf && !bpf_verifier_log_full(log)) ||
> -                log->level == BPF_LOG_KERNEL);
> +       return log && log->level;
>  }
>
>  #define BPF_MAX_SUBPROGS 256
> diff --git a/kernel/bpf/log.c b/kernel/bpf/log.c
> index 14dc4d90adbe..acfe8f5d340a 100644
> --- a/kernel/bpf/log.c
> +++ b/kernel/bpf/log.c
> @@ -16,10 +16,26 @@ bool bpf_verifier_log_attr_valid(const struct bpf_verifier_log *log)
>                log->level && log->ubuf && !(log->level & ~BPF_LOG_MASK);
>  }
>
> +static void bpf_vlog_update_len_max(struct bpf_verifier_log *log, u32 add_len)
> +{
> +       /* add_len includes terminal \0, so no need for +1. */
> +       u64 len = log->end_pos + add_len;
> +
> +       /* log->len_max could be larger than our current len due to
> +        * bpf_vlog_reset() calls, so we maintain the max of any length at any
> +        * previous point
> +        */
> +       if (len > UINT_MAX)
> +               log->len_max = UINT_MAX;
> +       else if (len > log->len_max)
> +               log->len_max = len;
> +}
> +
>  void bpf_verifier_vlog(struct bpf_verifier_log *log, const char *fmt,
>                        va_list args)
>  {
> -       unsigned int n;
> +       u64 cur_pos;
> +       u32 new_n, n;
>
>         n = vscnprintf(log->kbuf, BPF_VERIFIER_TMP_LOG_SIZE, fmt, args);
>
> @@ -33,21 +49,28 @@ void bpf_verifier_vlog(struct bpf_verifier_log *log, const char *fmt,
>                 return;
>         }
>
> +       n += 1; /* include terminating zero */

So above we WARN_ONCE if n >= BPF_VERIFIER_TMP_LOG_SIZE - 1, but here
we add 1 anyways. Doesn't that mean we may read 1 byte past the end of
kbuf?


>         if (log->level & BPF_LOG_FIXED) {
> -               n = min(log->len_total - bpf_log_used(log) - 1, n);
> -               log->kbuf[n] = '\0';
> -               n += 1;
> -
> -               if (copy_to_user(log->ubuf + log->end_pos, log->kbuf, n))
> -                       goto fail;
> +               /* check if we have at least something to put into user buf */
> +               new_n = 0;
> +               if (log->end_pos < log->len_total - 1) {
> +                       new_n = min_t(u32, log->len_total - log->end_pos, n);
> +                       log->kbuf[new_n - 1] = '\0';
> +               }
>
> +               bpf_vlog_update_len_max(log, n);
> +               cur_pos = log->end_pos;
>                 log->end_pos += n - 1; /* don't count terminating '\0' */
> +
> +               if (log->ubuf && new_n &&
> +                   copy_to_user(log->ubuf + cur_pos, log->kbuf, new_n))
> +                       goto fail;
>         } else {
> -               u64 new_end, new_start, cur_pos;
> +               u64 new_end, new_start;
>                 u32 buf_start, buf_end, new_n;
>
> -               log->kbuf[n] = '\0';
> -               n += 1;
> +               log->kbuf[n - 1] = '\0';
> +               bpf_vlog_update_len_max(log, n);
>
>                 new_end = log->end_pos + n;
>                 if (new_end - log->start_pos >= log->len_total)
> @@ -66,6 +89,12 @@ void bpf_verifier_vlog(struct bpf_verifier_log *log, const char *fmt,
>                 if (buf_end == 0)
>                         buf_end = log->len_total;
>
> +               log->start_pos = new_start;
> +               log->end_pos = new_end - 1; /* don't count terminating '\0' */
> +
> +               if (!log->ubuf)
> +                       return;
> +
>                 /* if buf_start > buf_end, we wrapped around;
>                  * if buf_start == buf_end, then we fill ubuf completely; we
>                  * can't have buf_start == buf_end to mean that there is
> @@ -89,9 +118,6 @@ void bpf_verifier_vlog(struct bpf_verifier_log *log, const char *fmt,
>                                          buf_end))
>                                 goto fail;
>                 }
> -
> -               log->start_pos = new_start;
> -               log->end_pos = new_end - 1; /* don't count terminating '\0' */
>         }
>
>         return;
> @@ -114,8 +140,13 @@ void bpf_vlog_reset(struct bpf_verifier_log *log, u64 new_pos)
>         log->end_pos = new_pos;
>         if (log->end_pos < log->start_pos)
>                 log->start_pos = log->end_pos;
> -       div_u64_rem(new_pos, log->len_total, &pos);
> -       if (put_user(zero, log->ubuf + pos))
> +
> +       if (log->level & BPF_LOG_FIXED)
> +               pos = log->end_pos + 1;
> +       else
> +               div_u64_rem(new_pos, log->len_total, &pos);
> +
> +       if (log->ubuf && pos < log->len_total && put_user(zero, log->ubuf + pos))
>                 log->ubuf = NULL;
>  }
>
> @@ -167,12 +198,7 @@ static int bpf_vlog_reverse_ubuf(struct bpf_verifier_log *log, int start, int en
>
>  bool bpf_vlog_truncated(const struct bpf_verifier_log *log)
>  {
> -       if (!log->level)
> -               return false;
> -       else if (log->level & BPF_LOG_FIXED)
> -               return bpf_log_used(log) >= log->len_total - 1;
> -       else
> -               return log->start_pos > 0;
> +       return log->len_max > log->len_total;
>  }
>
>  void bpf_vlog_finalize(struct bpf_verifier_log *log)
> --
> 2.34.1
>
Andrii Nakryiko April 5, 2023, 5:50 p.m. UTC | #2
On Wed, Apr 5, 2023 at 10:29 AM Lorenz Bauer <lmb@isovalent.com> wrote:
>
> On Tue, Apr 4, 2023 at 5:37 AM Andrii Nakryiko <andrii@kernel.org> wrote:
> >
> > With these changes, it's now possible to do this sort of determination of
> > log contents size in both BPF_LOG_FIXED and default rolling log mode.
> > We need to keep in mind bpf_vlog_reset(), though, which shrinks log
> > contents after successful verification of a particular code path. This
> > log reset means that log->end_pos isn't always increasing, so to return
> > back to users what should be the log buffer size to fit all log content
> > without causing -ENOSPC even in the presenec of log resetting, we need
>
> Just for you :) Nit: presence
>
> > to keep maximum over "lifetime" of logging. We do this accounting in
> > bpf_vlog_update_len_max() helper.
>
> Ah, this is interesting! The way I conceived of this working is that
> the kernel gives me the buffer size required to avoid truncation at
> the final copy out _given the same flags_. From a user space POV I
> don't care about the intermediate log that was truncated away, since I
> in a way asked the kernel to not give me this information. Can we drop
> the len_max logic and simply use end_pos?

No, we can't, because if we don't take into account this max, then
you'll get -ENOSPC. This has all to do with verifier position
resetting. In log_level=1 log can grow while we process some code path
branch, and as soon as we get to EXIT successfully, verifier will
"forget" relevant parts of the log to go back to parent state. This
was done a while ago to only leave verifier history from beginning to
failure, without all the successfully verified (and thus irrelevant
for debugging) information. This was a huge usability boost and I
don't think we want to go back.

But it does mean that even if final log content is small, required log
buffer size at some point might have been much bigger. And it's pretty
much always the case for successfully verified programs. They end up
having a small verifier stats summary and no log of what verifier did
for state verification.

So, no, we can't just use final end_pos, otherwise we get a useless feature.

>
> > Another issue to keep in mind is that we limit log buffer size to 32-bit
> > value and keep such log length as u32, but theoretically verifier could
> > produce huge log stretching beyond 4GB. Instead of keeping (and later
> > returning) 64-bit log length, we cap it at UINT_MAX. Current UAPI makes
> > it impossible to specify log buffer size bigger than 4GB anyways, so we
> > don't really loose anything here and keep everything consistently 32-bit
> > in UAPI. This property will be utilized in next patch.
> > These changes do incidentally fix one small issue with previous logging
> > logic. Previously, if use provided log buffer of size N, and actual log
> > output was exactly N-1 bytes + terminating \0, kernel logic coun't
> > distinguish this condition from log truncation scenario which would end
> > up with truncated log contents of N-1 bytes + terminating \0 as well.
> >
> > But now with log->end_pos being logical position that could go beyond
> > actual log buffer size, we can distinguish these two conditions, which
> > we do in this patch. This plays nicely with returning log_size_actual
> > (implemented in UAPI in the next patch), as we can now guarantee that if
> > user takes such log_size_actual and provides log buffer of that exact
> > size, they will not get -ENOSPC in return.
> >
> > All in all, all these changes do conceptually unify fixed and rolling
> > log modes much better, and allow a nice feature requested by users:
> > knowing what should be the size of the buffer to avoid -ENOSPC.
> >
> > We'll plumb this through the UAPI and the code in the next patch.
> >
> > Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
> > ---
> >  include/linux/bpf_verifier.h | 12 ++-----
> >  kernel/bpf/log.c             | 68 +++++++++++++++++++++++++-----------
> >  2 files changed, 50 insertions(+), 30 deletions(-)
> >
> > diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
> > index 4c926227f612..98d2eb382dbb 100644
> > --- a/include/linux/bpf_verifier.h
> > +++ b/include/linux/bpf_verifier.h
> > @@ -504,6 +504,7 @@ struct bpf_verifier_log {
> >         char __user *ubuf;
> >         u32 level;
> >         u32 len_total;
> > +       u32 len_max;
> >         char kbuf[BPF_VERIFIER_TMP_LOG_SIZE];
> >  };
> >
> > @@ -517,23 +518,16 @@ struct bpf_verifier_log {
> >  #define BPF_LOG_MIN_ALIGNMENT 8U
> >  #define BPF_LOG_ALIGNMENT 40U
> >
> > -static inline u32 bpf_log_used(const struct bpf_verifier_log *log)
> > -{
> > -       return log->end_pos - log->start_pos;
> > -}
> > -
> >  static inline bool bpf_verifier_log_full(const struct bpf_verifier_log *log)
> >  {
> >         if (log->level & BPF_LOG_FIXED)
> > -               return bpf_log_used(log) >= log->len_total - 1;
> > +               return log->end_pos >= log->len_total;
> >         return false;
> >  }
> >
> >  static inline bool bpf_verifier_log_needed(const struct bpf_verifier_log *log)
> >  {
> > -       return log &&
> > -               ((log->level && log->ubuf && !bpf_verifier_log_full(log)) ||
> > -                log->level == BPF_LOG_KERNEL);
> > +       return log && log->level;
> >  }
> >
> >  #define BPF_MAX_SUBPROGS 256
> > diff --git a/kernel/bpf/log.c b/kernel/bpf/log.c
> > index 14dc4d90adbe..acfe8f5d340a 100644
> > --- a/kernel/bpf/log.c
> > +++ b/kernel/bpf/log.c
> > @@ -16,10 +16,26 @@ bool bpf_verifier_log_attr_valid(const struct bpf_verifier_log *log)
> >                log->level && log->ubuf && !(log->level & ~BPF_LOG_MASK);
> >  }
> >
> > +static void bpf_vlog_update_len_max(struct bpf_verifier_log *log, u32 add_len)
> > +{
> > +       /* add_len includes terminal \0, so no need for +1. */
> > +       u64 len = log->end_pos + add_len;
> > +
> > +       /* log->len_max could be larger than our current len due to
> > +        * bpf_vlog_reset() calls, so we maintain the max of any length at any
> > +        * previous point
> > +        */
> > +       if (len > UINT_MAX)
> > +               log->len_max = UINT_MAX;
> > +       else if (len > log->len_max)
> > +               log->len_max = len;
> > +}
> > +
> >  void bpf_verifier_vlog(struct bpf_verifier_log *log, const char *fmt,
> >                        va_list args)
> >  {
> > -       unsigned int n;
> > +       u64 cur_pos;
> > +       u32 new_n, n;
> >
> >         n = vscnprintf(log->kbuf, BPF_VERIFIER_TMP_LOG_SIZE, fmt, args);
> >
> > @@ -33,21 +49,28 @@ void bpf_verifier_vlog(struct bpf_verifier_log *log, const char *fmt,
> >                 return;
> >         }
> >
> > +       n += 1; /* include terminating zero */
>
> So above we WARN_ONCE if n >= BPF_VERIFIER_TMP_LOG_SIZE - 1, but here
> we add 1 anyways. Doesn't that mean we may read 1 byte past the end of
> kbuf?

Nice catch! I guess I have to do `n = min(n, BPF_VERIFIER_TMP_LOG_SIZE
- 1)` before that, will fix it, thanks!

>
>
> >         if (log->level & BPF_LOG_FIXED) {
> > -               n = min(log->len_total - bpf_log_used(log) - 1, n);
> > -               log->kbuf[n] = '\0';
> > -               n += 1;
> > -
> > -               if (copy_to_user(log->ubuf + log->end_pos, log->kbuf, n))
> > -                       goto fail;
> > +               /* check if we have at least something to put into user buf */
> > +               new_n = 0;
> > +               if (log->end_pos < log->len_total - 1) {
> > +                       new_n = min_t(u32, log->len_total - log->end_pos, n);
> > +                       log->kbuf[new_n - 1] = '\0';
> > +               }
> >
> > +               bpf_vlog_update_len_max(log, n);
> > +               cur_pos = log->end_pos;
> >                 log->end_pos += n - 1; /* don't count terminating '\0' */
> > +
> > +               if (log->ubuf && new_n &&
> > +                   copy_to_user(log->ubuf + cur_pos, log->kbuf, new_n))
> > +                       goto fail;

[...]
Andrii Nakryiko April 5, 2023, 6:17 p.m. UTC | #3
On Wed, Apr 5, 2023 at 10:50 AM Andrii Nakryiko
<andrii.nakryiko@gmail.com> wrote:
>
> On Wed, Apr 5, 2023 at 10:29 AM Lorenz Bauer <lmb@isovalent.com> wrote:
> >
> > On Tue, Apr 4, 2023 at 5:37 AM Andrii Nakryiko <andrii@kernel.org> wrote:
> > >
> > > With these changes, it's now possible to do this sort of determination of
> > > log contents size in both BPF_LOG_FIXED and default rolling log mode.
> > > We need to keep in mind bpf_vlog_reset(), though, which shrinks log
> > > contents after successful verification of a particular code path. This
> > > log reset means that log->end_pos isn't always increasing, so to return
> > > back to users what should be the log buffer size to fit all log content
> > > without causing -ENOSPC even in the presenec of log resetting, we need
> >
> > Just for you :) Nit: presence
> >
> > > to keep maximum over "lifetime" of logging. We do this accounting in
> > > bpf_vlog_update_len_max() helper.
> >
> > Ah, this is interesting! The way I conceived of this working is that
> > the kernel gives me the buffer size required to avoid truncation at
> > the final copy out _given the same flags_. From a user space POV I
> > don't care about the intermediate log that was truncated away, since I
> > in a way asked the kernel to not give me this information. Can we drop
> > the len_max logic and simply use end_pos?
>
> No, we can't, because if we don't take into account this max, then
> you'll get -ENOSPC. This has all to do with verifier position
> resetting. In log_level=1 log can grow while we process some code path
> branch, and as soon as we get to EXIT successfully, verifier will
> "forget" relevant parts of the log to go back to parent state. This
> was done a while ago to only leave verifier history from beginning to
> failure, without all the successfully verified (and thus irrelevant
> for debugging) information. This was a huge usability boost and I
> don't think we want to go back.
>
> But it does mean that even if final log content is small, required log
> buffer size at some point might have been much bigger. And it's pretty
> much always the case for successfully verified programs. They end up
> having a small verifier stats summary and no log of what verifier did
> for state verification.
>
> So, no, we can't just use final end_pos, otherwise we get a useless feature.
>
> >
> > > Another issue to keep in mind is that we limit log buffer size to 32-bit
> > > value and keep such log length as u32, but theoretically verifier could
> > > produce huge log stretching beyond 4GB. Instead of keeping (and later
> > > returning) 64-bit log length, we cap it at UINT_MAX. Current UAPI makes
> > > it impossible to specify log buffer size bigger than 4GB anyways, so we
> > > don't really loose anything here and keep everything consistently 32-bit
> > > in UAPI. This property will be utilized in next patch.
> > > These changes do incidentally fix one small issue with previous logging
> > > logic. Previously, if use provided log buffer of size N, and actual log
> > > output was exactly N-1 bytes + terminating \0, kernel logic coun't
> > > distinguish this condition from log truncation scenario which would end
> > > up with truncated log contents of N-1 bytes + terminating \0 as well.
> > >
> > > But now with log->end_pos being logical position that could go beyond
> > > actual log buffer size, we can distinguish these two conditions, which
> > > we do in this patch. This plays nicely with returning log_size_actual
> > > (implemented in UAPI in the next patch), as we can now guarantee that if
> > > user takes such log_size_actual and provides log buffer of that exact
> > > size, they will not get -ENOSPC in return.
> > >
> > > All in all, all these changes do conceptually unify fixed and rolling
> > > log modes much better, and allow a nice feature requested by users:
> > > knowing what should be the size of the buffer to avoid -ENOSPC.
> > >
> > > We'll plumb this through the UAPI and the code in the next patch.
> > >
> > > Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
> > > ---
> > >  include/linux/bpf_verifier.h | 12 ++-----
> > >  kernel/bpf/log.c             | 68 +++++++++++++++++++++++++-----------
> > >  2 files changed, 50 insertions(+), 30 deletions(-)
> > >
> > > diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
> > > index 4c926227f612..98d2eb382dbb 100644
> > > --- a/include/linux/bpf_verifier.h
> > > +++ b/include/linux/bpf_verifier.h
> > > @@ -504,6 +504,7 @@ struct bpf_verifier_log {
> > >         char __user *ubuf;
> > >         u32 level;
> > >         u32 len_total;
> > > +       u32 len_max;
> > >         char kbuf[BPF_VERIFIER_TMP_LOG_SIZE];
> > >  };
> > >
> > > @@ -517,23 +518,16 @@ struct bpf_verifier_log {
> > >  #define BPF_LOG_MIN_ALIGNMENT 8U
> > >  #define BPF_LOG_ALIGNMENT 40U
> > >
> > > -static inline u32 bpf_log_used(const struct bpf_verifier_log *log)
> > > -{
> > > -       return log->end_pos - log->start_pos;
> > > -}
> > > -
> > >  static inline bool bpf_verifier_log_full(const struct bpf_verifier_log *log)
> > >  {
> > >         if (log->level & BPF_LOG_FIXED)
> > > -               return bpf_log_used(log) >= log->len_total - 1;
> > > +               return log->end_pos >= log->len_total;
> > >         return false;
> > >  }
> > >
> > >  static inline bool bpf_verifier_log_needed(const struct bpf_verifier_log *log)
> > >  {
> > > -       return log &&
> > > -               ((log->level && log->ubuf && !bpf_verifier_log_full(log)) ||
> > > -                log->level == BPF_LOG_KERNEL);
> > > +       return log && log->level;
> > >  }
> > >
> > >  #define BPF_MAX_SUBPROGS 256
> > > diff --git a/kernel/bpf/log.c b/kernel/bpf/log.c
> > > index 14dc4d90adbe..acfe8f5d340a 100644
> > > --- a/kernel/bpf/log.c
> > > +++ b/kernel/bpf/log.c
> > > @@ -16,10 +16,26 @@ bool bpf_verifier_log_attr_valid(const struct bpf_verifier_log *log)
> > >                log->level && log->ubuf && !(log->level & ~BPF_LOG_MASK);
> > >  }
> > >
> > > +static void bpf_vlog_update_len_max(struct bpf_verifier_log *log, u32 add_len)
> > > +{
> > > +       /* add_len includes terminal \0, so no need for +1. */
> > > +       u64 len = log->end_pos + add_len;
> > > +
> > > +       /* log->len_max could be larger than our current len due to
> > > +        * bpf_vlog_reset() calls, so we maintain the max of any length at any
> > > +        * previous point
> > > +        */
> > > +       if (len > UINT_MAX)
> > > +               log->len_max = UINT_MAX;
> > > +       else if (len > log->len_max)
> > > +               log->len_max = len;
> > > +}
> > > +
> > >  void bpf_verifier_vlog(struct bpf_verifier_log *log, const char *fmt,
> > >                        va_list args)
> > >  {
> > > -       unsigned int n;
> > > +       u64 cur_pos;
> > > +       u32 new_n, n;
> > >
> > >         n = vscnprintf(log->kbuf, BPF_VERIFIER_TMP_LOG_SIZE, fmt, args);
> > >
> > > @@ -33,21 +49,28 @@ void bpf_verifier_vlog(struct bpf_verifier_log *log, const char *fmt,
> > >                 return;
> > >         }
> > >
> > > +       n += 1; /* include terminating zero */
> >
> > So above we WARN_ONCE if n >= BPF_VERIFIER_TMP_LOG_SIZE - 1, but here
> > we add 1 anyways. Doesn't that mean we may read 1 byte past the end of
> > kbuf?
>
> Nice catch! I guess I have to do `n = min(n, BPF_VERIFIER_TMP_LOG_SIZE
> - 1)` before that, will fix it, thanks!
>

Ok, so looking at your comment about vscnprintf and checking the
source code, no, we don't have a problem here. vscnprintf() will at
most return buf size - 1, and will still put zero into the last byte.
So we can never read past buffer end.

> >
> >
> > >         if (log->level & BPF_LOG_FIXED) {
> > > -               n = min(log->len_total - bpf_log_used(log) - 1, n);
> > > -               log->kbuf[n] = '\0';
> > > -               n += 1;
> > > -
> > > -               if (copy_to_user(log->ubuf + log->end_pos, log->kbuf, n))
> > > -                       goto fail;
> > > +               /* check if we have at least something to put into user buf */
> > > +               new_n = 0;
> > > +               if (log->end_pos < log->len_total - 1) {
> > > +                       new_n = min_t(u32, log->len_total - log->end_pos, n);
> > > +                       log->kbuf[new_n - 1] = '\0';
> > > +               }
> > >
> > > +               bpf_vlog_update_len_max(log, n);
> > > +               cur_pos = log->end_pos;
> > >                 log->end_pos += n - 1; /* don't count terminating '\0' */
> > > +
> > > +               if (log->ubuf && new_n &&
> > > +                   copy_to_user(log->ubuf + cur_pos, log->kbuf, new_n))
> > > +                       goto fail;
>
> [...]
diff mbox series

Patch

diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 4c926227f612..98d2eb382dbb 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -504,6 +504,7 @@  struct bpf_verifier_log {
 	char __user *ubuf;
 	u32 level;
 	u32 len_total;
+	u32 len_max;
 	char kbuf[BPF_VERIFIER_TMP_LOG_SIZE];
 };
 
@@ -517,23 +518,16 @@  struct bpf_verifier_log {
 #define BPF_LOG_MIN_ALIGNMENT 8U
 #define BPF_LOG_ALIGNMENT 40U
 
-static inline u32 bpf_log_used(const struct bpf_verifier_log *log)
-{
-	return log->end_pos - log->start_pos;
-}
-
 static inline bool bpf_verifier_log_full(const struct bpf_verifier_log *log)
 {
 	if (log->level & BPF_LOG_FIXED)
-		return bpf_log_used(log) >= log->len_total - 1;
+		return log->end_pos >= log->len_total;
 	return false;
 }
 
 static inline bool bpf_verifier_log_needed(const struct bpf_verifier_log *log)
 {
-	return log &&
-		((log->level && log->ubuf && !bpf_verifier_log_full(log)) ||
-		 log->level == BPF_LOG_KERNEL);
+	return log && log->level;
 }
 
 #define BPF_MAX_SUBPROGS 256
diff --git a/kernel/bpf/log.c b/kernel/bpf/log.c
index 14dc4d90adbe..acfe8f5d340a 100644
--- a/kernel/bpf/log.c
+++ b/kernel/bpf/log.c
@@ -16,10 +16,26 @@  bool bpf_verifier_log_attr_valid(const struct bpf_verifier_log *log)
 	       log->level && log->ubuf && !(log->level & ~BPF_LOG_MASK);
 }
 
+static void bpf_vlog_update_len_max(struct bpf_verifier_log *log, u32 add_len)
+{
+	/* add_len includes terminal \0, so no need for +1. */
+	u64 len = log->end_pos + add_len;
+
+	/* log->len_max could be larger than our current len due to
+	 * bpf_vlog_reset() calls, so we maintain the max of any length at any
+	 * previous point
+	 */
+	if (len > UINT_MAX)
+		log->len_max = UINT_MAX;
+	else if (len > log->len_max)
+		log->len_max = len;
+}
+
 void bpf_verifier_vlog(struct bpf_verifier_log *log, const char *fmt,
 		       va_list args)
 {
-	unsigned int n;
+	u64 cur_pos;
+	u32 new_n, n;
 
 	n = vscnprintf(log->kbuf, BPF_VERIFIER_TMP_LOG_SIZE, fmt, args);
 
@@ -33,21 +49,28 @@  void bpf_verifier_vlog(struct bpf_verifier_log *log, const char *fmt,
 		return;
 	}
 
+	n += 1; /* include terminating zero */
 	if (log->level & BPF_LOG_FIXED) {
-		n = min(log->len_total - bpf_log_used(log) - 1, n);
-		log->kbuf[n] = '\0';
-		n += 1;
-
-		if (copy_to_user(log->ubuf + log->end_pos, log->kbuf, n))
-			goto fail;
+		/* check if we have at least something to put into user buf */
+		new_n = 0;
+		if (log->end_pos < log->len_total - 1) {
+			new_n = min_t(u32, log->len_total - log->end_pos, n);
+			log->kbuf[new_n - 1] = '\0';
+		}
 
+		bpf_vlog_update_len_max(log, n);
+		cur_pos = log->end_pos;
 		log->end_pos += n - 1; /* don't count terminating '\0' */
+
+		if (log->ubuf && new_n &&
+		    copy_to_user(log->ubuf + cur_pos, log->kbuf, new_n))
+			goto fail;
 	} else {
-		u64 new_end, new_start, cur_pos;
+		u64 new_end, new_start;
 		u32 buf_start, buf_end, new_n;
 
-		log->kbuf[n] = '\0';
-		n += 1;
+		log->kbuf[n - 1] = '\0';
+		bpf_vlog_update_len_max(log, n);
 
 		new_end = log->end_pos + n;
 		if (new_end - log->start_pos >= log->len_total)
@@ -66,6 +89,12 @@  void bpf_verifier_vlog(struct bpf_verifier_log *log, const char *fmt,
 		if (buf_end == 0)
 			buf_end = log->len_total;
 
+		log->start_pos = new_start;
+		log->end_pos = new_end - 1; /* don't count terminating '\0' */
+
+		if (!log->ubuf)
+			return;
+
 		/* if buf_start > buf_end, we wrapped around;
 		 * if buf_start == buf_end, then we fill ubuf completely; we
 		 * can't have buf_start == buf_end to mean that there is
@@ -89,9 +118,6 @@  void bpf_verifier_vlog(struct bpf_verifier_log *log, const char *fmt,
 					 buf_end))
 				goto fail;
 		}
-
-		log->start_pos = new_start;
-		log->end_pos = new_end - 1; /* don't count terminating '\0' */
 	}
 
 	return;
@@ -114,8 +140,13 @@  void bpf_vlog_reset(struct bpf_verifier_log *log, u64 new_pos)
 	log->end_pos = new_pos;
 	if (log->end_pos < log->start_pos)
 		log->start_pos = log->end_pos;
-	div_u64_rem(new_pos, log->len_total, &pos);
-	if (put_user(zero, log->ubuf + pos))
+
+	if (log->level & BPF_LOG_FIXED)
+		pos = log->end_pos + 1;
+	else
+		div_u64_rem(new_pos, log->len_total, &pos);
+
+	if (log->ubuf && pos < log->len_total && put_user(zero, log->ubuf + pos))
 		log->ubuf = NULL;
 }
 
@@ -167,12 +198,7 @@  static int bpf_vlog_reverse_ubuf(struct bpf_verifier_log *log, int start, int en
 
 bool bpf_vlog_truncated(const struct bpf_verifier_log *log)
 {
-	if (!log->level)
-		return false;
-	else if (log->level & BPF_LOG_FIXED)
-		return bpf_log_used(log) >= log->len_total - 1;
-	else
-		return log->start_pos > 0;
+	return log->len_max > log->len_total;
 }
 
 void bpf_vlog_finalize(struct bpf_verifier_log *log)