diff mbox series

[2/3] bpf: Allow NULL buffers in bpf_dynptr_slice(_rw)

Message ID 20230406004018.1439952-3-drosen@google.com (mailing list archive)
State Changes Requested
Delegated to: BPF
Headers show
Series Dynptr Verifier Adjustments | expand

Checks

Context Check Description
netdev/tree_selection success Guessing tree name failed - patch did not apply
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-2 success Logs for build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-3 success Logs for build for aarch64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-5 success Logs for build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-6 success Logs for build for x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-7 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-4 success Logs for build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-8 success Logs for test_maps on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-9 success Logs for test_maps on aarch64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-11 success Logs for test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-12 success Logs for test_maps on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-13 fail Logs for test_progs on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-14 fail Logs for test_progs on aarch64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-16 fail Logs for test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-17 fail Logs for test_progs on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-18 fail Logs for test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-19 fail Logs for test_progs_no_alu32 on aarch64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-21 fail Logs for test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-22 fail Logs for test_progs_no_alu32 on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-23 success Logs for test_progs_no_alu32_parallel on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-24 success Logs for test_progs_no_alu32_parallel on aarch64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-25 success Logs for test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-26 success Logs for test_progs_no_alu32_parallel on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-27 success Logs for test_progs_parallel on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-28 success Logs for test_progs_parallel on aarch64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-29 success Logs for test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-30 success Logs for test_progs_parallel on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-31 success Logs for test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-32 success Logs for test_verifier on aarch64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-33 success Logs for test_verifier on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-34 success Logs for test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-35 success Logs for test_verifier on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-20 fail Logs for test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-15 fail Logs for test_progs on s390x with gcc
bpf/vmtest-bpf-next-PR fail PR summary
bpf/vmtest-bpf-next-VM_Test-10 success Logs for test_maps on s390x with gcc

Commit Message

Daniel Rosenberg April 6, 2023, 12:40 a.m. UTC
bpf_dynptr_slice(_rw) uses a user provided buffer if it can not provide
a pointer to a block of contiguous memory. This buffer is unused in the
case of local dynptrs, and may be unused in other cases as well. There
is no need to require the buffer, as the kfunc can just return NULL if
it was needed and not provided.

This adds another kfunc annotation, __opt, which combines with __sz and
__szk to allow the buffer associated with the size to be NULL. If the
buffer is NULL, the verifier does not check that the buffer is of
sufficient size.

Signed-off-by: Daniel Rosenberg <drosen@google.com>
---
 Documentation/bpf/kfuncs.rst | 23 ++++++++++++++++++++++-
 kernel/bpf/helpers.c         | 32 ++++++++++++++++++++------------
 kernel/bpf/verifier.c        | 17 +++++++++++++++++
 3 files changed, 59 insertions(+), 13 deletions(-)

Comments

Andrii Nakryiko April 6, 2023, 9:09 p.m. UTC | #1
On Wed, Apr 5, 2023 at 5:40 PM Daniel Rosenberg <drosen@google.com> wrote:
>
> bpf_dynptr_slice(_rw) uses a user provided buffer if it can not provide
> a pointer to a block of contiguous memory. This buffer is unused in the
> case of local dynptrs, and may be unused in other cases as well. There
> is no need to require the buffer, as the kfunc can just return NULL if
> it was needed and not provided.
>
> This adds another kfunc annotation, __opt, which combines with __sz and
> __szk to allow the buffer associated with the size to be NULL. If the
> buffer is NULL, the verifier does not check that the buffer is of
> sufficient size.
>
> Signed-off-by: Daniel Rosenberg <drosen@google.com>
> ---
>  Documentation/bpf/kfuncs.rst | 23 ++++++++++++++++++++++-
>  kernel/bpf/helpers.c         | 32 ++++++++++++++++++++------------
>  kernel/bpf/verifier.c        | 17 +++++++++++++++++
>  3 files changed, 59 insertions(+), 13 deletions(-)
>
> diff --git a/Documentation/bpf/kfuncs.rst b/Documentation/bpf/kfuncs.rst
> index d8a16c4bef7f..69573b511233 100644
> --- a/Documentation/bpf/kfuncs.rst
> +++ b/Documentation/bpf/kfuncs.rst
> @@ -100,7 +100,7 @@ Hence, whenever a constant scalar argument is accepted by a kfunc which is not a
>  size parameter, and the value of the constant matters for program safety, __k
>  suffix should be used.
>
> -2.2.2 __uninit Annotation
> +2.2.3 __uninit Annotation
>  -------------------------
>
>  This annotation is used to indicate that the argument will be treated as
> @@ -117,6 +117,27 @@ Here, the dynptr will be treated as an uninitialized dynptr. Without this
>  annotation, the verifier will reject the program if the dynptr passed in is
>  not initialized.
>
> +2.2.4 __opt Annotation
> +-------------------------
> +
> +This annotation is used to indicate that the buffer associated with an __sz or __szk
> +argument may be null. If the function is passed a nullptr in place of the buffer,
> +the verifier will not check that length is appropriate for the buffer. The kfunc is
> +responsible for checking if this buffer is null before using it.
> +
> +An example is given below::
> +
> +        __bpf_kfunc void *bpf_dynptr_slice(..., void *buffer__opt, u32 buffer__szk)
> +        {
> +        ...
> +        }
> +
> +Here, the buffer may be null. If buffer is not null, it at least of size buffer_szk.
> +Either way, the returned buffer is either NULL, or of size buffer_szk. Without this
> +annotation, the verifier will reject the program if a null pointer is passed in with
> +a nonzero size.
> +
> +
>  .. _BPF_kfunc_nodef:
>
>  2.3 Using an existing kernel function
> diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
> index 6be16db9f188..f08556fd8b96 100644
> --- a/kernel/bpf/helpers.c
> +++ b/kernel/bpf/helpers.c
> @@ -2145,13 +2145,15 @@ __bpf_kfunc struct task_struct *bpf_task_from_pid(s32 pid)
>   * bpf_dynptr_slice() - Obtain a read-only pointer to the dynptr data.
>   * @ptr: The dynptr whose data slice to retrieve
>   * @offset: Offset into the dynptr
> - * @buffer: User-provided buffer to copy contents into
> - * @buffer__szk: Size (in bytes) of the buffer. This is the length of the
> - *              requested slice. This must be a constant.
> + * @buffer__opt: User-provided buffer to copy contents into.  May be NULL
> + * @buffer__szk: Size (in bytes) of the buffer if present. This is the
> + *               length of the requested slice. This must be a constant.
>   *
>   * For non-skb and non-xdp type dynptrs, there is no difference between
>   * bpf_dynptr_slice and bpf_dynptr_data.
>   *
> + *  If buffer__opt is NULL, the call will fail if buffer_opt was needed.
> + *
>   * If the intention is to write to the data slice, please use
>   * bpf_dynptr_slice_rdwr.
>   *
> @@ -2168,7 +2170,7 @@ __bpf_kfunc struct task_struct *bpf_task_from_pid(s32 pid)
>   * direct pointer)
>   */
>  __bpf_kfunc void *bpf_dynptr_slice(const struct bpf_dynptr_kern *ptr, u32 offset,
> -                                  void *buffer, u32 buffer__szk)
> +                                  void *buffer__opt, u32 buffer__szk)
>  {
>         enum bpf_dynptr_type type;
>         u32 len = buffer__szk;
> @@ -2188,15 +2190,19 @@ __bpf_kfunc void *bpf_dynptr_slice(const struct bpf_dynptr_kern *ptr, u32 offset
>         case BPF_DYNPTR_TYPE_RINGBUF:
>                 return ptr->data + ptr->offset + offset;
>         case BPF_DYNPTR_TYPE_SKB:
> -               return skb_header_pointer(ptr->data, ptr->offset + offset, len, buffer);
> +               if (!buffer__opt)
> +                       return NULL;

should we always reject NULL even for SKB/XDP or only when the buffer
*would be* required? If the latter, we could use bpf_dynptr_slice()
with NULL buf to say "only return pointer if no byte copying is
required". As opposed to bpf_dynptr_data(), where I think we always
fail for SKB/XDP, because we are not sure whether users are aware of
this need to copy bytes. Here, users are aware, but chose to prevent
copying.

WDYT?

> +               return skb_header_pointer(ptr->data, ptr->offset + offset, len, buffer__opt);
>         case BPF_DYNPTR_TYPE_XDP:
>         {
>                 void *xdp_ptr = bpf_xdp_pointer(ptr->data, ptr->offset + offset, len);
>                 if (xdp_ptr)
>                         return xdp_ptr;
>
> -               bpf_xdp_copy_buf(ptr->data, ptr->offset + offset, buffer, len, false);
> -               return buffer;
> +               if (!buffer__opt)
> +                       return NULL;
> +               bpf_xdp_copy_buf(ptr->data, ptr->offset + offset, buffer__opt, len, false);
> +               return buffer__opt;
>         }
>         default:
>                 WARN_ONCE(true, "unknown dynptr type %d\n", type);
> @@ -2208,13 +2214,15 @@ __bpf_kfunc void *bpf_dynptr_slice(const struct bpf_dynptr_kern *ptr, u32 offset
>   * bpf_dynptr_slice_rdwr() - Obtain a writable pointer to the dynptr data.
>   * @ptr: The dynptr whose data slice to retrieve
>   * @offset: Offset into the dynptr
> - * @buffer: User-provided buffer to copy contents into
> - * @buffer__szk: Size (in bytes) of the buffer. This is the length of the
> - *              requested slice. This must be a constant.
> + * @buffer__opt: User-provided buffer to copy contents into. May be NULL
> + * @buffer__szk: Size (in bytes) of the buffer if present. This is the
> + *               length of the requested slice. This must be a constant.
>   *
>   * For non-skb and non-xdp type dynptrs, there is no difference between
>   * bpf_dynptr_slice and bpf_dynptr_data.
>   *
> + * If buffer__opt is NULL, the call will fail if buffer_opt was needed.
> + *
>   * The returned pointer is writable and may point to either directly the dynptr
>   * data at the requested offset or to the buffer if unable to obtain a direct
>   * data pointer to (example: the requested slice is to the paged area of an skb
> @@ -2245,7 +2253,7 @@ __bpf_kfunc void *bpf_dynptr_slice(const struct bpf_dynptr_kern *ptr, u32 offset
>   * direct pointer)
>   */
>  __bpf_kfunc void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr_kern *ptr, u32 offset,
> -                                       void *buffer, u32 buffer__szk)
> +                                       void *buffer__opt, u32 buffer__szk)
>  {
>         if (!ptr->data || bpf_dynptr_is_rdonly(ptr))
>                 return NULL;
> @@ -2272,7 +2280,7 @@ __bpf_kfunc void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr_kern *ptr, u32 o
>          * will be copied out into the buffer and the user will need to call
>          * bpf_dynptr_write() to commit changes.
>          */
> -       return bpf_dynptr_slice(ptr, offset, buffer, buffer__szk);
> +       return bpf_dynptr_slice(ptr, offset, buffer__opt, buffer__szk);
>  }
>
>  __bpf_kfunc void *bpf_cast_to_kern_ctx(void *obj)
> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> index 20beab52812a..b82faef389b1 100644
> --- a/kernel/bpf/verifier.c
> +++ b/kernel/bpf/verifier.c
> @@ -9428,6 +9428,19 @@ static bool is_kfunc_arg_const_mem_size(const struct btf *btf,
>         return __kfunc_param_match_suffix(btf, arg, "__szk");
>  }
>
> +static bool is_kfunc_arg_optional(const struct btf *btf,
> +                 const struct btf_param *arg,
> +                 const struct bpf_reg_state *reg)
> +{
> +       const struct btf_type *t;
> +
> +       t = btf_type_skip_modifiers(btf, arg->type, NULL);
> +       if (!btf_type_is_ptr(t) || reg->type != SCALAR_VALUE || reg->umax_value > 0)
> +               return false;
> +
> +       return __kfunc_param_match_suffix(btf, arg, "__opt");
> +}
> +
>  static bool is_kfunc_arg_constant(const struct btf *btf, const struct btf_param *arg)
>  {
>         return __kfunc_param_match_suffix(btf, arg, "__k");
> @@ -10539,10 +10552,14 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
>                         break;
>                 case KF_ARG_PTR_TO_MEM_SIZE:
>                 {
> +                       struct bpf_reg_state *buff_reg = &regs[regno];
> +                       const struct btf_param *buff_arg = &args[i];
>                         struct bpf_reg_state *size_reg = &regs[regno + 1];
>                         const struct btf_param *size_arg = &args[i + 1];
>
>                         ret = check_kfunc_mem_size_reg(env, size_reg, regno + 1);
> +                       if (ret < 0 && is_kfunc_arg_optional(meta->btf, buff_arg, buff_reg))
> +                               ret = 0;

would this work correctly if someone passes a non-null buffer with too
small size? Can you please add a test for this use case.

Also, I feel like for cases where we allow a NULL buffer, we need to
explicitly check that the register is a *known* NULL (SCALAR=0
basically). And also in that case the size of the buffer probably
should be enforced to zero, not just be allowed to be any value.

it's scary to just ignore some error, tbh, the number of error
conditions can grow over time and we'll be masking them with this
is_kfunc_arg_optional() override. Let's be strict and explicit here.


>                         if (ret < 0) {
>                                 verbose(env, "arg#%d arg#%d memory, len pair leads to invalid memory access\n", i, i + 1);
>                                 return ret;
> --
> 2.40.0.577.gac1e443424-goog
>
Daniel Rosenberg April 6, 2023, 10:25 p.m. UTC | #2
On Thu, Apr 6, 2023 at 2:09 PM Andrii Nakryiko
<andrii.nakryiko@gmail.com> wrote:
>
> should we always reject NULL even for SKB/XDP or only when the buffer
> *would be* required? If the latter, we could use bpf_dynptr_slice()
> with NULL buf to say "only return pointer if no byte copying is
> required". As opposed to bpf_dynptr_data(), where I think we always
> fail for SKB/XDP, because we are not sure whether users are aware of
> this need to copy bytes. Here, users are aware, but chose to prevent
> copying.
>
> WDYT?
>

I think passing NULL here should signal that you're quite okay with it
failing instead of copying. We could limit this to just local/ringbuf
types, but that seems like an unneeded restriction, particularly if
you're operating on some section of an skb/xdp buffer that you know
will always be contiguous.
I adjusted xdp for that. The adjustment would be similar for skb, I
just didn't do that since it was another layer of indirection deep and
hadn't looked through all of those use cases. Though it should be fine
to just reject when the buffer would have been needed, since all users
currently provide one.
I agree that allowing that same behavior for dynptr_data would be more
likely to cause confusion. Blocking the copy on dynptr_slice is much
more explicit.

>
> would this work correctly if someone passes a non-null buffer with too
> small size? Can you please add a test for this use case.
>

Yes, that's one of the tests that's missing there. Once I get my build
situation sorted I'll add more tests. The behavior for a non-null
buffer should be unchanged by this patch.

> Also, I feel like for cases where we allow a NULL buffer, we need to
> explicitly check that the register is a *known* NULL (SCALAR=0
> basically). And also in that case the size of the buffer probably
> should be enforced to zero, not just be allowed to be any value.
>

We absolutely should check that the pointer in question is NULL before
ignoring the size check. I think I'm accomplishing that by ignoring
__opt when reg->umax_value > 0 in is_kfunc_arg_optional. Is that the
wrong check? Perhaps I should check var_off == tnum_const(0) instead.

We can't enforce the size being zero in this case because the size is
doing double duty. It's both the length of the requested area of
access into the dynptr, and the size of the buffer that it might copy
that data into. If we don't provide a buffer, then it doesn't make
sense to check that buffer's size. The size does still apply to the
returned pointer though. Within the kfunc, it just needs to check for
null before copying dynptr data, as well as the regular enforcement of
length against the dynptr/offset.

> it's scary to just ignore some error, tbh, the number of error
> conditions can grow overtime and we'll be masking them with this
> is_kfunc_arg_optional() override. Let's be strict and explicit here.
>
It would probably make more sense to check is_kfunc_arg_optional and
skip the size check altogether. Either way we're just relying on
runtime checks against the dynptr at that point. If the buffer is
known null and optional, we don't care what the relationship between
the buffer and the size is, just the one between the size and the
dynptr. __szk already takes care of it being a constant. This doesn't
affect the return buffer size.
Andrii Nakryiko April 6, 2023, 11:54 p.m. UTC | #3
On Thu, Apr 6, 2023 at 3:25 PM Daniel Rosenberg <drosen@google.com> wrote:
>
> On Thu, Apr 6, 2023 at 2:09 PM Andrii Nakryiko
> <andrii.nakryiko@gmail.com> wrote:
> >
> > should we always reject NULL even for SKB/XDP or only when the buffer
> > *would be* required? If the latter, we could use bpf_dynptr_slice()
> > with NULL buf to say "only return pointer if no byte copying is
> > required". As opposed to bpf_dynptr_data(), where I think we always
> > fail for SKB/XDP, because we are not sure whether users are aware of
> > this need to copy bytes. Here, users are aware, but chose to prevent
> > copying.
> >
> > WDYT?
> >
>
> I think Passing NULL here should signal that you're quite okay with it
> failing instead of copying. We could limit this to just local/ringbuf
> types, but that seems like an unneeded restriction, particularly if
> you're operating on some section of an skb/xdp buffer that you know
> will always be contiguous.
> I adjusted xdp for that. The adjustment would be similar for skb, I
> just didn't do that since it was another layer of indirection deep and
> hadn't looked through all of those use cases. Though it should be fine
> to just reject when the buffer would have been needed, since all users
> currently provide one.
> I agree that allowing that same behavior for dnyptr_data would be more
> likely to cause confusion. Blocking the copy on dynprt_slice is much
> more explicit.
>
> >
> > would this work correctly if someone passes a non-null buffer with too
> > small size? Can you please add a test for this use case.
> >
>
> Yes, that's one of the tests that's missing there. Once I get my build
> situation sorted I'll add more tests. The behavior for a non-null
> buffer should be unchanged by this patch.

cool, sounds good

>
> > Also, I feel like for cases where we allow a NULL buffer, we need to
> > explicitly check that the register is a *known* NULL (SCALAR=0
> > basically). And also in that case the size of the buffer probably
> > should be enforced to zero, not just be allowed to be any value.
> >
>
> We absolutely should check that the pointer in question is NULL before
> ignoring the size check. I think I'm accomplishing that by ignoring
> __opt when reg->umax_value > 0 in is_kfunc_arg_optional. Is that the
> wrong check? Perhaps I should check var_off == tnum_const(0) instead.

yeah, umax_value is probably the wrong check, use register_is_null()

but this approach, even if correct, is a bit too subtle. I'd code it explicitly:

  - if __opt, then we know it *can be NULL*
  - if so, we need to consider two situations
    - it is NULL, then don't enforce buffer size
    - it is not NULL (or may not be NULL), then enforce buffer size

Basically, conflating check whether argument is marked as opt with
enforcement of all the implied conditions seems very error-prone.

>
> We can't enforce the size being zero in this case because the size is
> doing double duty. It's both the length of the requested area of
> access into the dnyptr, and the size of the buffer that it might copy

yep, completely missed this double use of that constant, ignore my
point about enforcing sz==0 then

> that data into. If we don't provide a buffer, then it doesn't make
> sense to check that buffer's size. The size does still apply to the
> returned pointer though. Within the kfunc, it just needs to check for

yep

> null before copying dynptr data, as well as the regular enforcement of
> length against the dynprt/offset.
>
> > it's scary to just ignore some error, tbh, the number of error
> > conditions can grow overtime and we'll be masking them with this
> > is_kfunc_arg_optional() override. Let's be strict and explicit here.
> >
> It would probably make more sense to check is_kfunc_arg_optional and
> skip the size check altogether. Either way we're just relying on
> runtime checks against the dynptr at that point. If the buffer is
> known null and optional, we don't care what the relationship between
> the buffer and the size is, just that size and the dynptr. __szk
> already takes care of it being a constant. This doesn't affect the
> return buffer size.

yep, I agree about the logic, I'm concerned with the conflated
implementation, as I tried to explain above
Daniel Rosenberg April 29, 2023, 1:57 a.m. UTC | #4
On Thu, Apr 6, 2023 at 2:09 PM Andrii Nakryiko
<andrii.nakryiko@gmail.com> wrote:
>
> would this work correctly if someone passes a non-null buffer with too
> small size? Can you please add a test for this use case.
>
Working on a test case for this, but the test case I wrote fails
without my patches.
I'm just declaring a buffer of size 9 on the stack, and then passing
that buffer and size 10 to bpf_dynptr_slice. That's passing the
verifier just fine. In fact, it loads successfully up to size 16. I'm
guessing that's adjusting for alignment? Still feels very strange. Is
that expected behavior?
Andrii Nakryiko May 3, 2023, 6:34 p.m. UTC | #5
On Fri, Apr 28, 2023 at 6:58 PM Daniel Rosenberg <drosen@google.com> wrote:
>
> On Thu, Apr 6, 2023 at 2:09 PM Andrii Nakryiko
> <andrii.nakryiko@gmail.com> wrote:
> >
> > would this work correctly if someone passes a non-null buffer with too
> > small size? Can you please add a test for this use case.
> >
> Working on a test case for this, but the test case I wrote fails
> without my patches.
> I'm just declaring a buffer of size 9 on the stack, and then passing
> in bpf_dynptr_slice that buffer, and size 10. That's passing the
> verifier just fine. In fact, it loads successfully up to size 16. I'm
> guessing that's adjusting for alignment? Still feels very strange. Is
> that expected behavior?

pointer to stack is trickier (verifier will just mark part of stack as
overwritten with random data), it's best to use map value pointer as a
source of buffer. So try using ARRAY map with small value_size, do
lookup_elem, check for NULL, and pass non-NULL pointer as a buffer.
diff mbox series

Patch

diff --git a/Documentation/bpf/kfuncs.rst b/Documentation/bpf/kfuncs.rst
index d8a16c4bef7f..69573b511233 100644
--- a/Documentation/bpf/kfuncs.rst
+++ b/Documentation/bpf/kfuncs.rst
@@ -100,7 +100,7 @@  Hence, whenever a constant scalar argument is accepted by a kfunc which is not a
 size parameter, and the value of the constant matters for program safety, __k
 suffix should be used.
 
-2.2.2 __uninit Annotation
+2.2.3 __uninit Annotation
 -------------------------
 
 This annotation is used to indicate that the argument will be treated as
@@ -117,6 +117,27 @@  Here, the dynptr will be treated as an uninitialized dynptr. Without this
 annotation, the verifier will reject the program if the dynptr passed in is
 not initialized.
 
+2.2.4 __opt Annotation
+-------------------------
+
+This annotation is used to indicate that the buffer associated with an __sz or __szk
+argument may be null. If the function is passed a nullptr in place of the buffer,
+the verifier will not check that length is appropriate for the buffer. The kfunc is
+responsible for checking if this buffer is null before using it.
+
+An example is given below::
+
+        __bpf_kfunc void *bpf_dynptr_slice(..., void *buffer__opt, u32 buffer__szk)
+        {
+        ...
+        }
+
+Here, the buffer may be null. If buffer is not null, it is at least of size buffer__szk.
+Either way, the returned buffer is either NULL, or of size buffer__szk. Without this
+annotation, the verifier will reject the program if a null pointer is passed in with
+a nonzero size.
+
+
 .. _BPF_kfunc_nodef:
 
 2.3 Using an existing kernel function
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 6be16db9f188..f08556fd8b96 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -2145,13 +2145,15 @@  __bpf_kfunc struct task_struct *bpf_task_from_pid(s32 pid)
  * bpf_dynptr_slice() - Obtain a read-only pointer to the dynptr data.
  * @ptr: The dynptr whose data slice to retrieve
  * @offset: Offset into the dynptr
- * @buffer: User-provided buffer to copy contents into
- * @buffer__szk: Size (in bytes) of the buffer. This is the length of the
- *		 requested slice. This must be a constant.
+ * @buffer__opt: User-provided buffer to copy contents into.  May be NULL
+ * @buffer__szk: Size (in bytes) of the buffer if present. This is the
+ *               length of the requested slice. This must be a constant.
  *
  * For non-skb and non-xdp type dynptrs, there is no difference between
  * bpf_dynptr_slice and bpf_dynptr_data.
  *
+ * If buffer__opt is NULL, the call will fail if buffer__opt was needed.
+ *
  * If the intention is to write to the data slice, please use
  * bpf_dynptr_slice_rdwr.
  *
@@ -2168,7 +2170,7 @@  __bpf_kfunc struct task_struct *bpf_task_from_pid(s32 pid)
  * direct pointer)
  */
 __bpf_kfunc void *bpf_dynptr_slice(const struct bpf_dynptr_kern *ptr, u32 offset,
-				   void *buffer, u32 buffer__szk)
+				   void *buffer__opt, u32 buffer__szk)
 {
 	enum bpf_dynptr_type type;
 	u32 len = buffer__szk;
@@ -2188,15 +2190,19 @@  __bpf_kfunc void *bpf_dynptr_slice(const struct bpf_dynptr_kern *ptr, u32 offset
 	case BPF_DYNPTR_TYPE_RINGBUF:
 		return ptr->data + ptr->offset + offset;
 	case BPF_DYNPTR_TYPE_SKB:
-		return skb_header_pointer(ptr->data, ptr->offset + offset, len, buffer);
+		if (!buffer__opt)
+			return NULL;
+		return skb_header_pointer(ptr->data, ptr->offset + offset, len, buffer__opt);
 	case BPF_DYNPTR_TYPE_XDP:
 	{
 		void *xdp_ptr = bpf_xdp_pointer(ptr->data, ptr->offset + offset, len);
 		if (xdp_ptr)
 			return xdp_ptr;
 
-		bpf_xdp_copy_buf(ptr->data, ptr->offset + offset, buffer, len, false);
-		return buffer;
+		if (!buffer__opt)
+			return NULL;
+		bpf_xdp_copy_buf(ptr->data, ptr->offset + offset, buffer__opt, len, false);
+		return buffer__opt;
 	}
 	default:
 		WARN_ONCE(true, "unknown dynptr type %d\n", type);
@@ -2208,13 +2214,15 @@  __bpf_kfunc void *bpf_dynptr_slice(const struct bpf_dynptr_kern *ptr, u32 offset
  * bpf_dynptr_slice_rdwr() - Obtain a writable pointer to the dynptr data.
  * @ptr: The dynptr whose data slice to retrieve
  * @offset: Offset into the dynptr
- * @buffer: User-provided buffer to copy contents into
- * @buffer__szk: Size (in bytes) of the buffer. This is the length of the
- *		 requested slice. This must be a constant.
+ * @buffer__opt: User-provided buffer to copy contents into. May be NULL
+ * @buffer__szk: Size (in bytes) of the buffer if present. This is the
+ *               length of the requested slice. This must be a constant.
  *
  * For non-skb and non-xdp type dynptrs, there is no difference between
  * bpf_dynptr_slice and bpf_dynptr_data.
  *
+ * If buffer__opt is NULL, the call will fail if buffer__opt was needed.
+ *
  * The returned pointer is writable and may point to either directly the dynptr
  * data at the requested offset or to the buffer if unable to obtain a direct
  * data pointer to (example: the requested slice is to the paged area of an skb
@@ -2245,7 +2253,7 @@  __bpf_kfunc void *bpf_dynptr_slice(const struct bpf_dynptr_kern *ptr, u32 offset
  * direct pointer)
  */
 __bpf_kfunc void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr_kern *ptr, u32 offset,
-					void *buffer, u32 buffer__szk)
+					void *buffer__opt, u32 buffer__szk)
 {
 	if (!ptr->data || bpf_dynptr_is_rdonly(ptr))
 		return NULL;
@@ -2272,7 +2280,7 @@  __bpf_kfunc void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr_kern *ptr, u32 o
 	 * will be copied out into the buffer and the user will need to call
 	 * bpf_dynptr_write() to commit changes.
 	 */
-	return bpf_dynptr_slice(ptr, offset, buffer, buffer__szk);
+	return bpf_dynptr_slice(ptr, offset, buffer__opt, buffer__szk);
 }
 
 __bpf_kfunc void *bpf_cast_to_kern_ctx(void *obj)
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 20beab52812a..b82faef389b1 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -9428,6 +9428,19 @@  static bool is_kfunc_arg_const_mem_size(const struct btf *btf,
 	return __kfunc_param_match_suffix(btf, arg, "__szk");
 }
 
+static bool is_kfunc_arg_optional(const struct btf *btf,
+		  const struct btf_param *arg,
+		  const struct bpf_reg_state *reg)
+{
+	const struct btf_type *t;
+
+	t = btf_type_skip_modifiers(btf, arg->type, NULL);
+	if (!btf_type_is_ptr(t) || reg->type != SCALAR_VALUE || reg->umax_value > 0)
+		return false;
+
+	return __kfunc_param_match_suffix(btf, arg, "__opt");
+}
+
 static bool is_kfunc_arg_constant(const struct btf *btf, const struct btf_param *arg)
 {
 	return __kfunc_param_match_suffix(btf, arg, "__k");
@@ -10539,10 +10552,14 @@  static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
 			break;
 		case KF_ARG_PTR_TO_MEM_SIZE:
 		{
+			struct bpf_reg_state *buff_reg = &regs[regno];
+			const struct btf_param *buff_arg = &args[i];
 			struct bpf_reg_state *size_reg = &regs[regno + 1];
 			const struct btf_param *size_arg = &args[i + 1];
 
 			ret = check_kfunc_mem_size_reg(env, size_reg, regno + 1);
+			if (ret < 0 && is_kfunc_arg_optional(meta->btf, buff_arg, buff_reg))
+				ret = 0;
 			if (ret < 0) {
 				verbose(env, "arg#%d arg#%d memory, len pair leads to invalid memory access\n", i, i + 1);
 				return ret;