diff mbox series

[bpf-next,v1,6/7] bpf: Dynptr support for ring buffers

Message ID 20220402015826.3941317-7-joannekoong@fb.com (mailing list archive)
State Changes Requested
Delegated to: BPF
Headers show
Series Dynamic pointers | expand

Checks

Context Check Description
bpf/vmtest-bpf-next-PR fail PR summary
netdev/tree_selection success Clearly marked for bpf-next, async
netdev/fixes_present success Fixes tag not required for -next series
netdev/subject_prefix success Link
netdev/cover_letter success Series has a cover letter
netdev/patch_count success Link
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit fail Errors and warnings before: 1818 this patch: 1819
netdev/cc_maintainers warning 6 maintainers not CCed: songliubraving@fb.com netdev@vger.kernel.org kafai@fb.com yhs@fb.com john.fastabend@gmail.com kpsingh@kernel.org
netdev/build_clang success Errors and warnings before: 195 this patch: 195
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 1828 this patch: 1828
netdev/checkpatch warning WARNING: line length of 81 exceeds 80 columns WARNING: line length of 84 exceeds 80 columns WARNING: line length of 93 exceeds 80 columns WARNING: line length of 94 exceeds 80 columns
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Joanne Koong April 2, 2022, 1:58 a.m. UTC
From: Joanne Koong <joannelkoong@gmail.com>

Currently, our only way of writing dynamically-sized data into a ring
buffer is through bpf_ringbuf_output but this incurs an extra memcpy
cost. bpf_ringbuf_reserve + bpf_ringbuf_commit avoids this extra
memcpy, but it can only safely support reservation sizes that are
statically known since the verifier cannot guarantee that the bpf
program won’t access memory outside the reserved space.

The bpf_dynptr abstraction allows for dynamically-sized ring buffer
reservations without the extra memcpy.

There are 3 new APIs:

long bpf_ringbuf_reserve_dynptr(void *ringbuf, u32 size, u64 flags, struct bpf_dynptr *ptr);
void bpf_ringbuf_submit_dynptr(struct bpf_dynptr *ptr, u64 flags);
void bpf_ringbuf_discard_dynptr(struct bpf_dynptr *ptr, u64 flags);

These closely follow the functionalities of the original ringbuf APIs.
For example, all ringbuffer dynptrs that have been reserved must be
either submitted or discarded before the program exits.

Signed-off-by: Joanne Koong <joannelkoong@gmail.com>
---
 include/linux/bpf.h            | 10 ++++-
 include/uapi/linux/bpf.h       | 30 ++++++++++++++
 kernel/bpf/helpers.c           |  6 +++
 kernel/bpf/ringbuf.c           | 71 ++++++++++++++++++++++++++++++++++
 kernel/bpf/verifier.c          | 17 ++++++--
 tools/include/uapi/linux/bpf.h | 30 ++++++++++++++
 6 files changed, 160 insertions(+), 4 deletions(-)

Comments

kernel test robot April 2, 2022, 6:40 a.m. UTC | #1
Hi Joanne,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on bpf-next/master]

url:    https://github.com/intel-lab-lkp/linux/commits/Joanne-Koong/Dynamic-pointers/20220402-100110
base:   https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git master
config: powerpc-allmodconfig (https://download.01.org/0day-ci/archive/20220402/202204021459.6f2G1oTF-lkp@intel.com/config)
compiler: powerpc-linux-gcc (GCC) 11.2.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/intel-lab-lkp/linux/commit/64c5b9e2d2df7ff61dd8bd2e36a29ffff264e2ff
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review Joanne-Koong/Dynamic-pointers/20220402-100110
        git checkout 64c5b9e2d2df7ff61dd8bd2e36a29ffff264e2ff
        # save the config file to linux build tree
        mkdir build_dir
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-11.2.0 make.cross O=build_dir ARCH=powerpc SHELL=/bin/bash kernel/bpf/

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All warnings (new ones prefixed by >>):

   kernel/bpf/ringbuf.c: In function '____bpf_ringbuf_reserve_dynptr':
>> kernel/bpf/ringbuf.c:491:18: warning: cast to pointer from integer of different size [-Wint-to-pointer-cast]
     491 |         sample = (void *)____bpf_ringbuf_reserve(map, size, flags);
         |                  ^


vim +491 kernel/bpf/ringbuf.c

   478	
   479	BPF_CALL_4(bpf_ringbuf_reserve_dynptr, struct bpf_map *, map, u32, size, u64, flags,
   480		   struct bpf_dynptr_kern *, ptr)
   481	{
   482		void *sample;
   483		int err;
   484	
   485		err = bpf_dynptr_check_size(size);
   486		if (err) {
   487			bpf_dynptr_set_null(ptr);
   488			return err;
   489		}
   490	
 > 491		sample = (void *)____bpf_ringbuf_reserve(map, size, flags);
   492	
   493		if (!sample) {
   494			bpf_dynptr_set_null(ptr);
   495			return -EINVAL;
   496		}
   497	
   498		bpf_dynptr_init(ptr, sample, BPF_DYNPTR_TYPE_RINGBUF, 0, size);
   499	
   500		return 0;
   501	}
   502
Andrii Nakryiko April 6, 2022, 10:50 p.m. UTC | #2
On Fri, Apr 1, 2022 at 7:00 PM Joanne Koong <joannekoong@fb.com> wrote:
>
> From: Joanne Koong <joannelkoong@gmail.com>
>
> Currently, our only way of writing dynamically-sized data into a ring
> buffer is through bpf_ringbuf_output but this incurs an extra memcpy
> cost. bpf_ringbuf_reserve + bpf_ringbuf_commit avoids this extra
> memcpy, but it can only safely support reservation sizes that are
> statically known since the verifier cannot guarantee that the bpf
> program won’t access memory outside the reserved space.
>
> The bpf_dynptr abstraction allows for dynamically-sized ring buffer
> reservations without the extra memcpy.
>
> There are 3 new APIs:
>
> long bpf_ringbuf_reserve_dynptr(void *ringbuf, u32 size, u64 flags, struct bpf_dynptr *ptr);
> void bpf_ringbuf_submit_dynptr(struct bpf_dynptr *ptr, u64 flags);
> void bpf_ringbuf_discard_dynptr(struct bpf_dynptr *ptr, u64 flags);
>
> These closely follow the functionalities of the original ringbuf APIs.
> For example, all ringbuffer dynptrs that have been reserved must be
> either submitted or discarded before the program exits.
>
> Signed-off-by: Joanne Koong <joannelkoong@gmail.com>
> ---
>  include/linux/bpf.h            | 10 ++++-
>  include/uapi/linux/bpf.h       | 30 ++++++++++++++
>  kernel/bpf/helpers.c           |  6 +++
>  kernel/bpf/ringbuf.c           | 71 ++++++++++++++++++++++++++++++++++
>  kernel/bpf/verifier.c          | 17 ++++++--
>  tools/include/uapi/linux/bpf.h | 30 ++++++++++++++
>  6 files changed, 160 insertions(+), 4 deletions(-)
>

Looks great and is a very straightforward implementation, great job!
Please fix the warning and maybe expand a bit on "failure modes", but
otherwise:

Acked-by: Andrii Nakryiko <andrii@kernel.org>

>  /* The upper 4 bits of dynptr->size are reserved. Consequently, the
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index c835e437cb28..778de0b052c1 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -5202,6 +5202,33 @@ union bpf_attr {
>   *             Pointer to the underlying dynptr data, NULL if the ptr is
>   *             read-only, if the dynptr is invalid, or if the offset and length
>   *             is out of bounds.
> + *
> + * long bpf_ringbuf_reserve_dynptr(void *ringbuf, u32 size, u64 flags, struct bpf_dynptr *ptr)

looking at this, out param dynptr at the end makes more sense again...
ok, I'm fine either way I guess :)

> + *     Description
> + *             Reserve *size* bytes of payload in a ring buffer *ringbuf*
> + *             through the dynptr interface. *flags* must be 0.
> + *     Return
> + *             0 on success, or a negative error in case of failure.

It would be good to mention that the verifier will enforce submit or
discard even when reservation fails. And submit_dnyptr/discard_dynptr
is a no-op for such failed/null dynptrs.

> + *
> + * void bpf_ringbuf_submit_dynptr(struct bpf_dynptr *ptr, u64 flags)
> + *     Description
> + *             Submit reserved ring buffer sample, pointed to by *data*,
> + *             through the dynptr interface.
> + *
> + *             For more information on *flags*, please see
> + *             'bpf_ringbuf_submit'.
> + *     Return
> + *             Nothing. Always succeeds.
> + *

[...]
diff mbox series

Patch

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index cded9753fb7f..2672360172c5 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -352,7 +352,10 @@  enum bpf_type_flag {
 	/* DYNPTR points to dynamically allocated memory. */
 	DYNPTR_TYPE_MALLOC	= BIT(8 + BPF_BASE_TYPE_BITS),
 
-	__BPF_TYPE_LAST_FLAG	= DYNPTR_TYPE_MALLOC,
+	/* DYNPTR points to a ringbuf record. */
+	DYNPTR_TYPE_RINGBUF	= BIT(9 + BPF_BASE_TYPE_BITS),
+
+	__BPF_TYPE_LAST_FLAG	= DYNPTR_TYPE_RINGBUF,
 };
 
 /* Max number of base types. */
@@ -2255,6 +2258,9 @@  extern const struct bpf_func_proto bpf_ringbuf_reserve_proto;
 extern const struct bpf_func_proto bpf_ringbuf_submit_proto;
 extern const struct bpf_func_proto bpf_ringbuf_discard_proto;
 extern const struct bpf_func_proto bpf_ringbuf_query_proto;
+extern const struct bpf_func_proto bpf_ringbuf_reserve_dynptr_proto;
+extern const struct bpf_func_proto bpf_ringbuf_submit_dynptr_proto;
+extern const struct bpf_func_proto bpf_ringbuf_discard_dynptr_proto;
 extern const struct bpf_func_proto bpf_skc_to_tcp6_sock_proto;
 extern const struct bpf_func_proto bpf_skc_to_tcp_sock_proto;
 extern const struct bpf_func_proto bpf_skc_to_tcp_timewait_sock_proto;
@@ -2418,6 +2424,8 @@  enum bpf_dynptr_type {
 	BPF_DYNPTR_TYPE_LOCAL,
 	/* Memory allocated dynamically by the kernel for the dynptr */
 	BPF_DYNPTR_TYPE_MALLOC,
+	/* Underlying data is a ringbuf record */
+	BPF_DYNPTR_TYPE_RINGBUF,
 };
 
 /* The upper 4 bits of dynptr->size are reserved. Consequently, the
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index c835e437cb28..778de0b052c1 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -5202,6 +5202,33 @@  union bpf_attr {
  *		Pointer to the underlying dynptr data, NULL if the ptr is
  *		read-only, if the dynptr is invalid, or if the offset and length
  *		is out of bounds.
+ *
+ * long bpf_ringbuf_reserve_dynptr(void *ringbuf, u32 size, u64 flags, struct bpf_dynptr *ptr)
+ *	Description
+ *		Reserve *size* bytes of payload in a ring buffer *ringbuf*
+ *		through the dynptr interface. *flags* must be 0.
+ *	Return
+ *		0 on success, or a negative error in case of failure.
+ *
+ * void bpf_ringbuf_submit_dynptr(struct bpf_dynptr *ptr, u64 flags)
+ *	Description
+ *		Submit reserved ring buffer sample, pointed to by *data*,
+ *		through the dynptr interface.
+ *
+ *		For more information on *flags*, please see
+ *		'bpf_ringbuf_submit'.
+ *	Return
+ *		Nothing. Always succeeds.
+ *
+ * void bpf_ringbuf_discard_dynptr(struct bpf_dynptr *ptr, u64 flags)
+ *	Description
+ *		Discard reserved ring buffer sample through the dynptr
+ *		interface.
+ *
+ *		For more information on *flags*, please see
+ *		'bpf_ringbuf_discard'.
+ *	Return
+ *		Nothing. Always succeeds.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -5404,6 +5431,9 @@  union bpf_attr {
 	FN(dynptr_read),		\
 	FN(dynptr_write),		\
 	FN(dynptr_data),		\
+	FN(ringbuf_reserve_dynptr),	\
+	FN(ringbuf_submit_dynptr),	\
+	FN(ringbuf_discard_dynptr),	\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index c1295fb5d9d4..7b1e467f0504 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -1580,6 +1580,12 @@  bpf_base_func_proto(enum bpf_func_id func_id)
 		return &bpf_ringbuf_discard_proto;
 	case BPF_FUNC_ringbuf_query:
 		return &bpf_ringbuf_query_proto;
+	case BPF_FUNC_ringbuf_reserve_dynptr:
+		return &bpf_ringbuf_reserve_dynptr_proto;
+	case BPF_FUNC_ringbuf_submit_dynptr:
+		return &bpf_ringbuf_submit_dynptr_proto;
+	case BPF_FUNC_ringbuf_discard_dynptr:
+		return &bpf_ringbuf_discard_dynptr_proto;
 	case BPF_FUNC_for_each_map_elem:
 		return &bpf_for_each_map_elem_proto;
 	case BPF_FUNC_loop:
diff --git a/kernel/bpf/ringbuf.c b/kernel/bpf/ringbuf.c
index a723aa484ce4..cdbeeb4819ae 100644
--- a/kernel/bpf/ringbuf.c
+++ b/kernel/bpf/ringbuf.c
@@ -475,3 +475,74 @@  const struct bpf_func_proto bpf_ringbuf_query_proto = {
 	.arg1_type	= ARG_CONST_MAP_PTR,
 	.arg2_type	= ARG_ANYTHING,
 };
+
+BPF_CALL_4(bpf_ringbuf_reserve_dynptr, struct bpf_map *, map, u32, size, u64, flags,
+	   struct bpf_dynptr_kern *, ptr)
+{
+	void *sample;
+	int err;
+
+	err = bpf_dynptr_check_size(size);
+	if (err) {
+		bpf_dynptr_set_null(ptr);
+		return err;
+	}
+
+	sample = (void *)____bpf_ringbuf_reserve(map, size, flags);
+
+	if (!sample) {
+		bpf_dynptr_set_null(ptr);
+		return -EINVAL;
+	}
+
+	bpf_dynptr_init(ptr, sample, BPF_DYNPTR_TYPE_RINGBUF, 0, size);
+
+	return 0;
+}
+
+const struct bpf_func_proto bpf_ringbuf_reserve_dynptr_proto = {
+	.func		= bpf_ringbuf_reserve_dynptr,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_CONST_MAP_PTR,
+	.arg2_type	= ARG_ANYTHING,
+	.arg3_type	= ARG_ANYTHING,
+	.arg4_type	= ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_RINGBUF | MEM_UNINIT,
+};
+
+BPF_CALL_2(bpf_ringbuf_submit_dynptr, struct bpf_dynptr_kern *, ptr, u64, flags)
+{
+	if (!ptr->data)
+		return 0;
+
+	____bpf_ringbuf_submit(ptr->data, flags);
+
+	bpf_dynptr_set_null(ptr);
+
+	return 0;
+}
+
+const struct bpf_func_proto bpf_ringbuf_submit_dynptr_proto = {
+	.func		= bpf_ringbuf_submit_dynptr,
+	.ret_type	= RET_VOID,
+	.arg1_type	= ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_RINGBUF | MEM_RELEASE,
+	.arg2_type	= ARG_ANYTHING,
+};
+
+BPF_CALL_2(bpf_ringbuf_discard_dynptr, struct bpf_dynptr_kern *, ptr, u64, flags)
+{
+	if (!ptr->data)
+		return 0;
+
+	____bpf_ringbuf_discard(ptr->data, flags);
+
+	bpf_dynptr_set_null(ptr);
+
+	return 0;
+}
+
+const struct bpf_func_proto bpf_ringbuf_discard_dynptr_proto = {
+	.func		= bpf_ringbuf_discard_dynptr,
+	.ret_type	= RET_VOID,
+	.arg1_type	= ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_RINGBUF | MEM_RELEASE,
+	.arg2_type	= ARG_ANYTHING,
+};
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 7352ffb4f9a5..6336476eac7d 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -679,7 +679,7 @@  static void mark_verifier_state_scratched(struct bpf_verifier_env *env)
 
 static int arg_to_dynptr_type(enum bpf_arg_type arg_type, enum bpf_dynptr_type *dynptr_type)
 {
-	int type = arg_type & (DYNPTR_TYPE_LOCAL | DYNPTR_TYPE_MALLOC);
+	int type = arg_type & (DYNPTR_TYPE_LOCAL | DYNPTR_TYPE_MALLOC | DYNPTR_TYPE_RINGBUF);
 
 	switch (type) {
 	case DYNPTR_TYPE_LOCAL:
@@ -688,6 +688,9 @@  static int arg_to_dynptr_type(enum bpf_arg_type arg_type, enum bpf_dynptr_type *
 	case DYNPTR_TYPE_MALLOC:
 		*dynptr_type = BPF_DYNPTR_TYPE_MALLOC;
 		break;
+	case DYNPTR_TYPE_RINGBUF:
+		*dynptr_type = BPF_DYNPTR_TYPE_RINGBUF;
+		break;
 	default:
 		/* Can't have more than one type set and can't have no
 		 * type set
@@ -702,7 +705,7 @@  static bool dynptr_type_refcounted(struct bpf_func_state *state, int spi)
 {
 	enum bpf_dynptr_type type = state->stack[spi].spilled_ptr.dynptr_type;
 
-	return type == BPF_DYNPTR_TYPE_MALLOC;
+	return type == BPF_DYNPTR_TYPE_MALLOC || type == BPF_DYNPTR_TYPE_RINGBUF;
 }
 
 static int mark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
@@ -5842,6 +5845,8 @@  static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
 					err_extra = "local ";
 				else if (arg_type & DYNPTR_TYPE_MALLOC)
 					err_extra = "malloc ";
+				else if (arg_type & DYNPTR_TYPE_RINGBUF)
+					err_extra = "ringbuf ";
 				verbose(env, "Expected an initialized %sdynptr as arg #%d\n",
 					err_extra, arg + 1);
 				return -EINVAL;
@@ -5966,7 +5971,10 @@  static int check_map_func_compatibility(struct bpf_verifier_env *env,
 	case BPF_MAP_TYPE_RINGBUF:
 		if (func_id != BPF_FUNC_ringbuf_output &&
 		    func_id != BPF_FUNC_ringbuf_reserve &&
-		    func_id != BPF_FUNC_ringbuf_query)
+		    func_id != BPF_FUNC_ringbuf_query &&
+		    func_id != BPF_FUNC_ringbuf_reserve_dynptr &&
+		    func_id != BPF_FUNC_ringbuf_submit_dynptr &&
+		    func_id != BPF_FUNC_ringbuf_discard_dynptr)
 			goto error;
 		break;
 	case BPF_MAP_TYPE_STACK_TRACE:
@@ -6082,6 +6090,9 @@  static int check_map_func_compatibility(struct bpf_verifier_env *env,
 	case BPF_FUNC_ringbuf_output:
 	case BPF_FUNC_ringbuf_reserve:
 	case BPF_FUNC_ringbuf_query:
+	case BPF_FUNC_ringbuf_reserve_dynptr:
+	case BPF_FUNC_ringbuf_submit_dynptr:
+	case BPF_FUNC_ringbuf_discard_dynptr:
 		if (map->map_type != BPF_MAP_TYPE_RINGBUF)
 			goto error;
 		break;
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index c835e437cb28..778de0b052c1 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -5202,6 +5202,33 @@  union bpf_attr {
  *		Pointer to the underlying dynptr data, NULL if the ptr is
  *		read-only, if the dynptr is invalid, or if the offset and length
  *		is out of bounds.
+ *
+ * long bpf_ringbuf_reserve_dynptr(void *ringbuf, u32 size, u64 flags, struct bpf_dynptr *ptr)
+ *	Description
+ *		Reserve *size* bytes of payload in a ring buffer *ringbuf*
+ *		through the dynptr interface. *flags* must be 0.
+ *	Return
+ *		0 on success, or a negative error in case of failure.
+ *
+ * void bpf_ringbuf_submit_dynptr(struct bpf_dynptr *ptr, u64 flags)
+ *	Description
+ *		Submit reserved ring buffer sample, pointed to by *data*,
+ *		through the dynptr interface.
+ *
+ *		For more information on *flags*, please see
+ *		'bpf_ringbuf_submit'.
+ *	Return
+ *		Nothing. Always succeeds.
+ *
+ * void bpf_ringbuf_discard_dynptr(struct bpf_dynptr *ptr, u64 flags)
+ *	Description
+ *		Discard reserved ring buffer sample through the dynptr
+ *		interface.
+ *
+ *		For more information on *flags*, please see
+ *		'bpf_ringbuf_discard'.
+ *	Return
+ *		Nothing. Always succeeds.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -5404,6 +5431,9 @@  union bpf_attr {
 	FN(dynptr_read),		\
 	FN(dynptr_write),		\
 	FN(dynptr_data),		\
+	FN(ringbuf_reserve_dynptr),	\
+	FN(ringbuf_submit_dynptr),	\
+	FN(ringbuf_discard_dynptr),	\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper