diff mbox series

[bpf-next,v2,3/5] bpf: compute data_end dynamically with JIT code

Message ID 20210210022136.146528-4-xiyou.wangcong@gmail.com (mailing list archive)
State Superseded
Delegated to: BPF
Headers show
Series sock_map: clean up and refactor code for BPF_SK_SKB_VERDICT | expand

Checks

Context Check Description
netdev/cover_letter success Link
netdev/fixes_present success Link
netdev/patch_count success Link
netdev/tree_selection success Clearly marked for bpf-next
netdev/subject_prefix success Link
netdev/cc_maintainers warning 9 maintainers not CCed: yhs@fb.com davem@davemloft.net kafai@fb.com edumazet@google.com ast@kernel.org songliubraving@fb.com kpsingh@kernel.org kuba@kernel.org andrii@kernel.org
netdev/source_inline success Was 0 now: 0
netdev/verify_signedoff success Link
netdev/module_param success Was 0 now: 0
netdev/build_32bit fail Errors and warnings before: 1503 this patch: 1504
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/verify_fixes success Link
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 107 lines checked
netdev/build_allmodconfig_warn fail Errors and warnings before: 1505 this patch: 1506
netdev/header_inline success Link
netdev/stable success Stable not CCed

Commit Message

Cong Wang Feb. 10, 2021, 2:21 a.m. UTC
From: Cong Wang <cong.wang@bytedance.com>

Currently, we compute ->data_end with a compile-time constant
offset of skb. But as Jakub pointed out, we can actually compute
it in eBPF JIT code at run-time, so that we can completely get
rid of ->data_end. This is similar to skb_shinfo(skb) computation
in bpf_convert_shinfo_access().

Suggested-by: Jakub Sitnicki <jakub@cloudflare.com>
Cc: John Fastabend <john.fastabend@gmail.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Lorenz Bauer <lmb@cloudflare.com>
Signed-off-by: Cong Wang <cong.wang@bytedance.com>
---
 include/net/tcp.h |  6 ------
 net/core/filter.c | 46 +++++++++++++++++++++++++++-------------------
 net/core/skmsg.c  |  1 -
 3 files changed, 27 insertions(+), 26 deletions(-)

Comments

Lorenz Bauer Feb. 12, 2021, 10:55 a.m. UTC | #1
On Wed, 10 Feb 2021 at 02:22, Cong Wang <xiyou.wangcong@gmail.com> wrote:
>
> From: Cong Wang <cong.wang@bytedance.com>
>
> Currently, we compute ->data_end with a compile-time constant
> offset of skb. But as Jakub pointed out, we can actually compute
> it in eBPF JIT code at run-time, so that we can completely get
> rid of ->data_end. This is similar to skb_shinfo(skb) computation
> in bpf_convert_shinfo_access().
>
> Suggested-by: Jakub Sitnicki <jakub@cloudflare.com>
> Cc: John Fastabend <john.fastabend@gmail.com>
> Cc: Daniel Borkmann <daniel@iogearbox.net>
> Cc: Lorenz Bauer <lmb@cloudflare.com>
> Signed-off-by: Cong Wang <cong.wang@bytedance.com>

...

> @@ -9520,6 +9510,29 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
>         return insn - insn_buf;
>  }
>
> +static struct bpf_insn *bpf_convert_data_end_access(const struct bpf_insn *si,
> +                                                   struct bpf_insn *insn)

Is it worth adding a reference to this function in skb_headlen(),
since we're basically open coding that function here?

> +{
> +       /* si->dst_reg = skb->data */
> +       *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, data),
> +                             si->dst_reg, si->src_reg,
> +                             offsetof(struct sk_buff, data));
> +       /* AX = skb->len */
> +       *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, len),
> +                             BPF_REG_AX, si->src_reg,
> +                             offsetof(struct sk_buff, len));
> +       /* si->dst_reg = skb->data + skb->len */
> +       *insn++ = BPF_ALU64_REG(BPF_ADD, si->dst_reg, BPF_REG_AX);
> +       /* AX = skb->data_len */
> +       *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, data_len),
> +                             BPF_REG_AX, si->src_reg,
> +                             offsetof(struct sk_buff, data_len));
> +       /* si->dst_reg = skb->data + skb->len - skb->data_len */
> +       *insn++ = BPF_ALU64_REG(BPF_SUB, si->dst_reg, BPF_REG_AX);
> +
> +       return insn;
> +}
> +
>  static u32 sk_skb_convert_ctx_access(enum bpf_access_type type,
>                                      const struct bpf_insn *si,
>                                      struct bpf_insn *insn_buf,
> @@ -9530,12 +9543,7 @@ static u32 sk_skb_convert_ctx_access(enum bpf_access_type type,
>
>         switch (si->off) {
>         case offsetof(struct __sk_buff, data_end):
> -               off  = si->off;
> -               off -= offsetof(struct __sk_buff, data_end);
> -               off += offsetof(struct sk_buff, cb);
> -               off += offsetof(struct tcp_skb_cb, bpf.data_end);
> -               *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg,
> -                                     si->src_reg, off);
> +               insn = bpf_convert_data_end_access(si, insn);

This generates a new warning:

../net/core/filter.c: In function ‘sk_skb_convert_ctx_access’:
../net/core/filter.c:9542:6: warning: unused variable ‘off’ [-Wunused-variable]
 9542 |  int off;
      |      ^~~

--
Lorenz Bauer  |  Systems Engineer
6th Floor, County Hall/The Riverside Building, SE1 7PB, UK

www.cloudflare.com
Cong Wang Feb. 12, 2021, 7:01 p.m. UTC | #2
On Fri, Feb 12, 2021 at 2:56 AM Lorenz Bauer <lmb@cloudflare.com> wrote:
>
> On Wed, 10 Feb 2021 at 02:22, Cong Wang <xiyou.wangcong@gmail.com> wrote:
> >
> > From: Cong Wang <cong.wang@bytedance.com>
> >
> > Currently, we compute ->data_end with a compile-time constant
> > offset of skb. But as Jakub pointed out, we can actually compute
> > it in eBPF JIT code at run-time, so that we can completely get
> > rid of ->data_end. This is similar to skb_shinfo(skb) computation
> > in bpf_convert_shinfo_access().
> >
> > Suggested-by: Jakub Sitnicki <jakub@cloudflare.com>
> > Cc: John Fastabend <john.fastabend@gmail.com>
> > Cc: Daniel Borkmann <daniel@iogearbox.net>
> > Cc: Lorenz Bauer <lmb@cloudflare.com>
> > Signed-off-by: Cong Wang <cong.wang@bytedance.com>
>
> ...
>
> > @@ -9520,6 +9510,29 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
> >         return insn - insn_buf;
> >  }
> >
> > +static struct bpf_insn *bpf_convert_data_end_access(const struct bpf_insn *si,
> > +                                                   struct bpf_insn *insn)
>
> Is it worth adding a reference to this function in skb_headlen(),
> since we're basically open coding that function here?

I do not mind adding a comment for this.

>
> > +{
> > +       /* si->dst_reg = skb->data */
> > +       *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, data),
> > +                             si->dst_reg, si->src_reg,
> > +                             offsetof(struct sk_buff, data));
> > +       /* AX = skb->len */
> > +       *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, len),
> > +                             BPF_REG_AX, si->src_reg,
> > +                             offsetof(struct sk_buff, len));
> > +       /* si->dst_reg = skb->data + skb->len */
> > +       *insn++ = BPF_ALU64_REG(BPF_ADD, si->dst_reg, BPF_REG_AX);
> > +       /* AX = skb->data_len */
> > +       *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, data_len),
> > +                             BPF_REG_AX, si->src_reg,
> > +                             offsetof(struct sk_buff, data_len));
> > +       /* si->dst_reg = skb->data + skb->len - skb->data_len */
> > +       *insn++ = BPF_ALU64_REG(BPF_SUB, si->dst_reg, BPF_REG_AX);
> > +
> > +       return insn;
> > +}
> > +
> >  static u32 sk_skb_convert_ctx_access(enum bpf_access_type type,
> >                                      const struct bpf_insn *si,
> >                                      struct bpf_insn *insn_buf,
> > @@ -9530,12 +9543,7 @@ static u32 sk_skb_convert_ctx_access(enum bpf_access_type type,
> >
> >         switch (si->off) {
> >         case offsetof(struct __sk_buff, data_end):
> > -               off  = si->off;
> > -               off -= offsetof(struct __sk_buff, data_end);
> > -               off += offsetof(struct sk_buff, cb);
> > -               off += offsetof(struct tcp_skb_cb, bpf.data_end);
> > -               *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg,
> > -                                     si->src_reg, off);
> > +               insn = bpf_convert_data_end_access(si, insn);
>
> This generates a new warning:
>
> ../net/core/filter.c: In function ‘sk_skb_convert_ctx_access’:
> ../net/core/filter.c:9542:6: warning: unused variable ‘off’ [-Wunused-variable]
>  9542 |  int off;
>       |      ^~~

Good catch!

Apparently neither my compiler nor kernel-test-bot's catches this.

Thanks.
diff mbox series

Patch

diff --git a/include/net/tcp.h b/include/net/tcp.h
index dfb20d51bf3d..808d5292cf13 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -885,18 +885,12 @@  struct tcp_skb_cb {
 		struct {
 			__u32 flags;
 			struct sock *sk_redir;
-			void *data_end;
 		} bpf;
 	};
 };
 
 #define TCP_SKB_CB(__skb)	((struct tcp_skb_cb *)&((__skb)->cb[0]))
 
-static inline void bpf_compute_data_end_sk_skb(struct sk_buff *skb)
-{
-	TCP_SKB_CB(skb)->bpf.data_end = skb->data + skb_headlen(skb);
-}
-
 static inline bool tcp_skb_bpf_ingress(const struct sk_buff *skb)
 {
 	return TCP_SKB_CB(skb)->bpf.flags & BPF_F_INGRESS;
diff --git a/net/core/filter.c b/net/core/filter.c
index e15d4741719a..b2b18426d280 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1863,10 +1863,7 @@  static const struct bpf_func_proto bpf_sk_fullsock_proto = {
 static inline int sk_skb_try_make_writable(struct sk_buff *skb,
 					   unsigned int write_len)
 {
-	int err = __bpf_try_make_writable(skb, write_len);
-
-	bpf_compute_data_end_sk_skb(skb);
-	return err;
+	return __bpf_try_make_writable(skb, write_len);
 }
 
 BPF_CALL_2(sk_skb_pull_data, struct sk_buff *, skb, u32, len)
@@ -3581,7 +3578,6 @@  BPF_CALL_4(sk_skb_adjust_room, struct sk_buff *, skb, s32, len_diff,
 			return -ENOMEM;
 		__skb_pull(skb, len_diff_abs);
 	}
-	bpf_compute_data_end_sk_skb(skb);
 	if (tls_sw_has_ctx_rx(skb->sk)) {
 		struct strp_msg *rxm = strp_msg(skb);
 
@@ -3746,10 +3742,7 @@  static const struct bpf_func_proto bpf_skb_change_tail_proto = {
 BPF_CALL_3(sk_skb_change_tail, struct sk_buff *, skb, u32, new_len,
 	   u64, flags)
 {
-	int ret = __bpf_skb_change_tail(skb, new_len, flags);
-
-	bpf_compute_data_end_sk_skb(skb);
-	return ret;
+	return __bpf_skb_change_tail(skb, new_len, flags);
 }
 
 static const struct bpf_func_proto sk_skb_change_tail_proto = {
@@ -3812,10 +3805,7 @@  static const struct bpf_func_proto bpf_skb_change_head_proto = {
 BPF_CALL_3(sk_skb_change_head, struct sk_buff *, skb, u32, head_room,
 	   u64, flags)
 {
-	int ret = __bpf_skb_change_head(skb, head_room, flags);
-
-	bpf_compute_data_end_sk_skb(skb);
-	return ret;
+	return __bpf_skb_change_head(skb, head_room, flags);
 }
 
 static const struct bpf_func_proto sk_skb_change_head_proto = {
@@ -9520,6 +9510,29 @@  static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
 	return insn - insn_buf;
 }
 
+static struct bpf_insn *bpf_convert_data_end_access(const struct bpf_insn *si,
+						    struct bpf_insn *insn)
+{
+	/* si->dst_reg = skb->data */
+	*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, data),
+			      si->dst_reg, si->src_reg,
+			      offsetof(struct sk_buff, data));
+	/* AX = skb->len */
+	*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, len),
+			      BPF_REG_AX, si->src_reg,
+			      offsetof(struct sk_buff, len));
+	/* si->dst_reg = skb->data + skb->len */
+	*insn++ = BPF_ALU64_REG(BPF_ADD, si->dst_reg, BPF_REG_AX);
+	/* AX = skb->data_len */
+	*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, data_len),
+			      BPF_REG_AX, si->src_reg,
+			      offsetof(struct sk_buff, data_len));
+	/* si->dst_reg = skb->data + skb->len - skb->data_len */
+	*insn++ = BPF_ALU64_REG(BPF_SUB, si->dst_reg, BPF_REG_AX);
+
+	return insn;
+}
+
 static u32 sk_skb_convert_ctx_access(enum bpf_access_type type,
 				     const struct bpf_insn *si,
 				     struct bpf_insn *insn_buf,
@@ -9530,12 +9543,7 @@  static u32 sk_skb_convert_ctx_access(enum bpf_access_type type,
 
 	switch (si->off) {
 	case offsetof(struct __sk_buff, data_end):
-		off  = si->off;
-		off -= offsetof(struct __sk_buff, data_end);
-		off += offsetof(struct sk_buff, cb);
-		off += offsetof(struct tcp_skb_cb, bpf.data_end);
-		*insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg,
-				      si->src_reg, off);
+		insn = bpf_convert_data_end_access(si, insn);
 		break;
 	default:
 		return bpf_convert_ctx_access(type, si, insn_buf, prog,
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index 9d673179e886..64166e48999c 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -746,7 +746,6 @@  EXPORT_SYMBOL_GPL(sk_psock_msg_verdict);
 static int sk_psock_bpf_run(struct sk_psock *psock, struct bpf_prog *prog,
 			    struct sk_buff *skb)
 {
-	bpf_compute_data_end_sk_skb(skb);
 	return bpf_prog_run_pin_on_cpu(prog, skb);
 }