diff mbox series

[v3] skmsg: lose offset info in sk_psock_skb_ingress

Message ID 20210922093259.164013-1-liujian56@huawei.com (mailing list archive)
State Superseded
Delegated to: BPF
Headers show
Series [v3] skmsg: lose offset info in sk_psock_skb_ingress | expand

Checks

Context Check Description
netdev/cover_letter success Link
netdev/fixes_present success Link
netdev/patch_count success Link
netdev/tree_selection success Guessed tree name to be net-next
netdev/subject_prefix warning Target tree name not specified in the subject
netdev/cc_maintainers success CCed 14 of 14 maintainers
netdev/source_inline success Was 0 now: 0
netdev/verify_signedoff success Link
netdev/module_param success Was 0 now: 0
netdev/build_32bit success Errors and warnings before: 67 this patch: 67
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/verify_fixes success Link
netdev/checkpatch warning CHECK: Prefer using the BIT macro WARNING: line length of 81 exceeds 80 columns
netdev/build_allmodconfig_warn success Errors and warnings before: 67 this patch: 67
netdev/header_inline success Link
bpf/vmtest-bpf-next success VM_Test
bpf/vmtest-bpf-next-PR success PR summary
bpf/vmtest-bpf-PR pending PR summary
bpf/vmtest-bpf pending VM_Test

Commit Message

Liu Jian Sept. 22, 2021, 9:32 a.m. UTC
If sockmap enable strparser, there are lose offset info in
sk_psock_skb_ingress. If the length determined by parse_msg function
is not skb->len, the skb will be converted to sk_msg multiple times,
and userspace app will get the data multiple times.

Fix this by get the offset and length from strp_msg.
And as Cong suggestion, add one bit in skb->_sk_redir to distinguish
enable or disable strparser.

Signed-off-by: Liu Jian <liujian56@huawei.com>
Acked-by: John Fastabend <john.fastabend@gmail.com>

---
v1->v2: fix build error when disable CONFIG_BPF_STREAM_PARSER
v2->v3: Add one bit in skb->_sk_redir to distinguish enable or disable strparser

 include/linux/skmsg.h | 18 ++++++++++++++--
 net/core/skmsg.c      | 48 ++++++++++++++++++++++++++++++++++---------
 2 files changed, 54 insertions(+), 12 deletions(-)

Comments

Cong Wang Sept. 28, 2021, 5:52 a.m. UTC | #1
On Wed, Sep 22, 2021 at 2:32 AM Liu Jian <liujian56@huawei.com> wrote:
>  static void sk_psock_skb_state(struct sk_psock *psock,
> @@ -604,6 +608,9 @@ static void sk_psock_backlog(struct work_struct *work)
>  {
>         struct sk_psock *psock = container_of(work, struct sk_psock, work);
>         struct sk_psock_work_state *state = &psock->work_state;
> +#if IS_ENABLED(CONFIG_BPF_STREAM_PARSER)
> +       struct strp_msg *stm = NULL;
> +#endif
>         struct sk_buff *skb = NULL;
>         bool ingress;
>         u32 len, off;
> @@ -624,6 +631,13 @@ static void sk_psock_backlog(struct work_struct *work)
>         while ((skb = skb_dequeue(&psock->ingress_skb))) {
>                 len = skb->len;
>                 off = 0;
> +#if IS_ENABLED(CONFIG_BPF_STREAM_PARSER)
> +               if (skb_bpf_strparser(skb)) {

If CONFIG_BPF_STREAM_PARSER is disabled, this
should always return false, hence you don't need this #ifdef.
Or alternatively, you can at least define for nop for
skb_bpf_strparser() if !CONFIG_BPF_STREAM_PARSER.
And you can move the above "stm" down here too.

(Ditto for the other place below.)

Thanks.
Liu Jian Sept. 29, 2021, 2:08 a.m. UTC | #2
> -----Original Message-----
> From: Cong Wang [mailto:xiyou.wangcong@gmail.com]
> Sent: Tuesday, September 28, 2021 1:52 PM
> To: liujian (CE) <liujian56@huawei.com>
> Cc: John Fastabend <john.fastabend@gmail.com>; Daniel Borkmann
> <daniel@iogearbox.net>; Jakub Sitnicki <jakub@cloudflare.com>; Lorenz
> Bauer <lmb@cloudflare.com>; David Miller <davem@davemloft.net>; Jakub
> Kicinski <kuba@kernel.org>; Alexei Starovoitov <ast@kernel.org>; Andrii
> Nakryiko <andrii@kernel.org>; Martin KaFai Lau <kafai@fb.com>; Song Liu
> <songliubraving@fb.com>; Yonghong Song <yhs@fb.com>; KP Singh
> <kpsingh@kernel.org>; Linux Kernel Network Developers
> <netdev@vger.kernel.org>; bpf <bpf@vger.kernel.org>
> Subject: Re: [PATCH v3] skmsg: lose offset info in sk_psock_skb_ingress
> 
> On Wed, Sep 22, 2021 at 2:32 AM Liu Jian <liujian56@huawei.com> wrote:
> >  static void sk_psock_skb_state(struct sk_psock *psock, @@ -604,6
> > +608,9 @@ static void sk_psock_backlog(struct work_struct *work)  {
> >         struct sk_psock *psock = container_of(work, struct sk_psock, work);
> >         struct sk_psock_work_state *state = &psock->work_state;
> > +#if IS_ENABLED(CONFIG_BPF_STREAM_PARSER)
> > +       struct strp_msg *stm = NULL;
> > +#endif
> >         struct sk_buff *skb = NULL;
> >         bool ingress;
> >         u32 len, off;
> > @@ -624,6 +631,13 @@ static void sk_psock_backlog(struct work_struct
> *work)
> >         while ((skb = skb_dequeue(&psock->ingress_skb))) {
> >                 len = skb->len;
> >                 off = 0;
> > +#if IS_ENABLED(CONFIG_BPF_STREAM_PARSER)
> > +               if (skb_bpf_strparser(skb)) {
> 
> If CONFIG_BPF_STREAM_PARSER is disabled, this should always return false,
> hence you don't need this #ifdef.
> Or alternatively, you can at least define for nop for
> skb_bpf_strparser() if !CONFIG_BPF_STREAM_PARSER.
> And you can move the above "stm" down here too.
> 
V4 has been sent, thank you~

> (Ditto for the other place below.)
> 
> Thanks.
diff mbox series

Patch

diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
index 14ab0c0bc924..74eb0e97b4bd 100644
--- a/include/linux/skmsg.h
+++ b/include/linux/skmsg.h
@@ -508,8 +508,22 @@  static inline bool sk_psock_strp_enabled(struct sk_psock *psock)
 
 #if IS_ENABLED(CONFIG_NET_SOCK_MSG)
 
-/* We only have one bit so far. */
-#define BPF_F_PTR_MASK ~(BPF_F_INGRESS)
+#define BPF_F_STRPARSER	(1UL << 1)
+
+/* We only have two bit so far. */
+#define BPF_F_PTR_MASK ~(BPF_F_INGRESS | BPF_F_STRPARSER)
+
+static inline bool skb_bpf_strparser(const struct sk_buff *skb)
+{
+	unsigned long sk_redir = skb->_sk_redir;
+
+	return sk_redir & BPF_F_STRPARSER;
+}
+
+static inline void skb_bpf_set_strparser(struct sk_buff *skb)
+{
+	skb->_sk_redir |= BPF_F_STRPARSER;
+}
 
 static inline bool skb_bpf_ingress(const struct sk_buff *skb)
 {
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index 2d6249b28928..2faf2fa110f2 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -494,6 +494,7 @@  static struct sk_msg *sk_psock_create_ingress_msg(struct sock *sk,
 }
 
 static int sk_psock_skb_ingress_enqueue(struct sk_buff *skb,
+					u32 off, u32 len,
 					struct sk_psock *psock,
 					struct sock *sk,
 					struct sk_msg *msg)
@@ -507,11 +508,11 @@  static int sk_psock_skb_ingress_enqueue(struct sk_buff *skb,
 	 */
 	if (skb_linearize(skb))
 		return -EAGAIN;
-	num_sge = skb_to_sgvec(skb, msg->sg.data, 0, skb->len);
+	num_sge = skb_to_sgvec(skb, msg->sg.data, off, len);
 	if (unlikely(num_sge < 0))
 		return num_sge;
 
-	copied = skb->len;
+	copied = len;
 	msg->sg.start = 0;
 	msg->sg.size = copied;
 	msg->sg.end = num_sge;
@@ -522,9 +523,11 @@  static int sk_psock_skb_ingress_enqueue(struct sk_buff *skb,
 	return copied;
 }
 
-static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb);
+static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb,
+				     u32 off, u32 len);
 
-static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb)
+static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb,
+				u32 off, u32 len)
 {
 	struct sock *sk = psock->sk;
 	struct sk_msg *msg;
@@ -535,7 +538,7 @@  static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb)
 	 * correctly.
 	 */
 	if (unlikely(skb->sk == sk))
-		return sk_psock_skb_ingress_self(psock, skb);
+		return sk_psock_skb_ingress_self(psock, skb, off, len);
 	msg = sk_psock_create_ingress_msg(sk, skb);
 	if (!msg)
 		return -EAGAIN;
@@ -547,7 +550,7 @@  static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb)
 	 * into user buffers.
 	 */
 	skb_set_owner_r(skb, sk);
-	err = sk_psock_skb_ingress_enqueue(skb, psock, sk, msg);
+	err = sk_psock_skb_ingress_enqueue(skb, off, len, psock, sk, msg);
 	if (err < 0)
 		kfree(msg);
 	return err;
@@ -557,7 +560,8 @@  static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb)
  * skb. In this case we do not need to check memory limits or skb_set_owner_r
  * because the skb is already accounted for here.
  */
-static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb)
+static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb,
+				     u32 off, u32 len)
 {
 	struct sk_msg *msg = kzalloc(sizeof(*msg), __GFP_NOWARN | GFP_ATOMIC);
 	struct sock *sk = psock->sk;
@@ -567,7 +571,7 @@  static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb
 		return -EAGAIN;
 	sk_msg_init(msg);
 	skb_set_owner_r(skb, sk);
-	err = sk_psock_skb_ingress_enqueue(skb, psock, sk, msg);
+	err = sk_psock_skb_ingress_enqueue(skb, off, len, psock, sk, msg);
 	if (err < 0)
 		kfree(msg);
 	return err;
@@ -581,7 +585,7 @@  static int sk_psock_handle_skb(struct sk_psock *psock, struct sk_buff *skb,
 			return -EAGAIN;
 		return skb_send_sock(psock->sk, skb, off, len);
 	}
-	return sk_psock_skb_ingress(psock, skb);
+	return sk_psock_skb_ingress(psock, skb, off, len);
 }
 
 static void sk_psock_skb_state(struct sk_psock *psock,
@@ -604,6 +608,9 @@  static void sk_psock_backlog(struct work_struct *work)
 {
 	struct sk_psock *psock = container_of(work, struct sk_psock, work);
 	struct sk_psock_work_state *state = &psock->work_state;
+#if IS_ENABLED(CONFIG_BPF_STREAM_PARSER)
+	struct strp_msg *stm = NULL;
+#endif
 	struct sk_buff *skb = NULL;
 	bool ingress;
 	u32 len, off;
@@ -624,6 +631,13 @@  static void sk_psock_backlog(struct work_struct *work)
 	while ((skb = skb_dequeue(&psock->ingress_skb))) {
 		len = skb->len;
 		off = 0;
+#if IS_ENABLED(CONFIG_BPF_STREAM_PARSER)
+		if (skb_bpf_strparser(skb)) {
+			stm = strp_msg(skb);
+			off = stm->offset;
+			len = stm->full_len;
+		}
+#endif
 start:
 		ingress = skb_bpf_ingress(skb);
 		skb_bpf_redirect_clear(skb);
@@ -930,6 +944,10 @@  static int sk_psock_verdict_apply(struct sk_psock *psock, struct sk_buff *skb,
 {
 	struct sock *sk_other;
 	int err = 0;
+#if IS_ENABLED(CONFIG_BPF_STREAM_PARSER)
+	struct strp_msg *stm = NULL;
+#endif
+	u32 len, off;
 
 	switch (verdict) {
 	case __SK_PASS:
@@ -949,7 +967,16 @@  static int sk_psock_verdict_apply(struct sk_psock *psock, struct sk_buff *skb,
 		 * retrying later from workqueue.
 		 */
 		if (skb_queue_empty(&psock->ingress_skb)) {
-			err = sk_psock_skb_ingress_self(psock, skb);
+			len = skb->len;
+			off = 0;
+#if IS_ENABLED(CONFIG_BPF_STREAM_PARSER)
+			if (skb_bpf_strparser(skb)) {
+				stm = strp_msg(skb);
+				off = stm->offset;
+				len = stm->full_len;
+			}
+#endif
+			err = sk_psock_skb_ingress_self(psock, skb, off, len);
 		}
 		if (err < 0) {
 			spin_lock_bh(&psock->ingress_lock);
@@ -1018,6 +1045,7 @@  static void sk_psock_strp_read(struct strparser *strp, struct sk_buff *skb)
 		ret = sk_psock_map_verd(ret, skb_bpf_redirect_fetch(skb));
 		skb->sk = NULL;
 	}
+	skb_bpf_set_strparser(skb);
 	sk_psock_verdict_apply(psock, skb, ret);
 out:
 	rcu_read_unlock();