diff mbox series

skmsg: lose offset info in sk_psock_skb_ingress

Message ID 20210915140629.18558-1-liujian56@huawei.com (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Headers show
Series skmsg: lose offset info in sk_psock_skb_ingress | expand

Checks

Context Check Description
netdev/cover_letter success Link
netdev/fixes_present success Link
netdev/patch_count success Link
netdev/tree_selection success Guessed tree name to be net-next
netdev/subject_prefix warning Target tree name not specified in the subject
netdev/cc_maintainers warning 6 maintainers not CCed: kpsingh@kernel.org yhs@fb.com andrii@kernel.org songliubraving@fb.com ast@kernel.org kafai@fb.com
netdev/source_inline success Was 0 now: 0
netdev/verify_signedoff success Link
netdev/module_param success Was 0 now: 0
netdev/build_32bit success Errors and warnings before: 3 this patch: 3
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/verify_fixes success Link
netdev/checkpatch warning WARNING: line length of 81 exceeds 80 columns WARNING: line length of 83 exceeds 80 columns
netdev/build_allmodconfig_warn success Errors and warnings before: 3 this patch: 3
netdev/header_inline success Link

Commit Message

Liu Jian Sept. 15, 2021, 2:06 p.m. UTC
If sockmap enables strparser, the offset information is lost in
sk_psock_skb_ingress. If the length determined by the parse_msg function
is not skb->len, the skb will be converted to sk_msg multiple times,
and the userspace app will receive the data multiple times.

Fix this by getting the offset and length from strp_msg.

Signed-off-by: Liu Jian <liujian56@huawei.com>
---
 net/core/skmsg.c | 49 ++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 39 insertions(+), 10 deletions(-)

Comments

kernel test robot Sept. 16, 2021, 7:14 a.m. UTC | #1
Hi Liu,

Thank you for the patch! However, there is something to improve:

[auto build test ERROR on linus/master]
[also build test ERROR on v5.15-rc1 next-20210915]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting a patch, we suggest using '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Liu-Jian/skmsg-lose-offset-info-in-sk_psock_skb_ingress/20210915-220839
base:   https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git 3ca706c189db861b2ca2019a0901b94050ca49d8
config: h8300-randconfig-r031-20210916 (attached as .config)
compiler: h8300-linux-gcc (GCC) 11.2.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/0day-ci/linux/commit/7736d12e30a2ff3579225cc593898ef1a24dc7aa
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Liu-Jian/skmsg-lose-offset-info-in-sk_psock_skb_ingress/20210915-220839
        git checkout 7736d12e30a2ff3579225cc593898ef1a24dc7aa
        # save the attached .config to linux build tree
        mkdir build_dir
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-11.2.0 make.cross O=build_dir ARCH=h8300 SHELL=/bin/bash net/core/

If you fix the issue, kindly add the following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

   In file included from include/linux/kernel.h:11,
                    from include/linux/list.h:9,
                    from include/linux/timer.h:5,
                    from include/linux/workqueue.h:9,
                    from include/linux/bpf.h:9,
                    from include/linux/skmsg.h:7,
                    from net/core/skmsg.c:4:
   include/linux/scatterlist.h: In function 'sg_set_buf':
   include/asm-generic/page.h:89:51: warning: ordered comparison of pointer with null pointer [-Wextra]
      89 | #define virt_addr_valid(kaddr)  (((void *)(kaddr) >= (void *)PAGE_OFFSET) && \
         |                                                   ^~
   include/linux/compiler.h:78:45: note: in definition of macro 'unlikely'
      78 | # define unlikely(x)    __builtin_expect(!!(x), 0)
         |                                             ^
   include/linux/scatterlist.h:143:9: note: in expansion of macro 'BUG_ON'
     143 |         BUG_ON(!virt_addr_valid(buf));
         |         ^~~~~~
   include/linux/scatterlist.h:143:17: note: in expansion of macro 'virt_addr_valid'
     143 |         BUG_ON(!virt_addr_valid(buf));
         |                 ^~~~~~~~~~~~~~~
   net/core/skmsg.c: In function 'sk_psock_verdict_apply':
>> net/core/skmsg.c:972:25: error: 'len' undeclared (first use in this function)
     972 |                         len = skb->len;
         |                         ^~~
   net/core/skmsg.c:972:25: note: each undeclared identifier is reported only once for each function it appears in
>> net/core/skmsg.c:973:25: error: 'off' undeclared (first use in this function)
     973 |                         off = 0;
         |                         ^~~
   net/core/skmsg.c: At top level:
   net/core/skmsg.c:12:13: warning: 'sk_psock_strp_data_ready' declared 'static' but never defined [-Wunused-function]
      12 | static void sk_psock_strp_data_ready(struct sock *sk);
         |             ^~~~~~~~~~~~~~~~~~~~~~~~


vim +/len +972 net/core/skmsg.c

   953	
   954		switch (verdict) {
   955		case __SK_PASS:
   956			err = -EIO;
   957			sk_other = psock->sk;
   958			if (sock_flag(sk_other, SOCK_DEAD) ||
   959			    !sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) {
   960				goto out_free;
   961			}
   962	
   963			skb_bpf_set_ingress(skb);
   964	
   965			/* If the queue is empty then we can submit directly
   966			 * into the msg queue. If its not empty we have to
   967			 * queue work otherwise we may get OOO data. Otherwise,
   968			 * if sk_psock_skb_ingress errors will be handled by
   969			 * retrying later from workqueue.
   970			 */
   971			if (skb_queue_empty(&psock->ingress_skb)) {
 > 972				len = skb->len;
 > 973				off = 0;
   974	#if IS_ENABLED(CONFIG_BPF_STREAM_PARSER)
   975				if (psock->sk->sk_data_ready == sk_psock_strp_data_ready) {
   976					stm = strp_msg(skb);
   977					off = stm->offset;
   978					len = stm->full_len;
   979				}
   980	#endif
   981				err = sk_psock_skb_ingress_self(psock, skb, off, len);
   982			}
   983			if (err < 0) {
   984				spin_lock_bh(&psock->ingress_lock);
   985				if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) {
   986					skb_queue_tail(&psock->ingress_skb, skb);
   987					schedule_work(&psock->work);
   988					err = 0;
   989				}
   990				spin_unlock_bh(&psock->ingress_lock);
   991				if (err < 0) {
   992					skb_bpf_redirect_clear(skb);
   993					goto out_free;
   994				}
   995			}
   996			break;
   997		case __SK_REDIRECT:
   998			err = sk_psock_skb_redirect(psock, skb);
   999			break;
  1000		case __SK_DROP:
  1001		default:
  1002	out_free:
  1003			sock_drop(psock->sk, skb);
  1004		}
  1005	
  1006		return err;
  1007	}
  1008	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
diff mbox series

Patch

diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index 2d6249b28928..83f76e568fad 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -9,6 +9,8 @@ 
 #include <net/tcp.h>
 #include <net/tls.h>
 
+static void sk_psock_strp_data_ready(struct sock *sk);
+
 static bool sk_msg_try_coalesce_ok(struct sk_msg *msg, int elem_first_coalesce)
 {
 	if (msg->sg.end > msg->sg.start &&
@@ -494,6 +496,7 @@  static struct sk_msg *sk_psock_create_ingress_msg(struct sock *sk,
 }
 
 static int sk_psock_skb_ingress_enqueue(struct sk_buff *skb,
+					u32 off, u32 len,
 					struct sk_psock *psock,
 					struct sock *sk,
 					struct sk_msg *msg)
@@ -507,11 +510,11 @@  static int sk_psock_skb_ingress_enqueue(struct sk_buff *skb,
 	 */
 	if (skb_linearize(skb))
 		return -EAGAIN;
-	num_sge = skb_to_sgvec(skb, msg->sg.data, 0, skb->len);
+	num_sge = skb_to_sgvec(skb, msg->sg.data, off, len);
 	if (unlikely(num_sge < 0))
 		return num_sge;
 
-	copied = skb->len;
+	copied = len;
 	msg->sg.start = 0;
 	msg->sg.size = copied;
 	msg->sg.end = num_sge;
@@ -522,9 +525,11 @@  static int sk_psock_skb_ingress_enqueue(struct sk_buff *skb,
 	return copied;
 }
 
-static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb);
+static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb,
+				     u32 off, u32 len);
 
-static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb)
+static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb,
+				u32 off, u32 len)
 {
 	struct sock *sk = psock->sk;
 	struct sk_msg *msg;
@@ -535,7 +540,7 @@  static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb)
 	 * correctly.
 	 */
 	if (unlikely(skb->sk == sk))
-		return sk_psock_skb_ingress_self(psock, skb);
+		return sk_psock_skb_ingress_self(psock, skb, off, len);
 	msg = sk_psock_create_ingress_msg(sk, skb);
 	if (!msg)
 		return -EAGAIN;
@@ -547,7 +552,7 @@  static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb)
 	 * into user buffers.
 	 */
 	skb_set_owner_r(skb, sk);
-	err = sk_psock_skb_ingress_enqueue(skb, psock, sk, msg);
+	err = sk_psock_skb_ingress_enqueue(skb, off, len, psock, sk, msg);
 	if (err < 0)
 		kfree(msg);
 	return err;
@@ -557,7 +562,8 @@  static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb)
  * skb. In this case we do not need to check memory limits or skb_set_owner_r
  * because the skb is already accounted for here.
  */
-static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb)
+static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb,
+				     u32 off, u32 len)
 {
 	struct sk_msg *msg = kzalloc(sizeof(*msg), __GFP_NOWARN | GFP_ATOMIC);
 	struct sock *sk = psock->sk;
@@ -567,7 +573,7 @@  static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb
 		return -EAGAIN;
 	sk_msg_init(msg);
 	skb_set_owner_r(skb, sk);
-	err = sk_psock_skb_ingress_enqueue(skb, psock, sk, msg);
+	err = sk_psock_skb_ingress_enqueue(skb, off, len, psock, sk, msg);
 	if (err < 0)
 		kfree(msg);
 	return err;
@@ -581,7 +587,7 @@  static int sk_psock_handle_skb(struct sk_psock *psock, struct sk_buff *skb,
 			return -EAGAIN;
 		return skb_send_sock(psock->sk, skb, off, len);
 	}
-	return sk_psock_skb_ingress(psock, skb);
+	return sk_psock_skb_ingress(psock, skb, off, len);
 }
 
 static void sk_psock_skb_state(struct sk_psock *psock,
@@ -604,6 +610,9 @@  static void sk_psock_backlog(struct work_struct *work)
 {
 	struct sk_psock *psock = container_of(work, struct sk_psock, work);
 	struct sk_psock_work_state *state = &psock->work_state;
+#if IS_ENABLED(CONFIG_BPF_STREAM_PARSER)
+	struct strp_msg *stm = NULL;
+#endif
 	struct sk_buff *skb = NULL;
 	bool ingress;
 	u32 len, off;
@@ -624,6 +633,13 @@  static void sk_psock_backlog(struct work_struct *work)
 	while ((skb = skb_dequeue(&psock->ingress_skb))) {
 		len = skb->len;
 		off = 0;
+#if IS_ENABLED(CONFIG_BPF_STREAM_PARSER)
+		if (psock->sk->sk_data_ready == sk_psock_strp_data_ready) {
+			stm = strp_msg(skb);
+			off = stm->offset;
+			len = stm->full_len;
+		}
+#endif
 start:
 		ingress = skb_bpf_ingress(skb);
 		skb_bpf_redirect_clear(skb);
@@ -930,6 +946,10 @@  static int sk_psock_verdict_apply(struct sk_psock *psock, struct sk_buff *skb,
 {
 	struct sock *sk_other;
 	int err = 0;
+#if IS_ENABLED(CONFIG_BPF_STREAM_PARSER)
+	struct strp_msg *stm = NULL;
+	u32 len, off;
+#endif
 
 	switch (verdict) {
 	case __SK_PASS:
@@ -949,7 +969,16 @@  static int sk_psock_verdict_apply(struct sk_psock *psock, struct sk_buff *skb,
 		 * retrying later from workqueue.
 		 */
 		if (skb_queue_empty(&psock->ingress_skb)) {
-			err = sk_psock_skb_ingress_self(psock, skb);
+			len = skb->len;
+			off = 0;
+#if IS_ENABLED(CONFIG_BPF_STREAM_PARSER)
+			if (psock->sk->sk_data_ready == sk_psock_strp_data_ready) {
+				stm = strp_msg(skb);
+				off = stm->offset;
+				len = stm->full_len;
+			}
+#endif
+			err = sk_psock_skb_ingress_self(psock, skb, off, len);
 		}
 		if (err < 0) {
 			spin_lock_bh(&psock->ingress_lock);