diff mbox series

[bpf,3/3] bpf, sockmap: fix memleak on ingress msg enqueue

Message ID 20210719214834.125484-4-john.fastabend@gmail.com (mailing list archive)
State Superseded
Delegated to: BPF
Headers show
Series sockmap fixes picked up by stress tests | expand

Checks

Context Check Description
netdev/cover_letter success Link
netdev/fixes_present success Link
netdev/patch_count success Link
netdev/tree_selection success Clearly marked for bpf
netdev/subject_prefix success Link
netdev/cc_maintainers fail 2 blamed authors not CCed: ast@kernel.org cong.wang@bytedance.com; 10 maintainers not CCed: yhs@fb.com kpsingh@kernel.org andrii@kernel.org cong.wang@bytedance.com kafai@fb.com ast@kernel.org lmb@cloudflare.com songliubraving@fb.com davem@davemloft.net kuba@kernel.org
netdev/source_inline success Was 0 now: 0
netdev/verify_signedoff success Link
netdev/module_param success Was 0 now: 0
netdev/build_32bit success Errors and warnings before: 69 this patch: 69
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/verify_fixes success Link
netdev/checkpatch fail ERROR: code indent should use tabs where possible WARNING: 'wont' may be misspelled - perhaps 'won't'? WARNING: please, no spaces at the start of a line
netdev/build_allmodconfig_warn success Errors and warnings before: 69 this patch: 69
netdev/header_inline fail Link

Commit Message

John Fastabend July 19, 2021, 9:48 p.m. UTC
If the backlog handler is running during a teardown operation, we may enqueue
data on the ingress msg queue while teardown is trying to free it.

 sk_psock_backlog()
   sk_psock_handle_skb()
     skb_psock_skb_ingress()
       sk_psock_skb_ingress_enqueue()
         sk_psock_queue_msg(psock,msg)
                                           spin_lock(ingress_lock)
                                            sk_psock_zap_ingress()
                                             _sk_psock_purge_ingerss_msg()
                                              _sk_psock_purge_ingress_msg()
                                            -- free ingress_msg list --
                                           spin_unlock(ingress_lock)
           spin_lock(ingress_lock)
           list_add_tail(msg,ingress_msg) <- entry on list with no one
                                             left to free it.
           spin_unlock(ingress_lock)

To fix this, only enqueue from the backlog if the SK_PSOCK_TX_ENABLED bit is
set. The teardown logic clears the bit with ingress_lock held, so we won't
enqueue the msg in the last step.

Fixes: 799aa7f98d53 ("skmsg: Avoid lock_sock() in sk_psock_backlog()")
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
---
 include/linux/skmsg.h | 54 ++++++++++++++++++++++++++++---------------
 net/core/skmsg.c      |  6 -----
 2 files changed, 35 insertions(+), 25 deletions(-)

Comments

Jakub Sitnicki July 21, 2021, 4:36 p.m. UTC | #1
On Mon, Jul 19, 2021 at 11:48 PM CEST, John Fastabend wrote:
> If backlog handler is running during a tear down operation we may enqueue
> data on the ingress msg queue while tear down is trying to free it.
>
>  sk_psock_backlog()
>    sk_psock_handle_skb()
>      skb_psock_skb_ingress()
>        sk_psock_skb_ingress_enqueue()
>          sk_psock_queue_msg(psock,msg)
>                                            spin_lock(ingress_lock)
>                                             sk_psock_zap_ingress()
>                                              _sk_psock_purge_ingerss_msg()
>                                               _sk_psock_purge_ingress_msg()
>                                             -- free ingress_msg list --
>                                            spin_unlock(ingress_lock)
>            spin_lock(ingress_lock)
>            list_add_tail(msg,ingress_msg) <- entry on list with no on
>                                              left to free it.
>            spin_unlock(ingress_lock)
>
> To fix we only enqueue from backlog if the ENABLED bit is set. The tear
> down logic clears the bit with ingress_lock set so we wont enqueue the
> msg in the last step.
>
> Fixes: 799aa7f98d53 ("skmsg: Avoid lock_sock() in sk_psock_backlog()")
> Signed-off-by: John Fastabend <john.fastabend@gmail.com>
> ---
>  include/linux/skmsg.h | 54 ++++++++++++++++++++++++++++---------------
>  net/core/skmsg.c      |  6 -----
>  2 files changed, 35 insertions(+), 25 deletions(-)
>
> diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
> index 96f319099744..883638888f93 100644
> --- a/include/linux/skmsg.h
> +++ b/include/linux/skmsg.h
> @@ -285,11 +285,45 @@ static inline struct sk_psock *sk_psock(const struct sock *sk)
>  	return rcu_dereference_sk_user_data(sk);
>  }
>  
> +static inline void sk_psock_set_state(struct sk_psock *psock,
> +				      enum sk_psock_state_bits bit)
> +{
> +	set_bit(bit, &psock->state);
> +}
> +
> +static inline void sk_psock_clear_state(struct sk_psock *psock,
> +					enum sk_psock_state_bits bit)
> +{
> +	clear_bit(bit, &psock->state);
> +}
> +
> +static inline bool sk_psock_test_state(const struct sk_psock *psock,
> +				       enum sk_psock_state_bits bit)
> +{
> +	return test_bit(bit, &psock->state);
> +}
> +
> +static void sock_drop(struct sock *sk, struct sk_buff *skb)
> +{
> +	sk_drops_add(sk, skb);
> +	kfree_skb(skb);
> +}
> +
> +static inline void drop_sk_msg(struct sk_psock *psock, struct sk_msg *msg)
> +{
> +	if (msg->skb)
> +		sock_drop(psock->sk, msg->skb);
> +	kfree(msg);
> +}
> +
>  static inline void sk_psock_queue_msg(struct sk_psock *psock,
>  				      struct sk_msg *msg)
>  {
>  	spin_lock_bh(&psock->ingress_lock);
> -	list_add_tail(&msg->list, &psock->ingress_msg);
> +        if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED))


Whitespace issue ^. Otherwise LGTM.

> +		list_add_tail(&msg->list, &psock->ingress_msg);
> +	else
> +		drop_sk_msg(psock, msg);
>  	spin_unlock_bh(&psock->ingress_lock);
>  }
>  

[...]
Martin KaFai Lau July 21, 2021, 6:02 p.m. UTC | #2
On Mon, Jul 19, 2021 at 02:48:34PM -0700, John Fastabend wrote:
> If backlog handler is running during a tear down operation we may enqueue
> data on the ingress msg queue while tear down is trying to free it.
> 
>  sk_psock_backlog()
>    sk_psock_handle_skb()
>      skb_psock_skb_ingress()
>        sk_psock_skb_ingress_enqueue()
>          sk_psock_queue_msg(psock,msg)
>                                            spin_lock(ingress_lock)
>                                             sk_psock_zap_ingress()
>                                              _sk_psock_purge_ingerss_msg()
>                                               _sk_psock_purge_ingress_msg()
>                                             -- free ingress_msg list --
>                                            spin_unlock(ingress_lock)
>            spin_lock(ingress_lock)
>            list_add_tail(msg,ingress_msg) <- entry on list with no on
s/on/one/

>                                              left to free it.
>            spin_unlock(ingress_lock)
> 
> To fix we only enqueue from backlog if the ENABLED bit is set. The tear
> down logic clears the bit with ingress_lock set so we wont enqueue the
> msg in the last step.
> 
> Fixes: 799aa7f98d53 ("skmsg: Avoid lock_sock() in sk_psock_backlog()")
> Signed-off-by: John Fastabend <john.fastabend@gmail.com>
> ---
>  include/linux/skmsg.h | 54 ++++++++++++++++++++++++++++---------------
>  net/core/skmsg.c      |  6 -----
>  2 files changed, 35 insertions(+), 25 deletions(-)
> 
> diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
> index 96f319099744..883638888f93 100644
> --- a/include/linux/skmsg.h
> +++ b/include/linux/skmsg.h
> @@ -285,11 +285,45 @@ static inline struct sk_psock *sk_psock(const struct sock *sk)
>  	return rcu_dereference_sk_user_data(sk);
>  }
>  
> +static inline void sk_psock_set_state(struct sk_psock *psock,
> +				      enum sk_psock_state_bits bit)
> +{
> +	set_bit(bit, &psock->state);
> +}
> +
> +static inline void sk_psock_clear_state(struct sk_psock *psock,
> +					enum sk_psock_state_bits bit)
> +{
> +	clear_bit(bit, &psock->state);
> +}
> +
> +static inline bool sk_psock_test_state(const struct sk_psock *psock,
> +				       enum sk_psock_state_bits bit)
> +{
> +	return test_bit(bit, &psock->state);
> +}
> +
> +static void sock_drop(struct sock *sk, struct sk_buff *skb)
inline

> +{
> +	sk_drops_add(sk, skb);
> +	kfree_skb(skb);
> +}
> +
> +static inline void drop_sk_msg(struct sk_psock *psock, struct sk_msg *msg)
> +{
> +	if (msg->skb)
> +		sock_drop(psock->sk, msg->skb);
> +	kfree(msg);
> +}
> +
>  static inline void sk_psock_queue_msg(struct sk_psock *psock,
>  				      struct sk_msg *msg)
>  {
>  	spin_lock_bh(&psock->ingress_lock);
> -	list_add_tail(&msg->list, &psock->ingress_msg);
> +        if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED))
Indentation is off.
diff mbox series

Patch

diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
index 96f319099744..883638888f93 100644
--- a/include/linux/skmsg.h
+++ b/include/linux/skmsg.h
@@ -285,11 +285,45 @@  static inline struct sk_psock *sk_psock(const struct sock *sk)
 	return rcu_dereference_sk_user_data(sk);
 }
 
+static inline void sk_psock_set_state(struct sk_psock *psock,
+				      enum sk_psock_state_bits bit)
+{
+	set_bit(bit, &psock->state);
+}
+
+static inline void sk_psock_clear_state(struct sk_psock *psock,
+					enum sk_psock_state_bits bit)
+{
+	clear_bit(bit, &psock->state);
+}
+
+static inline bool sk_psock_test_state(const struct sk_psock *psock,
+				       enum sk_psock_state_bits bit)
+{
+	return test_bit(bit, &psock->state);
+}
+
+static inline void sock_drop(struct sock *sk, struct sk_buff *skb)
+{
+	sk_drops_add(sk, skb);
+	kfree_skb(skb);
+}
+
+static inline void drop_sk_msg(struct sk_psock *psock, struct sk_msg *msg)
+{
+	if (msg->skb)
+		sock_drop(psock->sk, msg->skb);
+	kfree(msg);
+}
+
 static inline void sk_psock_queue_msg(struct sk_psock *psock,
 				      struct sk_msg *msg)
 {
 	spin_lock_bh(&psock->ingress_lock);
-	list_add_tail(&msg->list, &psock->ingress_msg);
+	if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED))
+		list_add_tail(&msg->list, &psock->ingress_msg);
+	else
+		drop_sk_msg(psock, msg);
 	spin_unlock_bh(&psock->ingress_lock);
 }
 
@@ -406,24 +440,6 @@  static inline void sk_psock_restore_proto(struct sock *sk,
 		psock->psock_update_sk_prot(sk, psock, true);
 }
 
-static inline void sk_psock_set_state(struct sk_psock *psock,
-				      enum sk_psock_state_bits bit)
-{
-	set_bit(bit, &psock->state);
-}
-
-static inline void sk_psock_clear_state(struct sk_psock *psock,
-					enum sk_psock_state_bits bit)
-{
-	clear_bit(bit, &psock->state);
-}
-
-static inline bool sk_psock_test_state(const struct sk_psock *psock,
-				       enum sk_psock_state_bits bit)
-{
-	return test_bit(bit, &psock->state);
-}
-
 static inline struct sk_psock *sk_psock_get(struct sock *sk)
 {
 	struct sk_psock *psock;
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index 5d956e91d05a..3ee407bed768 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -584,12 +584,6 @@  static int sk_psock_handle_skb(struct sk_psock *psock, struct sk_buff *skb,
 	return sk_psock_skb_ingress(psock, skb);
 }
 
-static void sock_drop(struct sock *sk, struct sk_buff *skb)
-{
-	sk_drops_add(sk, skb);
-	kfree_skb(skb);
-}
-
 static void sk_psock_skb_state(struct sk_psock *psock,
 			       struct sk_psock_work_state *state,
 			       struct sk_buff *skb,