| Message ID | 20210719214834.125484-4-john.fastabend@gmail.com (mailing list archive) |
|---|---|
| State | Superseded |
| Delegated to | BPF |
| Series | sockmap fixes picked up by stress tests |
On Mon, Jul 19, 2021 at 11:48 PM CEST, John Fastabend wrote:
> If backlog handler is running during a tear down operation we may enqueue
> data on the ingress msg queue while tear down is trying to free it.
>
>  sk_psock_backlog()
>    sk_psock_handle_skb()
>     skb_psock_skb_ingress()
>      sk_psock_skb_ingress_enqueue()
>       sk_psock_queue_msg(psock,msg)
>                                         spin_lock(ingress_lock)
>                                          sk_psock_zap_ingress()
>                                           _sk_psock_purge_ingerss_msg()
>                                            _sk_psock_purge_ingress_msg()
>                                             -- free ingress_msg list --
>                                         spin_unlock(ingress_lock)
>        spin_lock(ingress_lock)
>         list_add_tail(msg,ingress_msg) <- entry on list with no on
>                                           left to free it.
>        spin_unlock(ingress_lock)
>
> To fix we only enqueue from backlog if the ENABLED bit is set. The tear
> down logic clears the bit with ingress_lock set so we wont enqueue the
> msg in the last step.
>
> Fixes: 799aa7f98d53 ("skmsg: Avoid lock_sock() in sk_psock_backlog()")
> Signed-off-by: John Fastabend <john.fastabend@gmail.com>
> ---
>  include/linux/skmsg.h | 54 ++++++++++++++++++++++++++++---------------
>  net/core/skmsg.c      |  6 -----
>  2 files changed, 35 insertions(+), 25 deletions(-)
>
> diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
> index 96f319099744..883638888f93 100644
> --- a/include/linux/skmsg.h
> +++ b/include/linux/skmsg.h
> @@ -285,11 +285,45 @@ static inline struct sk_psock *sk_psock(const struct sock *sk)
>  	return rcu_dereference_sk_user_data(sk);
>  }
>
> +static inline void sk_psock_set_state(struct sk_psock *psock,
> +				      enum sk_psock_state_bits bit)
> +{
> +	set_bit(bit, &psock->state);
> +}
> +
> +static inline void sk_psock_clear_state(struct sk_psock *psock,
> +					enum sk_psock_state_bits bit)
> +{
> +	clear_bit(bit, &psock->state);
> +}
> +
> +static inline bool sk_psock_test_state(const struct sk_psock *psock,
> +				       enum sk_psock_state_bits bit)
> +{
> +	return test_bit(bit, &psock->state);
> +}
> +
> +static void sock_drop(struct sock *sk, struct sk_buff *skb)
> +{
> +	sk_drops_add(sk, skb);
> +	kfree_skb(skb);
> +}
> +
> +static inline void drop_sk_msg(struct sk_psock *psock, struct sk_msg *msg)
> +{
> +	if (msg->skb)
> +		sock_drop(psock->sk, msg->skb);
> +	kfree(msg);
> +}
> +
>  static inline void sk_psock_queue_msg(struct sk_psock *psock,
>  				      struct sk_msg *msg)
>  {
>  	spin_lock_bh(&psock->ingress_lock);
> -	list_add_tail(&msg->list, &psock->ingress_msg);
> +	if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED))

Whitespace issue ^. Otherwise LGTM.

> +		list_add_tail(&msg->list, &psock->ingress_msg);
> +	else
> +		drop_sk_msg(psock, msg);
>  	spin_unlock_bh(&psock->ingress_lock);
>  }

[...]
On Mon, Jul 19, 2021 at 02:48:34PM -0700, John Fastabend wrote:
> If backlog handler is running during a tear down operation we may enqueue
> data on the ingress msg queue while tear down is trying to free it.
>
>  sk_psock_backlog()
>    sk_psock_handle_skb()
>     skb_psock_skb_ingress()
>      sk_psock_skb_ingress_enqueue()
>       sk_psock_queue_msg(psock,msg)
>                                         spin_lock(ingress_lock)
>                                          sk_psock_zap_ingress()
>                                           _sk_psock_purge_ingerss_msg()
>                                            _sk_psock_purge_ingress_msg()
>                                             -- free ingress_msg list --
>                                         spin_unlock(ingress_lock)
>        spin_lock(ingress_lock)
>         list_add_tail(msg,ingress_msg) <- entry on list with no on

s/on/one/

>                                           left to free it.
>        spin_unlock(ingress_lock)
>
> To fix we only enqueue from backlog if the ENABLED bit is set. The tear
> down logic clears the bit with ingress_lock set so we wont enqueue the
> msg in the last step.
>
> Fixes: 799aa7f98d53 ("skmsg: Avoid lock_sock() in sk_psock_backlog()")
> Signed-off-by: John Fastabend <john.fastabend@gmail.com>
> ---
>  include/linux/skmsg.h | 54 ++++++++++++++++++++++++++++---------------
>  net/core/skmsg.c      |  6 -----
>  2 files changed, 35 insertions(+), 25 deletions(-)
>
> diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
> index 96f319099744..883638888f93 100644
> --- a/include/linux/skmsg.h
> +++ b/include/linux/skmsg.h
> @@ -285,11 +285,45 @@ static inline struct sk_psock *sk_psock(const struct sock *sk)
>  	return rcu_dereference_sk_user_data(sk);
>  }
>
> +static inline void sk_psock_set_state(struct sk_psock *psock,
> +				      enum sk_psock_state_bits bit)
> +{
> +	set_bit(bit, &psock->state);
> +}
> +
> +static inline void sk_psock_clear_state(struct sk_psock *psock,
> +					enum sk_psock_state_bits bit)
> +{
> +	clear_bit(bit, &psock->state);
> +}
> +
> +static inline bool sk_psock_test_state(const struct sk_psock *psock,
> +				       enum sk_psock_state_bits bit)
> +{
> +	return test_bit(bit, &psock->state);
> +}
> +
> +static void sock_drop(struct sock *sk, struct sk_buff *skb)

inline

> +{
> +	sk_drops_add(sk, skb);
> +	kfree_skb(skb);
> +}
> +
> +static inline void drop_sk_msg(struct sk_psock *psock, struct sk_msg *msg)
> +{
> +	if (msg->skb)
> +		sock_drop(psock->sk, msg->skb);
> +	kfree(msg);
> +}
> +
>  static inline void sk_psock_queue_msg(struct sk_psock *psock,
>  				      struct sk_msg *msg)
>  {
>  	spin_lock_bh(&psock->ingress_lock);
> -	list_add_tail(&msg->list, &psock->ingress_msg);
> +	if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED))

Indentation is off.
```diff
diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
index 96f319099744..883638888f93 100644
--- a/include/linux/skmsg.h
+++ b/include/linux/skmsg.h
@@ -285,11 +285,45 @@ static inline struct sk_psock *sk_psock(const struct sock *sk)
 	return rcu_dereference_sk_user_data(sk);
 }
 
+static inline void sk_psock_set_state(struct sk_psock *psock,
+				      enum sk_psock_state_bits bit)
+{
+	set_bit(bit, &psock->state);
+}
+
+static inline void sk_psock_clear_state(struct sk_psock *psock,
+					enum sk_psock_state_bits bit)
+{
+	clear_bit(bit, &psock->state);
+}
+
+static inline bool sk_psock_test_state(const struct sk_psock *psock,
+				       enum sk_psock_state_bits bit)
+{
+	return test_bit(bit, &psock->state);
+}
+
+static void sock_drop(struct sock *sk, struct sk_buff *skb)
+{
+	sk_drops_add(sk, skb);
+	kfree_skb(skb);
+}
+
+static inline void drop_sk_msg(struct sk_psock *psock, struct sk_msg *msg)
+{
+	if (msg->skb)
+		sock_drop(psock->sk, msg->skb);
+	kfree(msg);
+}
+
 static inline void sk_psock_queue_msg(struct sk_psock *psock,
 				      struct sk_msg *msg)
 {
 	spin_lock_bh(&psock->ingress_lock);
-	list_add_tail(&msg->list, &psock->ingress_msg);
+	if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED))
+		list_add_tail(&msg->list, &psock->ingress_msg);
+	else
+		drop_sk_msg(psock, msg);
 	spin_unlock_bh(&psock->ingress_lock);
 }
 
@@ -406,24 +440,6 @@ static inline void sk_psock_restore_proto(struct sock *sk,
 		psock->psock_update_sk_prot(sk, psock, true);
 }
 
-static inline void sk_psock_set_state(struct sk_psock *psock,
-				      enum sk_psock_state_bits bit)
-{
-	set_bit(bit, &psock->state);
-}
-
-static inline void sk_psock_clear_state(struct sk_psock *psock,
-				      enum sk_psock_state_bits bit)
-{
-	clear_bit(bit, &psock->state);
-}
-
-static inline bool sk_psock_test_state(const struct sk_psock *psock,
-				       enum sk_psock_state_bits bit)
-{
-	return test_bit(bit, &psock->state);
-}
-
 static inline struct sk_psock *sk_psock_get(struct sock *sk)
 {
 	struct sk_psock *psock;
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index 5d956e91d05a..3ee407bed768 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -584,12 +584,6 @@ static int sk_psock_handle_skb(struct sk_psock *psock, struct sk_buff *skb,
 	return sk_psock_skb_ingress(psock, skb);
 }
 
-static void sock_drop(struct sock *sk, struct sk_buff *skb)
-{
-	sk_drops_add(sk, skb);
-	kfree_skb(skb);
-}
-
 static void sk_psock_skb_state(struct sk_psock *psock,
 			       struct sk_psock_work_state *state,
 			       struct sk_buff *skb,
```
If backlog handler is running during a tear down operation we may enqueue
data on the ingress msg queue while tear down is trying to free it.

 sk_psock_backlog()
   sk_psock_handle_skb()
    skb_psock_skb_ingress()
     sk_psock_skb_ingress_enqueue()
      sk_psock_queue_msg(psock,msg)
                                        spin_lock(ingress_lock)
                                         sk_psock_zap_ingress()
                                          _sk_psock_purge_ingerss_msg()
                                           _sk_psock_purge_ingress_msg()
                                            -- free ingress_msg list --
                                        spin_unlock(ingress_lock)
       spin_lock(ingress_lock)
        list_add_tail(msg,ingress_msg) <- entry on list with no on
                                          left to free it.
       spin_unlock(ingress_lock)

To fix we only enqueue from backlog if the ENABLED bit is set. The tear
down logic clears the bit with ingress_lock set so we wont enqueue the
msg in the last step.

Fixes: 799aa7f98d53 ("skmsg: Avoid lock_sock() in sk_psock_backlog()")
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
---
 include/linux/skmsg.h | 54 ++++++++++++++++++++++++++++---------------
 net/core/skmsg.c      |  6 -----
 2 files changed, 35 insertions(+), 25 deletions(-)
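For readers following the fix: the diff above only shows the enqueue side of the race. The commit message relies on the teardown path clearing SK_PSOCK_TX_ENABLED while holding the same ingress_lock. A minimal sketch of what that counterpart could look like, assuming a purge helper along the lines of the one named in the trace above; the function below, its name, and its placement are illustrative, not the upstream implementation:

```c
/*
 * Illustrative sketch only -- not the upstream code. It shows why the
 * ENABLED-bit check in sk_psock_queue_msg() closes the race: the bit is
 * cleared and the queue purged under the same ingress_lock the backlog's
 * enqueue takes, so after the unlock below any concurrent
 * sk_psock_queue_msg() sees the bit cleared and drops the msg instead of
 * adding it to a list nobody is left to free.
 */
static void example_sk_psock_teardown(struct sk_psock *psock)
{
	spin_lock_bh(&psock->ingress_lock);
	sk_psock_clear_state(psock, SK_PSOCK_TX_ENABLED);
	/* purge helper as referenced in the trace above; exact name and
	 * visibility may differ in the tree
	 */
	__sk_psock_purge_ingress_msg(psock);
	spin_unlock_bh(&psock->ingress_lock);
}
```

The key design point is that both sides agree on a single lock: testing the bit and appending to ingress_msg happen atomically with respect to clearing the bit and purging the list, so no msg can be enqueued after the purge.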