Message ID | 20250320121604.3342831-1-edumazet@google.com (mailing list archive)
---|---
State | New
Delegated to | Netdev Maintainers
Series | [net-next] tcp: avoid atomic operations on sk->sk_rmem_alloc
On Thu, Mar 20, 2025 at 8:16 AM Eric Dumazet <edumazet@google.com> wrote:
>
> TCP uses generic skb_set_owner_r() and sock_rfree()
> for received packets, with socket lock being owned.
>
> Switch to private versions, avoiding two atomic operations
> per packet.
>
> Signed-off-by: Eric Dumazet <edumazet@google.com>
> ---
>  include/net/tcp.h       | 15 +++++++++++++++
>  net/ipv4/tcp.c          | 18 ++++++++++++++++--
>  net/ipv4/tcp_fastopen.c |  2 +-
>  net/ipv4/tcp_input.c    |  6 +++---
>  4 files changed, 35 insertions(+), 6 deletions(-)
>
> diff --git a/include/net/tcp.h b/include/net/tcp.h
> index d08fbf90495de69b157d3c87c50e82d781a365df..dd6d63a6f42b99774e9461b69d3e7932cf629082 100644
> --- a/include/net/tcp.h
> +++ b/include/net/tcp.h

Very nice. Thanks!

Reviewed-by: Neal Cardwell <ncardwell@google.com>

A couple quick thoughts:

> @@ -779,6 +779,7 @@ static inline int tcp_bound_to_half_wnd(struct tcp_sock *tp, int pktsize)
>
>  /* tcp.c */
>  void tcp_get_info(struct sock *, struct tcp_info *);
> +void tcp_sock_rfree(struct sk_buff *skb);
>
>  /* Read 'sendfile()'-style from a TCP socket */
>  int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
> @@ -2898,4 +2899,18 @@ enum skb_drop_reason tcp_inbound_hash(struct sock *sk,
>                                        const void *saddr, const void *daddr,
>                                        int family, int dif, int sdif);
>
> +/* version of skb_set_owner_r() avoiding one atomic_add() */
> +static inline void tcp_skb_set_owner_r(struct sk_buff *skb, struct sock *sk)
> +{
> +	skb_orphan(skb);
> +	skb->sk = sk;
> +	skb->destructor = tcp_sock_rfree;
> +
> +	sock_owned_by_me(sk);
> +	atomic_set(&sk->sk_rmem_alloc,
> +		   atomic_read(&sk->sk_rmem_alloc) + skb->truesize);
> +
> +	sk_forward_alloc_add(sk, -skb->truesize);
> +}
> +
>  #endif /* _TCP_H */
> diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
> index 989c3c3d8e757361a0ac4a9f039a3cfca10d9612..b1306038b8e6e8c55fd1b4803c5d8ca626491aae 100644
> --- a/net/ipv4/tcp.c
> +++ b/net/ipv4/tcp.c
> @@ -1525,11 +1525,25 @@ void tcp_cleanup_rbuf(struct sock *sk, int copied)
>  	__tcp_cleanup_rbuf(sk, copied);
>  }
>
> +/* private version of sock_rfree() avoiding one atomic_sub() */
> +void tcp_sock_rfree(struct sk_buff *skb)
> +{
> +	struct sock *sk = skb->sk;
> +	unsigned int len = skb->truesize;
> +
> +	sock_owned_by_me(sk);
> +	atomic_set(&sk->sk_rmem_alloc,
> +		   atomic_read(&sk->sk_rmem_alloc) - len);
> +
> +	sk_forward_alloc_add(sk, len);
> +	sk_mem_reclaim(sk);

One thought on readability: it might be nice to make these functions both use skb->truesize, rather than having one use skb->truesize and the other use len (particularly since "len" in the skb context often refers to the payload length). I realize the "len" helper variable was inherited from sock_rfree(), but it might be nice to make the TCP versions easier to read and audit?

Also, it might be nice to have the comments above tcp_skb_set_owner_r() and tcp_sock_rfree() reference the other function, so maintainers can be reminded of the fact that the arithmetic in the two functions needs to be kept exactly in sync? Perhaps something like:

/* A version of skb_set_owner_r() avoiding one atomic_add().
 * These adjustments are later inverted by tcp_sock_rfree().
 */
static inline void tcp_skb_set_owner_r(struct sk_buff *skb, struct sock *sk)
...

/* A private version of sock_rfree() avoiding one atomic_sub().
 * Inverts the earlier adjustments made by tcp_skb_set_owner_r().
 */
void tcp_sock_rfree(struct sk_buff *skb)

Anyway, the patches LGTM. Those were just some thoughts. :-)

Thanks!
neal
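For reference, here is a sketch of what the net/ipv4/tcp.c helper would look like with both of Neal's suggestions folded in: the "len" variable dropped in favor of using skb->truesize directly, and the comment cross-referencing its counterpart. This is only an illustration of the review feedback, not the code as merged:

/* A private version of sock_rfree(), avoiding one atomic_sub().
 * Inverts the earlier adjustments made by tcp_skb_set_owner_r(),
 * so the arithmetic here must stay exactly in sync with it.
 */
void tcp_sock_rfree(struct sk_buff *skb)
{
	struct sock *sk = skb->sk;

	sock_owned_by_me(sk);
	atomic_set(&sk->sk_rmem_alloc,
		   atomic_read(&sk->sk_rmem_alloc) - skb->truesize);

	sk_forward_alloc_add(sk, skb->truesize);
	sk_mem_reclaim(sk);
}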
diff --git a/include/net/tcp.h b/include/net/tcp.h
index d08fbf90495de69b157d3c87c50e82d781a365df..dd6d63a6f42b99774e9461b69d3e7932cf629082 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -779,6 +779,7 @@ static inline int tcp_bound_to_half_wnd(struct tcp_sock *tp, int pktsize)
 
 /* tcp.c */
 void tcp_get_info(struct sock *, struct tcp_info *);
+void tcp_sock_rfree(struct sk_buff *skb);
 
 /* Read 'sendfile()'-style from a TCP socket */
 int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
@@ -2898,4 +2899,18 @@ enum skb_drop_reason tcp_inbound_hash(struct sock *sk,
 				      const void *saddr, const void *daddr,
 				      int family, int dif, int sdif);
 
+/* version of skb_set_owner_r() avoiding one atomic_add() */
+static inline void tcp_skb_set_owner_r(struct sk_buff *skb, struct sock *sk)
+{
+	skb_orphan(skb);
+	skb->sk = sk;
+	skb->destructor = tcp_sock_rfree;
+
+	sock_owned_by_me(sk);
+	atomic_set(&sk->sk_rmem_alloc,
+		   atomic_read(&sk->sk_rmem_alloc) + skb->truesize);
+
+	sk_forward_alloc_add(sk, -skb->truesize);
+}
+
 #endif /* _TCP_H */
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 989c3c3d8e757361a0ac4a9f039a3cfca10d9612..b1306038b8e6e8c55fd1b4803c5d8ca626491aae 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1525,11 +1525,25 @@ void tcp_cleanup_rbuf(struct sock *sk, int copied)
 	__tcp_cleanup_rbuf(sk, copied);
 }
 
+/* private version of sock_rfree() avoiding one atomic_sub() */
+void tcp_sock_rfree(struct sk_buff *skb)
+{
+	struct sock *sk = skb->sk;
+	unsigned int len = skb->truesize;
+
+	sock_owned_by_me(sk);
+	atomic_set(&sk->sk_rmem_alloc,
+		   atomic_read(&sk->sk_rmem_alloc) - len);
+
+	sk_forward_alloc_add(sk, len);
+	sk_mem_reclaim(sk);
+}
+
 static void tcp_eat_recv_skb(struct sock *sk, struct sk_buff *skb)
 {
 	__skb_unlink(skb, &sk->sk_receive_queue);
-	if (likely(skb->destructor == sock_rfree)) {
-		sock_rfree(skb);
+	if (likely(skb->destructor == tcp_sock_rfree)) {
+		tcp_sock_rfree(skb);
 		skb->destructor = NULL;
 		skb->sk = NULL;
 		return skb_attempt_defer_free(skb);
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index 1a6b1bc5424514e27a99cbb2fcedf001afd51d98..ca40665145c692ce0de518886bb366406606f7ac 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -189,7 +189,7 @@ void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb)
 	tcp_segs_in(tp, skb);
 	__skb_pull(skb, tcp_hdrlen(skb));
 	sk_forced_mem_schedule(sk, skb->truesize);
-	skb_set_owner_r(skb, sk);
+	tcp_skb_set_owner_r(skb, sk);
 
 	TCP_SKB_CB(skb)->seq++;
 	TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_SYN;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 72382ee4456dbd89fd1b69f3bdbf6b9c8ef5aa78..cac6e86196975cc793f499126302fc5220a1dfc7 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -5171,7 +5171,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
 		if (tcp_is_sack(tp))
 			tcp_grow_window(sk, skb, false);
 		skb_condense(skb);
-		skb_set_owner_r(skb, sk);
+		tcp_skb_set_owner_r(skb, sk);
 	}
 }
 
@@ -5187,7 +5187,7 @@ static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb,
 	tcp_rcv_nxt_update(tcp_sk(sk), TCP_SKB_CB(skb)->end_seq);
 	if (!eaten) {
 		tcp_add_receive_queue(sk, skb);
-		skb_set_owner_r(skb, sk);
+		tcp_skb_set_owner_r(skb, sk);
 	}
 	return eaten;
 }
@@ -5504,7 +5504,7 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list, struct rb_root *root,
 			__skb_queue_before(list, skb, nskb);
 		else
 			__skb_queue_tail(&tmp, nskb); /* defer rbtree insertion */
-		skb_set_owner_r(nskb, sk);
+		tcp_skb_set_owner_r(nskb, sk);
 		mptcp_skb_ext_move(nskb, skb);
 
 		/* Copy data, releasing collapsed skbs. */
TCP uses generic skb_set_owner_r() and sock_rfree()
for received packets, with socket lock being owned.

Switch to private versions, avoiding two atomic operations
per packet.

Signed-off-by: Eric Dumazet <edumazet@google.com>
---
 include/net/tcp.h       | 15 +++++++++++++++
 net/ipv4/tcp.c          | 18 ++++++++++++++++--
 net/ipv4/tcp_fastopen.c |  2 +-
 net/ipv4/tcp_input.c    |  6 +++---
 4 files changed, 35 insertions(+), 6 deletions(-)
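A note on why dropping the atomic read-modify-write is safe here: sk->sk_rmem_alloc presumably stays an atomic_t because it is still read without the socket lock elsewhere, but every writer on these receive paths owns the socket lock (hence the sock_owned_by_me() assertions), so an atomic load plus an atomic store suffices. Below is a minimal user-space analogy of the pattern, with a pthread mutex standing in for the socket lock and C11 atomics for the kernel's atomic_t; it is illustrative only, not kernel code, and the names charge()/charged() are made up for the example:

#include <pthread.h>
#include <stdatomic.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER; /* the "socket lock" */
static atomic_int rmem_alloc;                      /* plays sk->sk_rmem_alloc */

/* Writer path: all writers serialize on the lock, so an atomic load plus
 * an atomic store can replace the costlier locked read-modify-write
 * (fetch_add), the same trick the patch applies once per packet.
 */
static void charge(int truesize)
{
	pthread_mutex_lock(&lock);
	atomic_store_explicit(&rmem_alloc,
			      atomic_load_explicit(&rmem_alloc,
						   memory_order_relaxed)
			      + truesize,
			      memory_order_relaxed);
	pthread_mutex_unlock(&lock);
}

/* Reader path: may run without the lock and still reads a torn-free
 * value, which is why the counter must remain atomic at all.
 */
static int charged(void)
{
	return atomic_load_explicit(&rmem_alloc, memory_order_relaxed);
}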