diff mbox series

[v2,mptcp-next] mptcp: drop tx skb cache

Message ID 8dd3343ea83a7a631acfe4940a9dea841b32c52a.1621613936.git.pabeni@redhat.com (mailing list archive)
State Superseded, archived
Delegated to: Mat Martineau
Headers show
Series [v2,mptcp-next] mptcp: drop tx skb cache | expand

Checks

Context Check Description
matttbe/KVM_Validation__normal success Success! ✅
matttbe/KVM_Validation__debug success Success! ✅
matttbe/KVM_Validation__btf__only_bpftest_all_ success Success! ✅

Commit Message

Paolo Abeni May 21, 2021, 4:19 p.m. UTC
The mentioned cache was introduced to reduce the number of skb
allocations in atomic context, but the required complexity is
excessive.

This change removes the mentioned cache.

Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
v1 -> v2:
 - drop unused field skb_tx_cache
---
 net/mptcp/protocol.c | 89 ++------------------------------------------
 net/mptcp/protocol.h |  1 -
 2 files changed, 4 insertions(+), 86 deletions(-)

Comments

Mat Martineau May 21, 2021, 10:57 p.m. UTC | #1
On Fri, 21 May 2021, Paolo Abeni wrote:

> The mentioned cache was introduced to reduce the number of skb
> allocation in atomic context, but the required complexity is
> excessive.
>
> This change remove the mentioned cache.
>
> Signed-off-by: Paolo Abeni <pabeni@redhat.com>
> ---
> v1 -> v2:
> - drop unused field skb_tx_cache

There's also a size_goal_cache field in struct mptcp_sock that's now 
unused.

I like the simplification! Do the self tests trigger enough memory 
pressure to test allocation in that scenario?

The commit that introduced the skb_tx_cache did so to allow removal of the 
skb_ext cache. It looks like both caches are now unnecessary because the 
skb and skb_ext allocations are handled together in the 
__mptcp_{do_,}alloc_tx_skb() functions - so an allocation failure can be 
handled without the complexity of the caches. Does that match your 
understanding? If so, it would be helpful information for the commit 
message.


Thanks -

Mat


> ---
> net/mptcp/protocol.c | 89 ++------------------------------------------
> net/mptcp/protocol.h |  1 -
> 2 files changed, 4 insertions(+), 86 deletions(-)
>
> diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
> index 446acfb85493..1114a914d845 100644
> --- a/net/mptcp/protocol.c
> +++ b/net/mptcp/protocol.c
> @@ -903,22 +903,14 @@ static bool mptcp_frag_can_collapse_to(const struct mptcp_sock *msk,
> 		df->data_seq + df->data_len == msk->write_seq;
> }
>
> -static int mptcp_wmem_with_overhead(struct sock *sk, int size)
> +static int mptcp_wmem_with_overhead(int size)
> {
> -	struct mptcp_sock *msk = mptcp_sk(sk);
> -	int ret, skbs;
> -
> -	ret = size + ((sizeof(struct mptcp_data_frag) * size) >> PAGE_SHIFT);
> -	skbs = (msk->tx_pending_data + size) / msk->size_goal_cache;
> -	if (skbs < msk->skb_tx_cache.qlen)
> -		return ret;
> -
> -	return ret + (skbs - msk->skb_tx_cache.qlen) * SKB_TRUESIZE(MAX_TCP_HEADER);
> +	return size + ((sizeof(struct mptcp_data_frag) * size) >> PAGE_SHIFT);
> }
>
> static void __mptcp_wmem_reserve(struct sock *sk, int size)
> {
> -	int amount = mptcp_wmem_with_overhead(sk, size);
> +	int amount = mptcp_wmem_with_overhead(size);
> 	struct mptcp_sock *msk = mptcp_sk(sk);
>
> 	WARN_ON_ONCE(msk->wmem_reserved);
> @@ -1213,49 +1205,8 @@ static struct sk_buff *__mptcp_do_alloc_tx_skb(struct sock *sk, gfp_t gfp)
> 	return NULL;
> }
>
> -static bool mptcp_tx_cache_refill(struct sock *sk, int size,
> -				  struct sk_buff_head *skbs, int *total_ts)
> -{
> -	struct mptcp_sock *msk = mptcp_sk(sk);
> -	struct sk_buff *skb;
> -	int space_needed;
> -
> -	if (unlikely(tcp_under_memory_pressure(sk))) {
> -		mptcp_mem_reclaim_partial(sk);
> -
> -		/* under pressure pre-allocate at most a single skb */
> -		if (msk->skb_tx_cache.qlen)
> -			return true;
> -		space_needed = msk->size_goal_cache;
> -	} else {
> -		space_needed = msk->tx_pending_data + size -
> -			       msk->skb_tx_cache.qlen * msk->size_goal_cache;
> -	}
> -
> -	while (space_needed > 0) {
> -		skb = __mptcp_do_alloc_tx_skb(sk, sk->sk_allocation);
> -		if (unlikely(!skb)) {
> -			/* under memory pressure, try to pass the caller a
> -			 * single skb to allow forward progress
> -			 */
> -			while (skbs->qlen > 1) {
> -				skb = __skb_dequeue_tail(skbs);
> -				*total_ts -= skb->truesize;
> -				__kfree_skb(skb);
> -			}
> -			return skbs->qlen > 0;
> -		}
> -
> -		*total_ts += skb->truesize;
> -		__skb_queue_tail(skbs, skb);
> -		space_needed -= msk->size_goal_cache;
> -	}
> -	return true;
> -}
> -
> static bool __mptcp_alloc_tx_skb(struct sock *sk, struct sock *ssk, gfp_t gfp)
> {
> -	struct mptcp_sock *msk = mptcp_sk(sk);
> 	struct sk_buff *skb;
>
> 	if (ssk->sk_tx_skb_cache) {
> @@ -1266,22 +1217,6 @@ static bool __mptcp_alloc_tx_skb(struct sock *sk, struct sock *ssk, gfp_t gfp)
> 		return true;
> 	}
>
> -	skb = skb_peek(&msk->skb_tx_cache);
> -	if (skb) {
> -		if (likely(sk_wmem_schedule(ssk, skb->truesize))) {
> -			skb = __skb_dequeue(&msk->skb_tx_cache);
> -			if (WARN_ON_ONCE(!skb))
> -				return false;
> -
> -			mptcp_wmem_uncharge(sk, skb->truesize);
> -			ssk->sk_tx_skb_cache = skb;
> -			return true;
> -		}
> -
> -		/* over memory limit, no point to try to allocate a new skb */
> -		return false;
> -	}
> -
> 	skb = __mptcp_do_alloc_tx_skb(sk, gfp);
> 	if (!skb)
> 		return false;
> @@ -1297,7 +1232,6 @@ static bool __mptcp_alloc_tx_skb(struct sock *sk, struct sock *ssk, gfp_t gfp)
> static bool mptcp_must_reclaim_memory(struct sock *sk, struct sock *ssk)
> {
> 	return !ssk->sk_tx_skb_cache &&
> -	       !skb_peek(&mptcp_sk(sk)->skb_tx_cache) &&
> 	       tcp_under_memory_pressure(sk);
> }
>
> @@ -1340,7 +1274,6 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
> 	/* compute send limit */
> 	info->mss_now = tcp_send_mss(ssk, &info->size_goal, info->flags);
> 	avail_size = info->size_goal;
> -	msk->size_goal_cache = info->size_goal;
> 	skb = tcp_write_queue_tail(ssk);
> 	if (skb) {
> 		/* Limit the write to the size available in the
> @@ -1689,7 +1622,6 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
> 	while (msg_data_left(msg)) {
> 		int total_ts, frag_truesize = 0;
> 		struct mptcp_data_frag *dfrag;
> -		struct sk_buff_head skbs;
> 		bool dfrag_collapsed;
> 		size_t psize, offset;
>
> @@ -1722,16 +1654,10 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
> 		psize = pfrag->size - offset;
> 		psize = min_t(size_t, psize, msg_data_left(msg));
> 		total_ts = psize + frag_truesize;
> -		__skb_queue_head_init(&skbs);
> -		if (!mptcp_tx_cache_refill(sk, psize, &skbs, &total_ts))
> -			goto wait_for_memory;
>
> -		if (!mptcp_wmem_alloc(sk, total_ts)) {
> -			__skb_queue_purge(&skbs);
> +		if (!mptcp_wmem_alloc(sk, total_ts))
> 			goto wait_for_memory;
> -		}
>
> -		skb_queue_splice_tail(&skbs, &msk->skb_tx_cache);
> 		if (copy_page_from_iter(dfrag->page, offset, psize,
> 					&msg->msg_iter) != psize) {
> 			mptcp_wmem_uncharge(sk, psize + frag_truesize);
> @@ -2460,13 +2386,11 @@ static int __mptcp_init_sock(struct sock *sk)
> 	INIT_LIST_HEAD(&msk->rtx_queue);
> 	INIT_WORK(&msk->work, mptcp_worker);
> 	__skb_queue_head_init(&msk->receive_queue);
> -	__skb_queue_head_init(&msk->skb_tx_cache);
> 	msk->out_of_order_queue = RB_ROOT;
> 	msk->first_pending = NULL;
> 	msk->wmem_reserved = 0;
> 	msk->rmem_released = 0;
> 	msk->tx_pending_data = 0;
> -	msk->size_goal_cache = TCP_BASE_MSS;
>
> 	msk->ack_hint = NULL;
> 	msk->first = NULL;
> @@ -2527,15 +2451,10 @@ static void __mptcp_clear_xmit(struct sock *sk)
> {
> 	struct mptcp_sock *msk = mptcp_sk(sk);
> 	struct mptcp_data_frag *dtmp, *dfrag;
> -	struct sk_buff *skb;
>
> 	WRITE_ONCE(msk->first_pending, NULL);
> 	list_for_each_entry_safe(dfrag, dtmp, &msk->rtx_queue, list)
> 		dfrag_clear(sk, dfrag);
> -	while ((skb = __skb_dequeue(&msk->skb_tx_cache)) != NULL) {
> -		sk->sk_forward_alloc += skb->truesize;
> -		kfree_skb(skb);
> -	}
> }
>
> static void mptcp_cancel_work(struct sock *sk)
> diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
> index 868e878af526..0ad1e1d08667 100644
> --- a/net/mptcp/protocol.h
> +++ b/net/mptcp/protocol.h
> @@ -245,7 +245,6 @@ struct mptcp_sock {
> 	struct sk_buff  *ooo_last_skb;
> 	struct rb_root  out_of_order_queue;
> 	struct sk_buff_head receive_queue;
> -	struct sk_buff_head skb_tx_cache;	/* this is wmem accounted */
> 	int		tx_pending_data;
> 	int		size_goal_cache;
> 	struct list_head conn_list;
> -- 
> 2.26.3
>
>
>

--
Mat Martineau
Intel
Paolo Abeni May 26, 2021, 11:06 a.m. UTC | #2
On Fri, 2021-05-21 at 15:57 -0700, Mat Martineau wrote:
> On Fri, 21 May 2021, Paolo Abeni wrote:
> 
> > The mentioned cache was introduced to reduce the number of skb
> > allocation in atomic context, but the required complexity is
> > excessive.
> > 
> > This change remove the mentioned cache.
> > 
> > Signed-off-by: Paolo Abeni <pabeni@redhat.com>
> > ---
> > v1 -> v2:
> > - drop unused field skb_tx_cache
> 
> There's also a size_goal_cache field in struct mptcp_sock that's now 
> unused.

yep, will do in v3 (sorry, I lost track of this review)

> I like the simplification! Do the self tests trigger enough memory 
> pressure to test allocation in that scenario?

Will ensure that before posting v3

> The commit that introduced the skb_tx_cache did so to allow removal of the 
> skb_ext cache. It looks like both caches are now unnecessary because the 
> skb and skb_ext allocations are handled together in the 
> __mptcp_{do_,}alloc_tx_skb() functions - so an allocation failure can be 
> handled without the complexity of the caches.

The cache's goal was to avoid/reduce skb allocation in atomic context. That
is still true (e.g. with skb_tx_cache we have fewer skb allocations in
atomic context, especially when the transfer is link/pacing/peer
limited). Plain TCP can allocate skbs in atomic context, but that does
not happen often. MPTCP will do that for almost every skb,
when link/pacing/peer limited.

I think we can ignore the above, as e.g. high-performance NIC device
drivers always allocate skbs in atomic context.

>  Does that match your 
> understanding? If so, it would be helpful information for the commit 
> message.

The main trick, imho is that we still use the sk->sk_tx_skb_cache to
prevent the TCP stack from allocating extension-less skb. That trick
was introduced with the mptcp tx_cache.

Not sure if the above clarifies anything?

Cheers,

Paolo
Paolo Abeni May 26, 2021, 3:23 p.m. UTC | #3
On Wed, 2021-05-26 at 13:06 +0200, Paolo Abeni wrote:
> On Fri, 2021-05-21 at 15:57 -0700, Mat Martineau wrote:
> > On Fri, 21 May 2021, Paolo Abeni wrote:
> > 
> > > The mentioned cache was introduced to reduce the number of skb
> > > allocation in atomic context, but the required complexity is
> > > excessive.
> > > 
> > > This change remove the mentioned cache.
> > > 
> > > Signed-off-by: Paolo Abeni <pabeni@redhat.com>
> > > ---
> > > v1 -> v2:
> > > - drop unused field skb_tx_cache
> > 
> > There's also a size_goal_cache field in struct mptcp_sock that's now 
> > unused.
> 
> yep, will do in v3 (sorry, I lost track of this review)
> 
> > I like the simplification! Do the self tests trigger enough memory 
> > pressure to test allocation in that scenario?
> 
> Will ensure that before posting v3

Well, we have problems under severe memory pressure, but that happens
even with the current export branch.

I'll send a patch

Cheers,

Paolo
Mat Martineau May 26, 2021, 10:51 p.m. UTC | #4
On Wed, 26 May 2021, Paolo Abeni wrote:

> On Fri, 2021-05-21 at 15:57 -0700, Mat Martineau wrote:
>> On Fri, 21 May 2021, Paolo Abeni wrote:
>>
>>> The mentioned cache was introduced to reduce the number of skb
>>> allocation in atomic context, but the required complexity is
>>> excessive.
>>>
>>> This change remove the mentioned cache.
>>>
>>> Signed-off-by: Paolo Abeni <pabeni@redhat.com>
>>> ---
>>> v1 -> v2:
>>> - drop unused field skb_tx_cache
>>
>> There's also a size_goal_cache field in struct mptcp_sock that's now
>> unused.
>
> yep, will do in v3 (sorry, I lost track of this review)
>
>> I like the simplification! Do the self tests trigger enough memory
>> pressure to test allocation in that scenario?
>
> Will ensure that before posting v3
>
>> The commit that introduced the skb_tx_cache did so to allow removal of the
>> skb_ext cache. It looks like both caches are now unnecessary because the
>> skb and skb_ext allocations are handled together in the
>> __mptcp_{do_,}alloc_tx_skb() functions - so an allocation failure can be
>> handled without the complexity of the caches.
>
> The cache goal was avoid/reduce skb allocation in atomic context. That
> is still true (e.g. with skb_tx_cache we have less skb allocation in
> atomic context, expecially when the transfer is link/pacing/peer
> limited). Plain TCP can allocare skbs in atomic context, but that does
> not happen often. MPTCP will do that almost for each skb,
> when link/pacing/peer limited.
>
> I think we can ignore the above, as e.g. high performance NIC device
> driver always allocate skb in atomic context.
>

I think you have a better sense of the tradeoffs with the allocation 
context than I do - from what you describe it doesn't seem like the extra 
complexity to avoid atomic allocation has a significant payoff.

>>  Does that match your
>> understanding? If so, it would be helpful information for the commit
>> message.
>
> The main trick, imho is that we still use the sk->sk_tx_skb_cache to
> prevent the TCP stack from allocating extension-less skb. That trick
> was introduced with the mptcp tx_cache.
>
> Not sure if the above clarify anything ?!?

Yes - it's helpful background, thanks!

--
Mat Martineau
Intel
diff mbox series

Patch

diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 446acfb85493..1114a914d845 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -903,22 +903,14 @@  static bool mptcp_frag_can_collapse_to(const struct mptcp_sock *msk,
 		df->data_seq + df->data_len == msk->write_seq;
 }
 
-static int mptcp_wmem_with_overhead(struct sock *sk, int size)
+static int mptcp_wmem_with_overhead(int size)
 {
-	struct mptcp_sock *msk = mptcp_sk(sk);
-	int ret, skbs;
-
-	ret = size + ((sizeof(struct mptcp_data_frag) * size) >> PAGE_SHIFT);
-	skbs = (msk->tx_pending_data + size) / msk->size_goal_cache;
-	if (skbs < msk->skb_tx_cache.qlen)
-		return ret;
-
-	return ret + (skbs - msk->skb_tx_cache.qlen) * SKB_TRUESIZE(MAX_TCP_HEADER);
+	return size + ((sizeof(struct mptcp_data_frag) * size) >> PAGE_SHIFT);
 }
 
 static void __mptcp_wmem_reserve(struct sock *sk, int size)
 {
-	int amount = mptcp_wmem_with_overhead(sk, size);
+	int amount = mptcp_wmem_with_overhead(size);
 	struct mptcp_sock *msk = mptcp_sk(sk);
 
 	WARN_ON_ONCE(msk->wmem_reserved);
@@ -1213,49 +1205,8 @@  static struct sk_buff *__mptcp_do_alloc_tx_skb(struct sock *sk, gfp_t gfp)
 	return NULL;
 }
 
-static bool mptcp_tx_cache_refill(struct sock *sk, int size,
-				  struct sk_buff_head *skbs, int *total_ts)
-{
-	struct mptcp_sock *msk = mptcp_sk(sk);
-	struct sk_buff *skb;
-	int space_needed;
-
-	if (unlikely(tcp_under_memory_pressure(sk))) {
-		mptcp_mem_reclaim_partial(sk);
-
-		/* under pressure pre-allocate at most a single skb */
-		if (msk->skb_tx_cache.qlen)
-			return true;
-		space_needed = msk->size_goal_cache;
-	} else {
-		space_needed = msk->tx_pending_data + size -
-			       msk->skb_tx_cache.qlen * msk->size_goal_cache;
-	}
-
-	while (space_needed > 0) {
-		skb = __mptcp_do_alloc_tx_skb(sk, sk->sk_allocation);
-		if (unlikely(!skb)) {
-			/* under memory pressure, try to pass the caller a
-			 * single skb to allow forward progress
-			 */
-			while (skbs->qlen > 1) {
-				skb = __skb_dequeue_tail(skbs);
-				*total_ts -= skb->truesize;
-				__kfree_skb(skb);
-			}
-			return skbs->qlen > 0;
-		}
-
-		*total_ts += skb->truesize;
-		__skb_queue_tail(skbs, skb);
-		space_needed -= msk->size_goal_cache;
-	}
-	return true;
-}
-
 static bool __mptcp_alloc_tx_skb(struct sock *sk, struct sock *ssk, gfp_t gfp)
 {
-	struct mptcp_sock *msk = mptcp_sk(sk);
 	struct sk_buff *skb;
 
 	if (ssk->sk_tx_skb_cache) {
@@ -1266,22 +1217,6 @@  static bool __mptcp_alloc_tx_skb(struct sock *sk, struct sock *ssk, gfp_t gfp)
 		return true;
 	}
 
-	skb = skb_peek(&msk->skb_tx_cache);
-	if (skb) {
-		if (likely(sk_wmem_schedule(ssk, skb->truesize))) {
-			skb = __skb_dequeue(&msk->skb_tx_cache);
-			if (WARN_ON_ONCE(!skb))
-				return false;
-
-			mptcp_wmem_uncharge(sk, skb->truesize);
-			ssk->sk_tx_skb_cache = skb;
-			return true;
-		}
-
-		/* over memory limit, no point to try to allocate a new skb */
-		return false;
-	}
-
 	skb = __mptcp_do_alloc_tx_skb(sk, gfp);
 	if (!skb)
 		return false;
@@ -1297,7 +1232,6 @@  static bool __mptcp_alloc_tx_skb(struct sock *sk, struct sock *ssk, gfp_t gfp)
 static bool mptcp_must_reclaim_memory(struct sock *sk, struct sock *ssk)
 {
 	return !ssk->sk_tx_skb_cache &&
-	       !skb_peek(&mptcp_sk(sk)->skb_tx_cache) &&
 	       tcp_under_memory_pressure(sk);
 }
 
@@ -1340,7 +1274,6 @@  static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
 	/* compute send limit */
 	info->mss_now = tcp_send_mss(ssk, &info->size_goal, info->flags);
 	avail_size = info->size_goal;
-	msk->size_goal_cache = info->size_goal;
 	skb = tcp_write_queue_tail(ssk);
 	if (skb) {
 		/* Limit the write to the size available in the
@@ -1689,7 +1622,6 @@  static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 	while (msg_data_left(msg)) {
 		int total_ts, frag_truesize = 0;
 		struct mptcp_data_frag *dfrag;
-		struct sk_buff_head skbs;
 		bool dfrag_collapsed;
 		size_t psize, offset;
 
@@ -1722,16 +1654,10 @@  static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 		psize = pfrag->size - offset;
 		psize = min_t(size_t, psize, msg_data_left(msg));
 		total_ts = psize + frag_truesize;
-		__skb_queue_head_init(&skbs);
-		if (!mptcp_tx_cache_refill(sk, psize, &skbs, &total_ts))
-			goto wait_for_memory;
 
-		if (!mptcp_wmem_alloc(sk, total_ts)) {
-			__skb_queue_purge(&skbs);
+		if (!mptcp_wmem_alloc(sk, total_ts))
 			goto wait_for_memory;
-		}
 
-		skb_queue_splice_tail(&skbs, &msk->skb_tx_cache);
 		if (copy_page_from_iter(dfrag->page, offset, psize,
 					&msg->msg_iter) != psize) {
 			mptcp_wmem_uncharge(sk, psize + frag_truesize);
@@ -2460,13 +2386,11 @@  static int __mptcp_init_sock(struct sock *sk)
 	INIT_LIST_HEAD(&msk->rtx_queue);
 	INIT_WORK(&msk->work, mptcp_worker);
 	__skb_queue_head_init(&msk->receive_queue);
-	__skb_queue_head_init(&msk->skb_tx_cache);
 	msk->out_of_order_queue = RB_ROOT;
 	msk->first_pending = NULL;
 	msk->wmem_reserved = 0;
 	msk->rmem_released = 0;
 	msk->tx_pending_data = 0;
-	msk->size_goal_cache = TCP_BASE_MSS;
 
 	msk->ack_hint = NULL;
 	msk->first = NULL;
@@ -2527,15 +2451,10 @@  static void __mptcp_clear_xmit(struct sock *sk)
 {
 	struct mptcp_sock *msk = mptcp_sk(sk);
 	struct mptcp_data_frag *dtmp, *dfrag;
-	struct sk_buff *skb;
 
 	WRITE_ONCE(msk->first_pending, NULL);
 	list_for_each_entry_safe(dfrag, dtmp, &msk->rtx_queue, list)
 		dfrag_clear(sk, dfrag);
-	while ((skb = __skb_dequeue(&msk->skb_tx_cache)) != NULL) {
-		sk->sk_forward_alloc += skb->truesize;
-		kfree_skb(skb);
-	}
 }
 
 static void mptcp_cancel_work(struct sock *sk)
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 868e878af526..0ad1e1d08667 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -245,7 +245,6 @@  struct mptcp_sock {
 	struct sk_buff  *ooo_last_skb;
 	struct rb_root  out_of_order_queue;
 	struct sk_buff_head receive_queue;
-	struct sk_buff_head skb_tx_cache;	/* this is wmem accounted */
 	int		tx_pending_data;
 	int		size_goal_cache;
 	struct list_head conn_list;