[net-next,34/36] rxrpc: Move the cwnd degradation after transmitting packets

Message ID 166994039212.1732290.14435161630571059598.stgit@warthog.procyon.org.uk (mailing list archive)
State Accepted
Commit 5086d9a9dfec4866806da303115489b0606decb7
Delegated to: Netdev Maintainers
Series rxrpc: Increasing SACK size and moving away from softirq, parts 2 & 3

Checks

Context Check Description
netdev/tree_selection success Clearly marked for net-next, async
netdev/fixes_present success Fixes tag not required for -next series
netdev/subject_prefix success Link
netdev/cover_letter success Series has a cover letter
netdev/patch_count fail Series longer than 15 patches (and no cover letter)
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 4 this patch: 4
netdev/cc_maintainers warning 4 maintainers not CCed: edumazet@google.com davem@davemloft.net pabeni@redhat.com kuba@kernel.org
netdev/build_clang success Errors and warnings before: 0 this patch: 0
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 3 this patch: 3
netdev/checkpatch warning
  WARNING: function definition argument 'struct rxrpc_call *' should also have an identifier name
  WARNING: line length of 83 exceeds 80 columns
  WARNING: line length of 89 exceeds 80 columns
  WARNING: networking block comments don't use an empty /* line, use /* Comment...
  WARNING: quoted string split across lines
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

David Howells Dec. 2, 2022, 12:19 a.m. UTC
When we've gone for more than one RTT without transmitting a packet, we
should cut the cwnd in half and raise the ssthresh to at least 3/4 of the
old cwnd (as suggested in RFC 2861 sec. 3.1).

However, ACK packets may be received in a batch, and the first of these may
cut the cwnd, preventing further transmission; each subsequent ACK then cuts
the cwnd yet further, reducing it to the floor and killing performance.

Fix this by moving the cwnd reset to after the transmission has been done
and by resetting the base time so that the cwnd won't be halved again for at
least another RTT.
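
In outline, the new rule works as sketched below. This is a minimal editor's
sketch of what rxrpc_congestion_degrade() in this patch does, using plain
integer timestamps and made-up names (sketch_call, sketch_congestion_degrade)
rather than the kernel's rxrpc structures; the cwnd floor of 3 packets
assumes RXRPC_MIN_CWND evaluates to 3 for the default SMSS.

/* Sketch of the idle degradation rule; illustrative types only. */
#include <stdint.h>

struct sketch_call {
	uint64_t	tx_last_sent_ns;	/* time of last DATA transmission */
	uint64_t	srtt_ns;		/* smoothed round-trip time */
	unsigned int	cwnd;			/* congestion window, in packets */
	unsigned int	ssthresh;		/* slow-start threshold */
};

void sketch_congestion_degrade(struct sketch_call *call, uint64_t now_ns)
{
	/* Only degrade after more than one RTT without transmitting. */
	if (now_ns <= call->tx_last_sent_ns + call->srtt_ns)
		return;

	/* Restart the idle clock, so a batch of ACKs processed together
	 * can trigger at most one halving per RTT - this is the fix.
	 */
	call->tx_last_sent_ns = now_ns;

	/* RFC 2861: ssthresh = max(ssthresh, 3/4 cwnd); then halve cwnd. */
	if (call->ssthresh < call->cwnd * 3 / 4)
		call->ssthresh = call->cwnd * 3 / 4;
	call->cwnd = call->cwnd / 2 > 3 ? call->cwnd / 2 : 3;
}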

Signed-off-by: David Howells <dhowells@redhat.com>
cc: Marc Dionne <marc.dionne@auristor.com>
cc: linux-afs@lists.infradead.org
---

 net/rxrpc/ar-internal.h |    2 ++
 net/rxrpc/call_event.c  |    7 +++++++
 net/rxrpc/input.c       |   49 ++++++++++++++++++++++++++---------------------
 net/rxrpc/proc.c        |    5 +++--
 4 files changed, 39 insertions(+), 24 deletions(-)
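
A note on the new RXRPC_MIN_CWND macro in the ar-internal.h hunk below: it
encodes the initial-window rule of RFC 5681 sec. 3.1 (2 segments if
SMSS > 2190 bytes, 3 if SMSS > 1095, otherwise 4), replacing the open-coded
if/else chain that this patch deletes from rxrpc_congestion_management().
A standalone check of the arithmetic, assuming RXRPC_TX_SMSS (i.e.
RXRPC_JUMBO_DATALEN) is 1412 bytes:

/* Editor's check of the RXRPC_MIN_CWND arithmetic; 1412 is assumed to
 * be the value of RXRPC_JUMBO_DATALEN here.
 */
#include <stdio.h>

#define TX_SMSS		1412	/* stands in for RXRPC_TX_SMSS */
#define MIN_CWND	(TX_SMSS > 2190 ? 2 : TX_SMSS > 1095 ? 3 : 4)

int main(void)
{
	printf("cwnd floor: %d packets\n", MIN_CWND);	/* prints 3 */
	return 0;
}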

Patch

diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index 6cfe366ee224..785cd0dd1eea 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -666,6 +666,7 @@  struct rxrpc_call {
 	 * packets) rather than bytes.
 	 */
 #define RXRPC_TX_SMSS		RXRPC_JUMBO_DATALEN
+#define RXRPC_MIN_CWND		(RXRPC_TX_SMSS > 2190 ? 2 : RXRPC_TX_SMSS > 1095 ? 3 : 4)
 	u8			cong_cwnd;	/* Congestion window size */
 	u8			cong_extra;	/* Extra to send for congestion management */
 	u8			cong_ssthresh;	/* Slow-start threshold */
@@ -953,6 +954,7 @@  void rxrpc_unpublish_service_conn(struct rxrpc_connection *);
 /*
  * input.c
  */
+void rxrpc_congestion_degrade(struct rxrpc_call *);
 void rxrpc_input_call_packet(struct rxrpc_call *, struct sk_buff *);
 void rxrpc_implicit_end_call(struct rxrpc_call *, struct sk_buff *);
 
diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c
index 9f1e490ab976..fd122e3726bd 100644
--- a/net/rxrpc/call_event.c
+++ b/net/rxrpc/call_event.c
@@ -427,6 +427,13 @@  void rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb)
 
 	rxrpc_transmit_some_data(call);
 
+	if (skb) {
+		struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+
+		if (sp->hdr.type == RXRPC_PACKET_TYPE_ACK)
+			rxrpc_congestion_degrade(call);
+	}
+
 	if (test_and_clear_bit(RXRPC_CALL_EV_INITIAL_PING, &call->events))
 		rxrpc_send_initial_ping(call);
 
diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c
index 2988e3d0c1f6..d0e20e946e48 100644
--- a/net/rxrpc/input.c
+++ b/net/rxrpc/input.c
@@ -27,7 +27,6 @@  static void rxrpc_congestion_management(struct rxrpc_call *call,
 	enum rxrpc_congest_change change = rxrpc_cong_no_change;
 	unsigned int cumulative_acks = call->cong_cumul_acks;
 	unsigned int cwnd = call->cong_cwnd;
-	ktime_t now;
 	bool resend = false;
 
 	summary->flight_size =
@@ -57,27 +56,6 @@  static void rxrpc_congestion_management(struct rxrpc_call *call,
 	summary->cumulative_acks = cumulative_acks;
 	summary->dup_acks = call->cong_dup_acks;
 
-	/* If we haven't transmitted anything for >1RTT, we should reset the
-	 * congestion management state.
-	 */
-	now = ktime_get_real();
-	if ((call->cong_mode == RXRPC_CALL_SLOW_START ||
-	     call->cong_mode == RXRPC_CALL_CONGEST_AVOIDANCE) &&
-	    ktime_before(ktime_add_us(call->tx_last_sent,
-				      call->peer->srtt_us >> 3), now)
-	    ) {
-		trace_rxrpc_reset_cwnd(call, now);
-		change = rxrpc_cong_idle_reset;
-		rxrpc_inc_stat(call->rxnet, stat_tx_data_cwnd_reset);
-		summary->mode = RXRPC_CALL_SLOW_START;
-		if (RXRPC_TX_SMSS > 2190)
-			summary->cwnd = 2;
-		else if (RXRPC_TX_SMSS > 1095)
-			summary->cwnd = 3;
-		else
-			summary->cwnd = 4;
-	}
-
 	switch (call->cong_mode) {
 	case RXRPC_CALL_SLOW_START:
 		if (summary->saw_nacks)
@@ -197,6 +175,33 @@  static void rxrpc_congestion_management(struct rxrpc_call *call,
 	goto out_no_clear_ca;
 }
 
+/*
+ * Degrade the congestion window if we haven't transmitted a packet for >1RTT.
+ */
+void rxrpc_congestion_degrade(struct rxrpc_call *call)
+{
+	ktime_t rtt, now;
+
+	if (call->cong_mode != RXRPC_CALL_SLOW_START &&
+	    call->cong_mode != RXRPC_CALL_CONGEST_AVOIDANCE)
+		return;
+	if (call->state == RXRPC_CALL_CLIENT_AWAIT_REPLY)
+		return;
+
+	rtt = ns_to_ktime(call->peer->srtt_us * (1000 / 8));
+	now = ktime_get_real();
+	if (!ktime_before(ktime_add(call->tx_last_sent, rtt), now))
+		return;
+
+	trace_rxrpc_reset_cwnd(call, now);
+	rxrpc_inc_stat(call->rxnet, stat_tx_data_cwnd_reset);
+	call->tx_last_sent = now;
+	call->cong_mode = RXRPC_CALL_SLOW_START;
+	call->cong_ssthresh = max_t(unsigned int, call->cong_ssthresh,
+				    call->cong_cwnd * 3 / 4);
+	call->cong_cwnd = max_t(unsigned int, call->cong_cwnd / 2, RXRPC_MIN_CWND);
+}
+
 /*
  * Apply a hard ACK by advancing the Tx window.
  */
diff --git a/net/rxrpc/proc.c b/net/rxrpc/proc.c
index 6816934cb4cf..3a59591ec061 100644
--- a/net/rxrpc/proc.c
+++ b/net/rxrpc/proc.c
@@ -61,7 +61,7 @@  static int rxrpc_call_seq_show(struct seq_file *seq, void *v)
 			 "Proto Local                                          "
 			 " Remote                                         "
 			 " SvID ConnID   CallID   End Use State    Abort   "
-			 " DebugId  TxSeq    TW RxSeq    RW RxSerial RxTimo\n");
+			 " DebugId  TxSeq    TW RxSeq    RW RxSerial CW RxTimo\n");
 		return 0;
 	}
 
@@ -84,7 +84,7 @@  static int rxrpc_call_seq_show(struct seq_file *seq, void *v)
 	wtmp   = atomic64_read_acquire(&call->ackr_window);
 	seq_printf(seq,
 		   "UDP   %-47.47s %-47.47s %4x %08x %08x %s %3u"
-		   " %-8.8s %08x %08x %08x %02x %08x %02x %08x %06lx\n",
+		   " %-8.8s %08x %08x %08x %02x %08x %02x %08x %02x %06lx\n",
 		   lbuff,
 		   rbuff,
 		   call->dest_srx.srx_service,
@@ -98,6 +98,7 @@  static int rxrpc_call_seq_show(struct seq_file *seq, void *v)
 		   acks_hard_ack, READ_ONCE(call->tx_top) - acks_hard_ack,
 		   lower_32_bits(wtmp), upper_32_bits(wtmp) - lower_32_bits(wtmp),
 		   call->rx_serial,
+		   call->cong_cwnd,
 		   timeout);
 
 	return 0;
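
Two closing notes on the hunks above. First, rxrpc keeps the smoothed RTT in
srtt_us scaled up by 8, the same fixed-point convention TCP uses, which is
why the deleted code shifted right by 3 to get microseconds while the new
rxrpc_congestion_degrade() multiplies by 1000 / 8 = 125 to get nanoseconds
for the ktime comparison. A sketch of the two equivalent conversions, with
the scaling inferred from the srtt_us >> 3 in the deleted hunk:

/* srtt_us holds the smoothed RTT in eighths of a microsecond. */
#include <stdint.h>

uint64_t srtt_to_us(uint32_t srtt_us)
{
	return srtt_us >> 3;			/* as the deleted code did */
}

uint64_t srtt_to_ns(uint32_t srtt_us)
{
	return (uint64_t)srtt_us * (1000 / 8);	/* = srtt_us * 125, as in the new code */
}

Second, the proc.c hunks add a two-hex-digit "CW" column (from
call->cong_cwnd) between RxSerial and RxTimo in the calls listing under
/proc/net/rxrpc, which gives a way to watch the window being halved and
regrown in practice.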