diff mbox series

[net-next,v3,4/4] net: dqs: make struct dql more cache efficient

Message ID 20240411192241.2498631-5-leitao@debian.org (mailing list archive)
State Accepted
Commit 4ba67ef3a1fbb7d8dc5f00de9b93a583d05b38cc
Delegated to: Netdev Maintainers
Headers show
Series net: dqs: optimize if stall threshold is not set | expand

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for net-next
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 4856 this patch: 4856
netdev/build_tools success Errors and warnings before: 0 this patch: 0
netdev/cc_maintainers success CCed 3 of 3 maintainers
netdev/build_clang success Errors and warnings before: 1056 this patch: 1056
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 5132 this patch: 5132
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 54 lines checked
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
netdev/contest success net-next-2024-04-12--06-00 (tests: 962)

Commit Message

Breno Leitao April 11, 2024, 7:22 p.m. UTC
With the previous change, struct dqs->stall_thrs will be in the hot path
(at queue side), even if DQS is disabled.

The other fields accessed in this function (last_obj_cnt and num_queued)
are in the first cache line, let's move this field  (stall_thrs) to the
very first cache line, since there is a hole there.

This does not change the structure size, since it moves an short (2
bytes) to 4-bytes whole in the first cache line.

This is the new structure format now:

struct dql {
	unsigned int    num_queued;
	unsigned int    last_obj_cnt;
...
	short unsigned int    stall_thrs;
	/* XXX 2 bytes hole, try to pack */
...
	/* --- cacheline 1 boundary (64 bytes) --- */
...
 	/* Longest stall detected, reported to user */
	short unsigned int         stall_max;
	/* XXX 2 bytes hole, try to pack */
};

Also, read the stall_thrs (now in the very first cache line) earlier,
together with dql->num_queued (also in the first cache line).

Suggested-by: Jakub Kicinski <kuba@kernel.org>
Suggested-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Breno Leitao <leitao@debian.org>
---
 include/linux/dynamic_queue_limits.h |  5 +++--
 lib/dynamic_queue_limits.c           | 13 +++++++++----
 2 files changed, 12 insertions(+), 6 deletions(-)
diff mbox series

Patch

diff --git a/include/linux/dynamic_queue_limits.h b/include/linux/dynamic_queue_limits.h
index 869afb800ea1..281298e77a15 100644
--- a/include/linux/dynamic_queue_limits.h
+++ b/include/linux/dynamic_queue_limits.h
@@ -50,6 +50,9 @@  struct dql {
 	unsigned int	adj_limit;		/* limit + num_completed */
 	unsigned int	last_obj_cnt;		/* Count at last queuing */
 
+	/* Stall threshold (in jiffies), defined by user */
+	unsigned short	stall_thrs;
+
 	unsigned long	history_head;		/* top 58 bits of jiffies */
 	/* stall entries, a bit per entry */
 	unsigned long	history[DQL_HIST_LEN];
@@ -71,8 +74,6 @@  struct dql {
 	unsigned int	min_limit;		/* Minimum limit */
 	unsigned int	slack_hold_time;	/* Time to measure slack */
 
-	/* Stall threshold (in jiffies), defined by user */
-	unsigned short	stall_thrs;
 	/* Longest stall detected, reported to user */
 	unsigned short	stall_max;
 	unsigned long	last_reap;		/* Last reap (in jiffies) */
diff --git a/lib/dynamic_queue_limits.c b/lib/dynamic_queue_limits.c
index a1389db1c30a..e49deddd3de9 100644
--- a/lib/dynamic_queue_limits.c
+++ b/lib/dynamic_queue_limits.c
@@ -15,12 +15,10 @@ 
 #define POSDIFF(A, B) ((int)((A) - (B)) > 0 ? (A) - (B) : 0)
 #define AFTER_EQ(A, B) ((int)((A) - (B)) >= 0)
 
-static void dql_check_stall(struct dql *dql)
+static void dql_check_stall(struct dql *dql, unsigned short stall_thrs)
 {
-	unsigned short stall_thrs;
 	unsigned long now;
 
-	stall_thrs = READ_ONCE(dql->stall_thrs);
 	if (!stall_thrs)
 		return;
 
@@ -86,9 +84,16 @@  void dql_completed(struct dql *dql, unsigned int count)
 {
 	unsigned int inprogress, prev_inprogress, limit;
 	unsigned int ovlimit, completed, num_queued;
+	unsigned short stall_thrs;
 	bool all_prev_completed;
 
 	num_queued = READ_ONCE(dql->num_queued);
+	/* Read stall_thrs in advance since it belongs to the same (first)
+	 * cache line as ->num_queued. This way, dql_check_stall() does not
+	 * need to touch the first cache line again later, reducing the window
+	 * of possible false sharing.
+	 */
+	stall_thrs = READ_ONCE(dql->stall_thrs);
 
 	/* Can't complete more than what's in queue */
 	BUG_ON(count > num_queued - dql->num_completed);
@@ -178,7 +183,7 @@  void dql_completed(struct dql *dql, unsigned int count)
 	dql->num_completed = completed;
 	dql->prev_num_queued = num_queued;
 
-	dql_check_stall(dql);
+	dql_check_stall(dql, stall_thrs);
 }
 EXPORT_SYMBOL(dql_completed);