[for-next,5/5] IB/hfi1: Place struct mmu_rb_handler on cache line start

Message ID: 168088636963.3027109.16959757980497822530.stgit@252.162.96.66.static.eigbox.net
State: Accepted
Series: Updates for next cycle

Commit Message

Dennis Dalessandro April 7, 2023, 4:52 p.m. UTC
From: Patrick Kelsey <pat.kelsey@cornelisnetworks.com>

Place struct mmu_rb_handler at the start of a cache line, like so:

	struct mmu_rb_handler *h;
	void *free_ptr;
	int ret;

	free_ptr = kzalloc(sizeof(*h) + cache_line_size() - 1, GFP_KERNEL);
	if (!free_ptr)
		return -ENOMEM;

	h = PTR_ALIGN(free_ptr, cache_line_size());

Additionally, move struct mmu_rb_handler fields "root" and "ops_arg" so
they start on the next cache-line boundary, using the
"____cacheline_aligned_in_smp" annotation.
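
For illustration, a minimal userspace sketch of the same alignment
pattern (a hypothetical analogue, assuming 64-byte cache lines;
PTR_ALIGN_UP and struct example stand in for the kernel's PTR_ALIGN()
and struct mmu_rb_handler, and __attribute__((aligned)) stands in for
____cacheline_aligned_in_smp):

	#include <stddef.h>
	#include <stdint.h>
	#include <stdio.h>
	#include <stdlib.h>

	#define CACHE_LINE 64	/* assumed; the kernel queries cache_line_size() */

	/* Round ptr up to the next multiple of align (a power of two),
	 * mirroring what the kernel's PTR_ALIGN() does. */
	#define PTR_ALIGN_UP(ptr, align) \
		((void *)(((uintptr_t)(ptr) + ((align) - 1)) & \
			  ~(uintptr_t)((align) - 1)))

	struct example {
		char cold[56];		/* fills most of the first cache line */
		/* hot field starts on the next cache line, as
		 * ____cacheline_aligned_in_smp arranges in the patch */
		long hot __attribute__((aligned(CACHE_LINE)));
	};

	int main(void)
	{
		void *free_ptr;
		struct example *e;

		/* Over-allocate so an aligned object fits no matter
		 * where the allocator places the block. */
		free_ptr = calloc(1, sizeof(*e) + CACHE_LINE - 1);
		if (!free_ptr)
			return 1;

		e = PTR_ALIGN_UP(free_ptr, CACHE_LINE);
		printf("start aligned: %d\n",
		       (uintptr_t)e % CACHE_LINE == 0);
		printf("hot offset: %zu\n", offsetof(struct example, hot));

		/* As with h->free_ptr in the patch, the original pointer
		 * must be kept and freed, not the aligned one. */
		free(free_ptr);
		return 0;
	}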

Allocating an additional cache_line_size() - 1 bytes to place
struct mmu_rb_handler on a cache-line boundary does increase memory
consumption.

However, only a few struct mmu_rb_handler instances are created while
hfi1 is in use. As mmu_rb_handler->root and mmu_rb_handler->ops_arg are
accessed frequently, the advantage of having them both within one cache
line is expected to outweigh the disadvantage of the additional memory
consumption per struct mmu_rb_handler.
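
As a rough worked example, with 64-byte cache lines the over-allocation
adds at most 63 bytes of alignment slack per handler, and the
____cacheline_aligned_in_smp annotation adds only the padding needed to
push "root" to the next 64-byte boundary.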

Signed-off-by: Brendan Cunningham <bcunningham@cornelisnetworks.com>
Signed-off-by: Patrick Kelsey <pat.kelsey@cornelisnetworks.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@cornelisnetworks.com>
---
 drivers/infiniband/hw/hfi1/mmu_rb.c |   11 +++++++----
 drivers/infiniband/hw/hfi1/mmu_rb.h |   14 ++++++++++++--
 2 files changed, 19 insertions(+), 6 deletions(-)
Patch

diff --git a/drivers/infiniband/hw/hfi1/mmu_rb.c b/drivers/infiniband/hw/hfi1/mmu_rb.c
index 71b9ac018887..1cea8b0c78e0 100644
--- a/drivers/infiniband/hw/hfi1/mmu_rb.c
+++ b/drivers/infiniband/hw/hfi1/mmu_rb.c
@@ -46,12 +46,14 @@  int hfi1_mmu_rb_register(void *ops_arg,
 			 struct mmu_rb_handler **handler)
 {
 	struct mmu_rb_handler *h;
+	void *free_ptr;
 	int ret;
 
-	h = kzalloc(sizeof(*h), GFP_KERNEL);
-	if (!h)
+	free_ptr = kzalloc(sizeof(*h) + cache_line_size() - 1, GFP_KERNEL);
+	if (!free_ptr)
 		return -ENOMEM;
 
+	h = PTR_ALIGN(free_ptr, cache_line_size());
 	h->root = RB_ROOT_CACHED;
 	h->ops = ops;
 	h->ops_arg = ops_arg;
@@ -62,10 +64,11 @@  int hfi1_mmu_rb_register(void *ops_arg,
 	INIT_LIST_HEAD(&h->del_list);
 	INIT_LIST_HEAD(&h->lru_list);
 	h->wq = wq;
+	h->free_ptr = free_ptr;
 
 	ret = mmu_notifier_register(&h->mn, current->mm);
 	if (ret) {
-		kfree(h);
+		kfree(free_ptr);
 		return ret;
 	}
 
@@ -108,7 +111,7 @@  void hfi1_mmu_rb_unregister(struct mmu_rb_handler *handler)
 	/* Now the mm may be freed. */
 	mmdrop(handler->mn.mm);
 
-	kfree(handler);
+	kfree(handler->free_ptr);
 }
 
 int hfi1_mmu_rb_insert(struct mmu_rb_handler *handler,
diff --git a/drivers/infiniband/hw/hfi1/mmu_rb.h b/drivers/infiniband/hw/hfi1/mmu_rb.h
index ed75acdb7b83..c4da064188c9 100644
--- a/drivers/infiniband/hw/hfi1/mmu_rb.h
+++ b/drivers/infiniband/hw/hfi1/mmu_rb.h
@@ -33,15 +33,25 @@  struct mmu_rb_ops {
 };
 
 struct mmu_rb_handler {
+	/*
+	 * struct mmu_notifier is 56 bytes, and spinlock_t is 4 bytes, so
+	 * they fit together in one cache line.  mn is relatively rarely
+	 * accessed, so co-locating the spinlock with it achieves much of
+	 * the cacheline contention reduction of giving the spinlock its own
+	 * cacheline without the overhead of doing so.
+	 */
 	struct mmu_notifier mn;
-	struct rb_root_cached root;
-	void *ops_arg;
 	spinlock_t lock;        /* protect the RB tree */
+
+	/* Begin on a new cacheline boundary here */
+	struct rb_root_cached root ____cacheline_aligned_in_smp;
+	void *ops_arg;
 	struct mmu_rb_ops *ops;
 	struct list_head lru_list;
 	struct work_struct del_work;
 	struct list_head del_list;
 	struct workqueue_struct *wq;
+	void *free_ptr;
 };
 
 int hfi1_mmu_rb_register(void *ops_arg,