diff mbox series

[v2,08/20] mm/mshare: flush all TLBs when updating PTEs in an mshare range

Message ID 20250404021902.48863-9-anthony.yznaga@oracle.com (mailing list archive)
State New
Headers show
Series Add support for shared PTEs across processes | expand

Commit Message

Anthony Yznaga April 4, 2025, 2:18 a.m. UTC
Unlike the mm of a task, an mshare host mm is not updated on context
switch. In particular, this means that mm_cpumask is never updated,
which results in TLB flushes for updates to mshare PTEs being done
only on the local CPU. To ensure entries are flushed for non-local
TLBs, set up an mmu notifier on the mshare mm and use the
.arch_invalidate_secondary_tlbs callback to flush all TLBs.
arch_invalidate_secondary_tlbs guarantees that TLB entries will be
flushed before pages are freed when unmapping pages in an mshare region.

Signed-off-by: Anthony Yznaga <anthony.yznaga@oracle.com>
---
 mm/mshare.c | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)
diff mbox series

Patch

diff --git a/mm/mshare.c b/mm/mshare.c
index 6bdbcfa8deea..792d86c61042 100644
--- a/mm/mshare.c
+++ b/mm/mshare.c
@@ -16,8 +16,10 @@ 
 #include <linux/fs.h>
 #include <linux/fs_context.h>
 #include <linux/mman.h>
+#include <linux/mmu_notifier.h>
 #include <uapi/linux/magic.h>
 #include <linux/falloc.h>
+#include <asm/tlbflush.h>
 
 const unsigned long mshare_align = P4D_SIZE;
 const unsigned long mshare_base = mshare_align;
@@ -29,6 +31,17 @@  struct mshare_data {
 	unsigned long start;
 	unsigned long size;
 	unsigned long flags;
+	struct mmu_notifier mn;
+};
+
+static void mshare_invalidate_tlbs(struct mmu_notifier *mn, struct mm_struct *mm,
+				   unsigned long start, unsigned long end)
+{
+	flush_tlb_all(); /* args unused: flush every TLB, as an mshare mm's mm_cpumask is never updated */
+}
+
+static const struct mmu_notifier_ops mshare_mmu_ops = { /* ops for mshare_data.mn */
+	.arch_invalidate_secondary_tlbs = mshare_invalidate_tlbs,
 };
 
 static int mshare_vm_op_split(struct vm_area_struct *vma, unsigned long addr)
@@ -237,6 +250,10 @@  msharefs_fill_mm(struct inode *inode)
 	m_data->mm = mm;
 	m_data->start = mshare_base;
 	inode->i_private = m_data;
+	m_data->mn.ops = &mshare_mmu_ops;
+	ret = mmu_notifier_register(&m_data->mn, mm);
+	if (ret)
+		goto err_free;
 
 	return 0;