diff mbox series

[v5,17/17] mm: Check the unexpected modification of COW-ed PTE

Message ID 20230414142341.354556-18-shiyn.lin@gmail.com (mailing list archive)
State Changes Requested
Headers show
Series Introduce Copy-On-Write to Page Table | expand

Commit Message

Chih-En Lin April 14, 2023, 2:23 p.m. UTC
In most cases, we don't expect any write access to a COW-ed PTE table.
To prevent this, add a new modification check to the page table
check.

However, there are still some valid reasons why we might want to modify
COW-ed PTE tables. Therefore, add enable/disable functions to the
check.

Signed-off-by: Chih-En Lin <shiyn.lin@gmail.com>
---
 arch/x86/include/asm/pgtable.h   |  1 +
 include/linux/page_table_check.h | 62 ++++++++++++++++++++++++++++++++
 mm/memory.c                      |  4 +++
 mm/page_table_check.c            | 58 ++++++++++++++++++++++++++++++
 4 files changed, 125 insertions(+)
diff mbox series

Patch

diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 7425f32e5293..6b323c672e36 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -1022,6 +1022,7 @@  static inline pud_t native_local_pudp_get_and_clear(pud_t *pudp)
 static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
 			      pte_t *ptep, pte_t pte)
 {
+	cowed_pte_table_check_modify(mm, addr, ptep, pte);
 	page_table_check_pte_set(mm, addr, ptep, pte);
 	set_pte(ptep, pte);
 }
diff --git a/include/linux/page_table_check.h b/include/linux/page_table_check.h
index 01e16c7696ec..4a54dc454281 100644
--- a/include/linux/page_table_check.h
+++ b/include/linux/page_table_check.h
@@ -113,6 +113,54 @@  static inline void page_table_check_pte_clear_range(struct mm_struct *mm,
 	__page_table_check_pte_clear_range(mm, addr, pmd);
 }
 
+#ifdef CONFIG_COW_PTE
+void __check_cowed_pte_table_enable(pte_t *ptep);
+void __check_cowed_pte_table_disable(pte_t *ptep);
+void __cowed_pte_table_check_modify(struct mm_struct *mm, unsigned long addr,
+				    pte_t *ptep, pte_t pte);
+
+static inline void check_cowed_pte_table_enable(pte_t *ptep)
+{
+	if (static_branch_likely(&page_table_check_disabled))
+		return;
+
+	__check_cowed_pte_table_enable(ptep);
+}
+
+static inline void check_cowed_pte_table_disable(pte_t *ptep)
+{
+	if (static_branch_likely(&page_table_check_disabled))
+		return;
+
+	__check_cowed_pte_table_disable(ptep);
+}
+
+static inline void cowed_pte_table_check_modify(struct mm_struct *mm,
+						unsigned long addr,
+						pte_t *ptep, pte_t pte)
+{
+	if (static_branch_likely(&page_table_check_disabled))
+		return;
+
+	__cowed_pte_table_check_modify(mm, addr, ptep, pte);
+}
+#else
+static inline void check_cowed_pte_table_enable(pte_t *ptep)
+{
+}
+
+static inline void check_cowed_pte_table_disable(pte_t *ptep)
+{
+}
+
+static inline void cowed_pte_table_check_modify(struct mm_struct *mm,
+						unsigned long addr,
+						pte_t *ptep, pte_t pte)
+{
+}
+#endif /* CONFIG_COW_PTE */
+
+
 #else
 
 static inline void page_table_check_alloc(struct page *page, unsigned int order)
@@ -162,5 +210,19 @@  static inline void page_table_check_pte_clear_range(struct mm_struct *mm,
 {
 }
 
+static inline void check_cowed_pte_table_enable(pte_t *ptep)
+{
+}
+
+static inline void check_cowed_pte_table_disable(pte_t *ptep)
+{
+}
+
+static inline void cowed_pte_table_check_modify(struct mm_struct *mm,
+						unsigned long addr,
+						pte_t *ptep, pte_t pte)
+{
+}
+
 #endif /* CONFIG_PAGE_TABLE_CHECK */
 #endif /* __LINUX_PAGE_TABLE_CHECK_H */
diff --git a/mm/memory.c b/mm/memory.c
index 7908e20f802a..e62487413038 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1202,10 +1202,12 @@  copy_cow_pte_range(struct vm_area_struct *dst_vma,
 				 * Although, parent's PTE is COW-ed, we should
 				 * still need to handle all the swap stuffs.
 				 */
+				check_cowed_pte_table_disable(src_pte);
 				ret = copy_nonpresent_pte(dst_mm, src_mm,
 							  src_pte, src_pte,
 							  curr, curr,
 							  addr, rss);
+				check_cowed_pte_table_enable(src_pte);
 				if (ret == -EIO) {
 					entry = pte_to_swp_entry(*src_pte);
 					break;
@@ -1223,8 +1225,10 @@  copy_cow_pte_range(struct vm_area_struct *dst_vma,
 			 * copy_present_pte() will determine the mapped page
 			 * should be COW mapping or not.
 			 */
+			check_cowed_pte_table_disable(src_pte);
 			ret = copy_present_pte(curr, curr, src_pte, src_pte,
 					       addr, rss, NULL);
+			check_cowed_pte_table_enable(src_pte);
 			/*
 			 * If we need a pre-allocated page for this pte,
 			 * drop the lock, recover all the entries, fall
diff --git a/mm/page_table_check.c b/mm/page_table_check.c
index 25d8610c0042..5175c7476508 100644
--- a/mm/page_table_check.c
+++ b/mm/page_table_check.c
@@ -14,6 +14,9 @@ 
 struct page_table_check {
 	atomic_t anon_map_count;
 	atomic_t file_map_count;
+#ifdef CONFIG_COW_PTE
+	atomic_t check_cowed_pte;
+#endif
 };
 
 static bool __page_table_check_enabled __initdata =
@@ -248,3 +251,58 @@  void __page_table_check_pte_clear_range(struct mm_struct *mm,
 		pte_unmap(ptep - PTRS_PER_PTE);
 	}
 }
+
+#ifdef CONFIG_COW_PTE
+void __check_cowed_pte_table_enable(pte_t *ptep)
+{
+	struct page *page = pte_page(*ptep);
+	struct page_ext *page_ext = page_ext_get(page);
+	struct page_table_check *ptc = get_page_table_check(page_ext);
+
+	atomic_set(&ptc->check_cowed_pte, 1);
+	page_ext_put(page_ext);
+}
+
+void __check_cowed_pte_table_disable(pte_t *ptep)
+{
+	struct page *page = pte_page(*ptep);
+	struct page_ext *page_ext = page_ext_get(page);
+	struct page_table_check *ptc = get_page_table_check(page_ext);
+
+	atomic_set(&ptc->check_cowed_pte, 0);
+	page_ext_put(page_ext);
+}
+
+static int check_cowed_pte_table(pte_t *ptep)
+{
+	struct page *page = pte_page(*ptep);
+	struct page_ext *page_ext = page_ext_get(page);
+	struct page_table_check *ptc = get_page_table_check(page_ext);
+	int check = 0;
+
+	check = atomic_read(&ptc->check_cowed_pte);
+	page_ext_put(page_ext);
+
+	return check;
+}
+
+void __cowed_pte_table_check_modify(struct mm_struct *mm, unsigned long addr,
+				    pte_t *ptep, pte_t pte)
+{
+	pgd_t *pgd;
+	p4d_t *p4d;
+	pud_t *pud;
+	pmd_t *pmd;
+
+	if (!test_bit(MMF_COW_PTE, &mm->flags) || !check_cowed_pte_table(ptep))
+		return;
+
+	pgd = pgd_offset(mm, addr);
+	p4d = p4d_offset(pgd, addr);
+	pud = pud_offset(p4d, addr);
+	pmd = pmd_offset(pud, addr);
+
+	if (!pmd_none(*pmd) && !pmd_write(*pmd) && cow_pte_count(pmd) > 1)
+		BUG_ON(!pte_same(*ptep, pte));
+}
+#endif