@@ -12,6 +12,10 @@
bool pte_ref_init(pgtable_t pte);
void pte_ref_free(pgtable_t pte);
+/* pte_ref get/put helpers declared for CONFIG_FREE_USER_PTE builds. */
+void free_user_pte(struct mm_struct *mm, pmd_t *pmd, unsigned long addr);
+bool pte_tryget(struct mm_struct *mm, pmd_t *pmd, unsigned long addr);
+void __pte_put(pgtable_t page);
+void pte_put(pte_t *ptep);
#else /* !CONFIG_FREE_USER_PTE */
@@ -24,6 +28,25 @@ static inline void pte_ref_free(pgtable_t pte)
{
}
+/* Stub for !CONFIG_FREE_USER_PTE builds: does nothing. */
+static inline void free_user_pte(struct mm_struct *mm, pmd_t *pmd,
+ unsigned long addr)
+{
+}
+
+/* Stub for !CONFIG_FREE_USER_PTE builds: does nothing and always returns true. */
+static inline bool pte_tryget(struct mm_struct *mm, pmd_t *pmd,
+ unsigned long addr)
+{
+ return true;
+}
+
+/* Stub for !CONFIG_FREE_USER_PTE builds: does nothing. */
+static inline void __pte_put(pgtable_t page)
+{
+}
+
+/* Stub for !CONFIG_FREE_USER_PTE builds: does nothing. */
+static inline void pte_put(pte_t *ptep)
+{
+}
+
#endif /* CONFIG_FREE_USER_PTE */
#endif /* _LINUX_PTE_REF_H */
@@ -44,4 +44,62 @@ void pte_ref_free(pgtable_t pte)
kfree(ref);
}
+/* Empty definition: this version of free_user_pte() performs no work. */
+void free_user_pte(struct mm_struct *mm, pmd_t *pmd, unsigned long addr) {}
+
+/**
+ * pte_tryget - try to take a pte_ref reference on a user PTE page table page
+ * @mm: pointer to the target address space
+ * @pmd: pointer to the PMD entry expected to map the PTE page table
+ * @addr: virtual address associated with @pmd
+ *
+ * A reference must be held before the user PTE page table is accessed, so
+ * that the page table page cannot be released concurrently.
+ *
+ * The attempt fails when:
+ * - the entry read from @pmd is none or a leaf, i.e. it does not map a PTE
+ *   page table; or
+ * - percpu_ref_tryget() on the page's pte_ref fails (the count has already
+ *   reached zero, so the page may be under reclaim).
+ *
+ * Return: true if the reference was taken, false otherwise.
+ */
+bool pte_tryget(struct mm_struct *mm, pmd_t *pmd, unsigned long addr)
+{
+	pmd_t entry;
+	pgtable_t table;
+	bool got;
+
+	rcu_read_lock();
+	/* Snapshot the PMD once; every check below uses the same value. */
+	entry = READ_ONCE(*pmd);
+	table = pmd_pgtable(entry);
+	if (unlikely(pmd_none(entry) || pmd_leaf(entry))) {
+		/* @pmd does not map a PTE page table: nothing to pin. */
+		rcu_read_unlock();
+		return false;
+	}
+
+	got = percpu_ref_tryget(table->pte_ref);
+	rcu_read_unlock();
+
+	if (!got) {
+		/*
+		 * Also call free_user_pte() here, after leaving the RCU
+		 * read-side section, so that a reclaim is not missed due to
+		 * a race.
+		 */
+		free_user_pte(mm, pmd, addr & PMD_MASK);
+	}
+
+	return got;
+}
+
+/*
+ * __pte_put - drop one pte_ref reference on a user PTE page table page
+ * @page: the PTE page table page whose pte_ref is put
+ */
+void __pte_put(pgtable_t page)
+{
+ percpu_ref_put(page->pte_ref);
+}
+
+/*
+ * pte_put - drop the pte_ref reference for the page table holding @ptep
+ * @ptep: pointer to a PTE entry
+ *
+ * Does nothing when *@ptep is a huge entry (pte_huge()). Otherwise the page
+ * obtained from pte_to_page(@ptep) is handed to __pte_put().
+ */
+void pte_put(pte_t *ptep)
+{
+	if (!pte_huge(*ptep))
+		__pte_put(pte_to_page(ptep));
+}
+EXPORT_SYMBOL(pte_put);
+
#endif /* CONFIG_FREE_USER_PTE */
The user PTE page table page may be freed when the last percpu_ref is dropped. So we need to try to get its percpu_ref before accessing the PTE page, to prevent it from being freed while it is being accessed.

This patch adds pte_tryget() and {__,}pte_put() to help us get and put the percpu_ref of user PTE page table pages.

Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>
---
 include/linux/pte_ref.h | 23 ++++++++++++++++
 mm/pte_ref.c            | 58 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 81 insertions(+)