@@ -19,6 +19,13 @@ There are helpers to lock/unlock a table and other accessor functions:
- pte_offset_map_nolock()
maps PTE, returns pointer to PTE with pointer to its PTE table
lock (not taken), or returns NULL if no PTE table;
+ - pte_offset_map_ro_nolock()
+ maps PTE, returns pointer to PTE with pointer to its PTE table
+ lock (not taken), or returns NULL if no PTE table;
+ - pte_offset_map_rw_nolock()
+ maps PTE, returns pointer to PTE with pointer to its PTE table
+ lock (not taken) and the value of its pmd entry, or returns NULL
+ if no PTE table;
- pte_offset_map()
maps PTE, returns pointer to PTE, or returns NULL if no PTE table;
- pte_unmap()
@@ -3017,6 +3017,11 @@ static inline pte_t *pte_offset_map_lock(struct mm_struct *mm, pmd_t *pmd,
pte_t *pte_offset_map_nolock(struct mm_struct *mm, pmd_t *pmd,
unsigned long addr, spinlock_t **ptlp);
+pte_t *pte_offset_map_ro_nolock(struct mm_struct *mm, pmd_t *pmd,
+ unsigned long addr, spinlock_t **ptlp);
+pte_t *pte_offset_map_rw_nolock(struct mm_struct *mm, pmd_t *pmd,
+ unsigned long addr, pmd_t *pmdvalp,
+ spinlock_t **ptlp);
#define pte_unmap_unlock(pte, ptl) do { \
spin_unlock(ptl); \
@@ -317,6 +317,31 @@ pte_t *pte_offset_map_nolock(struct mm_struct *mm, pmd_t *pmd,
return pte;
}
+pte_t *pte_offset_map_ro_nolock(struct mm_struct *mm, pmd_t *pmd,
+ unsigned long addr, spinlock_t **ptlp)
+{
+ pmd_t pmdval;
+ pte_t *pte;
+
+ pte = __pte_offset_map(pmd, addr, &pmdval);
+ if (likely(pte))
+ *ptlp = pte_lockptr(mm, &pmdval);
+ return pte;
+}
+
+pte_t *pte_offset_map_rw_nolock(struct mm_struct *mm, pmd_t *pmd,
+ unsigned long addr, pmd_t *pmdvalp,
+ spinlock_t **ptlp)
+{
+ pte_t *pte;
+
+ VM_WARN_ON_ONCE(!pmdvalp);
+ pte = __pte_offset_map(pmd, addr, pmdvalp);
+ if (likely(pte))
+ *ptlp = pte_lockptr(mm, pmdvalp);
+ return pte;
+}
+
/*
* pte_offset_map_lock(mm, pmd, addr, ptlp), and its internal implementation
* __pte_offset_map_lock() below, is usually called with the pmd pointer for
@@ -356,6 +381,29 @@ pte_t *pte_offset_map_nolock(struct mm_struct *mm, pmd_t *pmd,
* recheck *pmd once the lock is taken; in practice, no callsite needs that -
* either the mmap_lock for write, or pte_same() check on contents, is enough.
*
+ * pte_offset_map_ro_nolock(mm, pmd, addr, ptlp), above, is like pte_offset_map();
+ * but when successful, it also outputs a pointer to the spinlock in ptlp - as
+ * pte_offset_map_lock() does, but in this case without locking it. This helps
+ * the caller to avoid a later pte_lockptr(mm, *pmd), which might by that time
+ * act on a changed *pmd: pte_offset_map_ro_nolock() provides the correct spinlock
+ * pointer for the page table that it returns. Even after grabbing the spinlock,
+ * we might be looking either at a page table that is still mapped or one that
+ * was unmapped and is about to get freed. But for R/O access this is sufficient.
+ * So it is only applicable for read-only cases where any modification operations
+ * to the page table are not allowed even if the corresponding spinlock is held
+ * afterwards.
+ *
+ * pte_offset_map_rw_nolock(mm, pmd, addr, pmdvalp, ptlp), above, is like
+ * pte_offset_map_ro_nolock(); but when successful, it also outputs the pdmval.
+ * It is applicable for may-write cases where any modification operations to the
+ * page table may happen after the corresponding spinlock is held afterwards.
+ * But the users should make sure the page table is stable like checking pte_same()
+ * or checking pmd_same() by using the output pmdval before performing the write
+ * operations.
+ *
+ * Note: "RO" / "RW" expresses the intended semantics, not that the *kmap* will
+ * be read-only/read-write protected.
+ *
* Note that free_pgtables(), used after unmapping detached vmas, or when
* exiting the whole mm, does not take page table lock before freeing a page
* table, and may not use RCU at all: "outsiders" like khugepaged should avoid