@@ -776,6 +776,8 @@ static inline void assert_fault_locked(struct vm_fault *vmf)
mmap_assert_locked(vmf->vma->vm_mm);
}
+struct vm_area_struct *find_and_lock_vma_rcu(struct mm_struct *mm,
+ unsigned long address);
struct vm_area_struct *lock_vma_under_rcu(struct mm_struct *mm,
unsigned long address);
@@ -790,6 +792,12 @@ static inline void vma_assert_write_locked(struct vm_area_struct *vma)
static inline void vma_mark_detached(struct vm_area_struct *vma,
bool detached) {}
+static inline struct vm_area_struct *find_and_lock_vma_rcu(struct mm_struct *mm,
+							    unsigned long address)
+{
+	return ERR_PTR(-EOPNOTSUPP);
+}
+
static inline struct vm_area_struct *lock_vma_under_rcu(struct mm_struct *mm,
unsigned long address)
{
@@ -5824,6 +5824,68 @@ struct vm_area_struct *lock_mm_and_find_vma(struct mm_struct *mm,
#endif
#ifdef CONFIG_PER_VMA_LOCK
+/*
+ * find_and_lock_vma_rcu() - Find and lock the VMA for a given address, or the
+ * next VMA. Search is done under RCU protection, without taking or assuming
+ * mmap_lock. Returned VMA is guaranteed to be stable and not isolated.
+ *
+ * @mm: The mm_struct to check
+ * @address: The address
+ *
+ * Returns: The VMA associated with @address, or the next VMA.
+ * May return %NULL in the case of no VMA at @address or above.
+ * If the VMA can't be locked, ERR_PTR(-EBUSY) is returned.
+ */
+struct vm_area_struct *find_and_lock_vma_rcu(struct mm_struct *mm,
+					      unsigned long address)
+{
+	MA_STATE(mas, &mm->mm_mt, address, address);
+	struct vm_area_struct *vma;
+	int err;
+
+	rcu_read_lock();
+retry:
+	vma = mas_find(&mas, ULONG_MAX);
+	if (!vma) {
+		err = 0; /* no VMA, return NULL */
+		goto inval;
+	}
+
+	if (!vma_start_read(vma)) {
+		err = -EBUSY;
+		goto inval;
+	}
+
+	/*
+	 * Check since vm_start/vm_end might change before we lock the VMA.
+	 * Note, unlike lock_vma_under_rcu() we are searching for VMA covering
+	 * address or the next one, so we only make sure VMA wasn't updated to
+	 * end before the address.
+	 */
+	if (unlikely(vma->vm_end <= address)) {
+		err = -EBUSY;
+		goto inval_end_read;
+	}
+
+	/* Check if the VMA got isolated after we found it */
+	if (vma->detached) {
+		vma_end_read(vma);
+		count_vm_vma_lock_event(VMA_LOCK_MISS);
+		/* The area was replaced with another one */
+		goto retry;
+	}
+
+	rcu_read_unlock();
+	return vma;
+
+inval_end_read:
+	vma_end_read(vma);
+inval:
+	rcu_read_unlock();
+	count_vm_vma_lock_event(VMA_LOCK_ABORT);
+	return ERR_PTR(err);
+}
+
/*
* Lookup and lock a VMA under RCU protection. Returned VMA is guaranteed to be
* stable and not isolated. If the VMA is not found or is being modified the
The existing lock_vma_under_rcu() API assumes an exact VMA match, so it's
not a 100% equivalent of find_vma(). There are use cases that do want
find_vma() semantics of finding an exact VMA or the next one. It's also
important for such an API to let the user distinguish between failing to
take the per-VMA lock and there being no VMAs at or after the provided
address.

As such, this patch adds a new find_vma()-like API,
find_and_lock_vma_rcu(), which finds the exact or next VMA, attempts to
take the per-VMA lock, and, if that fails, returns ERR_PTR(-EBUSY). It
still returns NULL if there is no VMA at or after the address. On success
it returns a valid and non-isolated VMA with the VMA lock taken.

This API will be used in a subsequent patch in this patch set to implement
a new user-facing API for querying process VMAs.

Cc: Mike Rapoport <rppt@kernel.org>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Liam Howlett <liam.howlett@oracle.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
---
 include/linux/mm.h |  8 ++++++
 mm/memory.c        | 62 ++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 70 insertions(+)
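
For reviewers' convenience, below is a minimal sketch (not part of this
patch) of how a caller might consume the new API; the helper name
query_vma_example() and its pr_info() output are made up for illustration.
It exercises the three possible outcomes: a locked VMA, NULL when no VMA
exists at or after the address, and ERR_PTR() when the per-VMA lock can't
be taken, in which case a caller would typically fall back to mmap_lock:

static int query_vma_example(struct mm_struct *mm, unsigned long addr)
{
	struct vm_area_struct *vma;

	vma = find_and_lock_vma_rcu(mm, addr);
	if (IS_ERR(vma)) {
		/*
		 * ERR_PTR(-EBUSY): VMA is under modification (or
		 * ERR_PTR(-EOPNOTSUPP) without CONFIG_PER_VMA_LOCK);
		 * fall back to the mmap_lock-protected lookup.
		 */
		mmap_read_lock(mm);
		vma = find_vma(mm, addr);
		if (!vma) {
			mmap_read_unlock(mm);
			return -ENOENT;
		}
		pr_info("vma [%lx, %lx) via mmap_lock\n",
			vma->vm_start, vma->vm_end);
		mmap_read_unlock(mm);
		return 0;
	}
	if (!vma)
		return -ENOENT; /* no VMA at or after addr */

	/* VMA is read-locked and guaranteed non-isolated here */
	pr_info("vma [%lx, %lx) via per-VMA lock\n",
		vma->vm_start, vma->vm_end);

	/* the caller is responsible for dropping the per-VMA lock */
	vma_end_read(vma);
	return 0;
}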