Message ID | 20220901173516.702122-22-surenb@google.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | per-VMA locks proposal | expand |
Le 01/09/2022 à 19:35, Suren Baghdasaryan a écrit : > Introduce find_and_lock_anon_vma function to lookup and lock an anonymous > VMA during page fault handling. When VMA is not found, can't be locked > or changes after being locked, the function returns NULL. The lookup is > performed under RCU protection to prevent the found VMA from being > destroyed before the VMA lock is acquired. VMA lock statistics are > updated according to the results. > > Signed-off-by: Suren Baghdasaryan <surenb@google.com> > --- > include/linux/mm.h | 3 +++ > mm/memory.c | 45 +++++++++++++++++++++++++++++++++++++++++++++ > 2 files changed, 48 insertions(+) > > diff --git a/include/linux/mm.h b/include/linux/mm.h > index 7c3190eaabd7..a3cbaa7b9119 100644 > --- a/include/linux/mm.h > +++ b/include/linux/mm.h > @@ -684,6 +684,9 @@ static inline void vma_assert_no_reader(struct vm_area_struct *vma) > vma); > } > > +struct vm_area_struct *find_and_lock_anon_vma(struct mm_struct *mm, > + unsigned long address); > + > #else /* CONFIG_PER_VMA_LOCK */ > > static inline void vma_init_lock(struct vm_area_struct *vma) {} > diff --git a/mm/memory.c b/mm/memory.c > index 29d2f49f922a..bf557f7056de 100644 > --- a/mm/memory.c > +++ b/mm/memory.c > @@ -5183,6 +5183,51 @@ vm_fault_t handle_mm_fault(struct vm_area_struct *vma, unsigned long address, > } > EXPORT_SYMBOL_GPL(handle_mm_fault); > > +#ifdef CONFIG_PER_VMA_LOCK > +static inline struct vm_area_struct *find_vma_under_rcu(struct mm_struct *mm, > + unsigned long address) > +{ > + struct vm_area_struct *vma = __find_vma(mm, address); > + > + if (!vma || vma->vm_start > address) > + return NULL; > + > + if (!vma_is_anonymous(vma)) > + return NULL; > + It looks to me more natural to first check that the VMA is part of the RB tree before try read locking it. 
> + if (!vma_read_trylock(vma)) { > + count_vm_vma_lock_event(VMA_LOCK_ABORT); > + return NULL; > + } > + > + /* Check if the VMA got isolated after we found it */ > + if (RB_EMPTY_NODE(&vma->vm_rb)) { > + vma_read_unlock(vma); > + count_vm_vma_lock_event(VMA_LOCK_MISS); > + return NULL; > + } > + > + return vma; > +} > + > +/* > + * Lookup and lock and anonymous VMA. Returned VMA is guaranteed to be stable > + * and not isolated. If the VMA is not found of is being modified the function > + * returns NULL. > + */ > +struct vm_area_struct *find_and_lock_anon_vma(struct mm_struct *mm, > + unsigned long address) > +{ > + struct vm_area_struct *vma; > + > + rcu_read_lock(); > + vma = find_vma_under_rcu(mm, address); > + rcu_read_unlock(); > + > + return vma; > +} > +#endif /* CONFIG_PER_VMA_LOCK */ > + > #ifndef __PAGETABLE_P4D_FOLDED > /* > * Allocate p4d page table.
On Fri, Sep 9, 2022 at 7:38 AM Laurent Dufour <ldufour@linux.ibm.com> wrote: > > Le 01/09/2022 à 19:35, Suren Baghdasaryan a écrit : > > Introduce find_and_lock_anon_vma function to lookup and lock an anonymous > > VMA during page fault handling. When VMA is not found, can't be locked > > or changes after being locked, the function returns NULL. The lookup is > > performed under RCU protection to prevent the found VMA from being > > destroyed before the VMA lock is acquired. VMA lock statistics are > > updated according to the results. > > > > Signed-off-by: Suren Baghdasaryan <surenb@google.com> > > --- > > include/linux/mm.h | 3 +++ > > mm/memory.c | 45 +++++++++++++++++++++++++++++++++++++++++++++ > > 2 files changed, 48 insertions(+) > > > > diff --git a/include/linux/mm.h b/include/linux/mm.h > > index 7c3190eaabd7..a3cbaa7b9119 100644 > > --- a/include/linux/mm.h > > +++ b/include/linux/mm.h > > @@ -684,6 +684,9 @@ static inline void vma_assert_no_reader(struct vm_area_struct *vma) > > vma); > > } > > > > +struct vm_area_struct *find_and_lock_anon_vma(struct mm_struct *mm, > > + unsigned long address); > > + > > #else /* CONFIG_PER_VMA_LOCK */ > > > > static inline void vma_init_lock(struct vm_area_struct *vma) {} > > diff --git a/mm/memory.c b/mm/memory.c > > index 29d2f49f922a..bf557f7056de 100644 > > --- a/mm/memory.c > > +++ b/mm/memory.c > > @@ -5183,6 +5183,51 @@ vm_fault_t handle_mm_fault(struct vm_area_struct *vma, unsigned long address, > > } > > EXPORT_SYMBOL_GPL(handle_mm_fault); > > > > +#ifdef CONFIG_PER_VMA_LOCK > > +static inline struct vm_area_struct *find_vma_under_rcu(struct mm_struct *mm, > > + unsigned long address) > > +{ > > + struct vm_area_struct *vma = __find_vma(mm, address); > > + > > + if (!vma || vma->vm_start > address) > > + return NULL; > > + > > + if (!vma_is_anonymous(vma)) > > + return NULL; > > + > > It looks to me more natural to first check that the VMA is part of the RB > tree before try read locking it. 
I think we want to check that the VMA is still part of the mm _after_ we locked it. Otherwise we might pass the check, then some other thread does (lock->isolate->unlock) and then we lock the VMA. We would end up with a VMA that is not part of mm anymore but we assume it is. > > > + if (!vma_read_trylock(vma)) { > > + count_vm_vma_lock_event(VMA_LOCK_ABORT); > > + return NULL; > > + } > > + > > + /* Check if the VMA got isolated after we found it */ > > + if (RB_EMPTY_NODE(&vma->vm_rb)) { > > + vma_read_unlock(vma); > > + count_vm_vma_lock_event(VMA_LOCK_MISS); > > + return NULL; > > + } > > + > > + return vma; > > +} > > + > > +/* > > + * Lookup and lock and anonymous VMA. Returned VMA is guaranteed to be stable > > + * and not isolated. If the VMA is not found of is being modified the function > > + * returns NULL. > > + */ > > +struct vm_area_struct *find_and_lock_anon_vma(struct mm_struct *mm, > > + unsigned long address) > > +{ > > + struct vm_area_struct *vma; > > + > > + rcu_read_lock(); > > + vma = find_vma_under_rcu(mm, address); > > + rcu_read_unlock(); > > + > > + return vma; > > +} > > +#endif /* CONFIG_PER_VMA_LOCK */ > > + > > #ifndef __PAGETABLE_P4D_FOLDED > > /* > > * Allocate p4d page table. >
diff --git a/include/linux/mm.h b/include/linux/mm.h index 7c3190eaabd7..a3cbaa7b9119 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -684,6 +684,9 @@ static inline void vma_assert_no_reader(struct vm_area_struct *vma) vma); } +struct vm_area_struct *find_and_lock_anon_vma(struct mm_struct *mm, + unsigned long address); + #else /* CONFIG_PER_VMA_LOCK */ static inline void vma_init_lock(struct vm_area_struct *vma) {} diff --git a/mm/memory.c b/mm/memory.c index 29d2f49f922a..bf557f7056de 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -5183,6 +5183,51 @@ vm_fault_t handle_mm_fault(struct vm_area_struct *vma, unsigned long address, } EXPORT_SYMBOL_GPL(handle_mm_fault); +#ifdef CONFIG_PER_VMA_LOCK +static inline struct vm_area_struct *find_vma_under_rcu(struct mm_struct *mm, + unsigned long address) +{ + struct vm_area_struct *vma = __find_vma(mm, address); + + if (!vma || vma->vm_start > address) + return NULL; + + if (!vma_is_anonymous(vma)) + return NULL; + + if (!vma_read_trylock(vma)) { + count_vm_vma_lock_event(VMA_LOCK_ABORT); + return NULL; + } + + /* Check if the VMA got isolated after we found it */ + if (RB_EMPTY_NODE(&vma->vm_rb)) { + vma_read_unlock(vma); + count_vm_vma_lock_event(VMA_LOCK_MISS); + return NULL; + } + + return vma; +} + +/* + * Lookup and lock an anonymous VMA. Returned VMA is guaranteed to be stable + * and not isolated. If the VMA is not found or is being modified the function + * returns NULL. + */ +struct vm_area_struct *find_and_lock_anon_vma(struct mm_struct *mm, + unsigned long address) +{ + struct vm_area_struct *vma; + + rcu_read_lock(); + vma = find_vma_under_rcu(mm, address); + rcu_read_unlock(); + + return vma; +} +#endif /* CONFIG_PER_VMA_LOCK */ + #ifndef __PAGETABLE_P4D_FOLDED /* * Allocate p4d page table.
Introduce find_and_lock_anon_vma function to lookup and lock an anonymous VMA during page fault handling. When VMA is not found, can't be locked or changes after being locked, the function returns NULL. The lookup is performed under RCU protection to prevent the found VMA from being destroyed before the VMA lock is acquired. VMA lock statistics are updated according to the results. Signed-off-by: Suren Baghdasaryan <surenb@google.com> --- include/linux/mm.h | 3 +++ mm/memory.c | 45 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+)