Message ID | 20201108051730.2042693-15-dima@arista.com (mailing list archive) |
---|---|
State | Not Applicable |
Headers | show |
Series | Add generic user_landing tracking | expand |
On Sat, Nov 7, 2020 at 9:18 PM Dmitry Safonov <dima@arista.com> wrote: > > Instead of having every architecture to define vdso_base/vdso_addr etc, > provide a generic mechanism to track landing in userspace. > It'll minimize per-architecture difference, the number of callbacks to > provide. > > Originally, it started from thread [1] where the need for .close() > callback on vm_special_mapping was pointed, this generic code besides > removing duplicated .mremap() callbacks provides a cheaper way to > support munmap() on vdso mappings without introducing .close() callbacks > for every architecture (with would bring even more code duplication). I find the naming odd. It's called "user_landing", which is presumably a hard-to-understand shorthand for "user mode landing pad for return from a signal handler if SA_RESTORER is not set". But, looking at the actual code, it's not this at all -- it's just the vDSO base address. So how about just calling it vdso_base? I'm very much in favor of consolidating and cleaning up, and improving the vdso remap/unmap code, but I'm not convinced that we should call it anything other than the vdso base. --Andy
On 11/8/20 7:04 PM, Andy Lutomirski wrote: > On Sat, Nov 7, 2020 at 9:18 PM Dmitry Safonov <dima@arista.com> wrote: >> >> Instead of having every architecture to define vdso_base/vdso_addr etc, >> provide a generic mechanism to track landing in userspace. >> It'll minimize per-architecture difference, the number of callbacks to >> provide. >> >> Originally, it started from thread [1] where the need for .close() >> callback on vm_special_mapping was pointed, this generic code besides >> removing duplicated .mremap() callbacks provides a cheaper way to >> support munmap() on vdso mappings without introducing .close() callbacks >> for every architecture (with would bring even more code duplication). > > I find the naming odd. It's called "user_landing", which is > presumably a hard-to-understand shorthand for "user mode landing pad > for return from a signal handler if SA_RESTORER is not set". But, > looking at the actual code, it's not this at all -- it's just the vDSO > base address. Agree. Originally, I tried to track the actual landing address on the vdso, but .mremap() seemed simpler when tracking the vma base. > So how about just calling it vdso_base? I'm very much in favor of > consolidating and cleaning up, and improving the vdso remap/unmap > code, but I'm not convinced that we should call it anything other than > the vdso base. Sure. Thanks, Dmitry
diff --git a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c index e916646adc69..786c97203bf6 100644 --- a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c +++ b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c @@ -1458,7 +1458,8 @@ static int pseudo_lock_dev_release(struct inode *inode, struct file *filp) return 0; } -static int pseudo_lock_dev_mremap(struct vm_area_struct *area, unsigned long flags) +static int pseudo_lock_dev_mremap(struct vm_area_struct *old_vma, + struct vm_area_struct *new_vma, unsigned long flags) { /* Not supported */ return -EINVAL; diff --git a/fs/aio.c b/fs/aio.c index d1dad4cd860f..2695dc9ed46f 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -324,7 +324,8 @@ static void aio_free_ring(struct kioctx *ctx) } } -static int aio_ring_mremap(struct vm_area_struct *vma, unsigned long flags) +static int aio_ring_mremap(struct vm_area_struct *old_vma, + struct vm_area_struct *vma, unsigned long flags) { struct file *file = vma->vm_file; struct mm_struct *mm = vma->vm_mm; diff --git a/include/linux/mm.h b/include/linux/mm.h index 427911d2c83e..4b0f97a289b3 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -559,7 +559,8 @@ struct vm_operations_struct { void (*close)(struct vm_area_struct * area); /* Called any time before splitting to check if it's allowed */ int (*may_split)(struct vm_area_struct *area, unsigned long addr); - int (*mremap)(struct vm_area_struct *area, unsigned long flags); + int (*mremap)(struct vm_area_struct *old_vma, + struct vm_area_struct *new_vma, unsigned long flags); vm_fault_t (*fault)(struct vm_fault *vmf); vm_fault_t (*huge_fault)(struct vm_fault *vmf, enum page_entry_size pe_size); diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index b035caff6abe..f888257e973a 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -488,6 +488,16 @@ struct mm_struct { /* Architecture-specific MM context */ mm_context_t context; +#ifdef CONFIG_ARCH_HAS_USER_LANDING + /* + * 
Address of special mapping VMA to land after processing + * a signal. Reads are unprotected: if a thread unmaps or + * mremaps the mapping while another thread is processing + * a signal, it can segfault while landing. + */ + void __user *user_landing; +#endif +#define UNMAPPED_USER_LANDING TASK_SIZE_MAX unsigned long flags; /* Must use atomic bitops to access */ diff --git a/mm/Kconfig b/mm/Kconfig index 01b0ae0cd9d3..d43b61a21be8 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -883,4 +883,7 @@ config ARCH_HAS_HUGEPD config MAPPING_DIRTY_HELPERS bool +config ARCH_HAS_USER_LANDING + bool + endmenu diff --git a/mm/mmap.c b/mm/mmap.c index 2376f3972f13..8a17ffdedacb 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -3410,11 +3410,25 @@ void vm_stat_account(struct mm_struct *mm, vm_flags_t flags, long npages) static vm_fault_t special_mapping_fault(struct vm_fault *vmf); +static void update_user_landing(struct vm_area_struct *old_vma, + unsigned long new_addr) +{ +#ifdef CONFIG_ARCH_HAS_USER_LANDING + struct mm_struct *mm = old_vma->vm_mm; + + if (WARN_ON_ONCE(!mm)) + return; + if (old_vma->vm_start == (unsigned long)mm->user_landing) + mm->user_landing = (void __user *)new_addr; +#endif +} + /* * Having a close hook prevents vma merging regardless of flags. 
*/ static void special_mapping_close(struct vm_area_struct *vma) { + update_user_landing(vma, UNMAPPED_USER_LANDING); } static const char *special_mapping_name(struct vm_area_struct *vma) @@ -3422,7 +3436,8 @@ static const char *special_mapping_name(struct vm_area_struct *vma) return ((struct vm_special_mapping *)vma->vm_private_data)->name; } -static int special_mapping_mremap(struct vm_area_struct *new_vma, +static int special_mapping_mremap(struct vm_area_struct *old_vma, + struct vm_area_struct *new_vma, unsigned long flags) { struct vm_special_mapping *sm = new_vma->vm_private_data; @@ -3436,6 +3451,8 @@ static int special_mapping_mremap(struct vm_area_struct *new_vma, if (sm->mremap) sm->mremap(sm, new_vma); + update_user_landing(old_vma, new_vma->vm_start); + return 0; } diff --git a/mm/mremap.c b/mm/mremap.c index c5590afe7165..9595f6b72101 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -543,7 +543,7 @@ static unsigned long move_vma(struct vm_area_struct *vma, if (moved_len < old_len) { err = -ENOMEM; } else if (vma->vm_ops && vma->vm_ops->mremap) { - err = vma->vm_ops->mremap(new_vma, flags); + err = vma->vm_ops->mremap(vma, new_vma, flags); } if (unlikely(err)) {
Instead of having every architecture define vdso_base/vdso_addr etc., provide a generic mechanism to track landing in userspace. It'll minimize per-architecture differences and the number of callbacks to provide. Originally, it started from thread [1], where the need for a .close() callback on vm_special_mapping was pointed out. This generic code, besides removing duplicated .mremap() callbacks, provides a cheaper way to support munmap() on vdso mappings without introducing .close() callbacks for every architecture (which would bring even more code duplication). [1]: https://lore.kernel.org/linux-arch/CAJwJo6ZANqYkSHbQ+3b+Fi_VT80MtrzEV5yreQAWx-L8j8x2zA@mail.gmail.com/ Cc: Thomas Bogendoerfer <tsbogend@alpha.franken.de> Cc: linux-mips@vger.kernel.org Signed-off-by: Dmitry Safonov <dima@arista.com> --- arch/x86/kernel/cpu/resctrl/pseudo_lock.c | 3 ++- fs/aio.c | 3 ++- include/linux/mm.h | 3 ++- include/linux/mm_types.h | 10 ++++++++++ mm/Kconfig | 3 +++ mm/mmap.c | 19 ++++++++++++++++++- mm/mremap.c | 2 +- 7 files changed, 38 insertions(+), 5 deletions(-)