Message ID | 20240817-mseal-depessimize-v3-2-d8d2e037df30@gmail.com (mailing list archive) |
---|---|
State | Accepted |
Commit | df2a7df9a9aa32c3df227de346693e6e802c8591 |
Headers | show |
Series | mm: Optimize mseal checks | expand |
* Pedro Falcato <pedro.falcato@gmail.com> [240816 20:18]: > We were doing an extra mmap tree traversal just to check if the entire > range is modifiable. This can be done when we iterate through the VMAs > instead. > > Signed-off-by: Pedro Falcato <pedro.falcato@gmail.com> Although this has the side effect of potentially splitting the first vma if it is not mseal()'ed, there really is no risk here since someone making an invalid call that splits the vma for whatever reason can modify the bad call to achieve the same split. That is, this doesn't help or hinder an attacker. Reviewed-by: Liam R. Howlett <Liam.Howlett@Oracle.com> > --- > mm/mmap.c | 11 +---------- > mm/vma.c | 19 ++++++++++++------- > 2 files changed, 13 insertions(+), 17 deletions(-) > > diff --git a/mm/mmap.c b/mm/mmap.c > index 3af256bacef3..30ae4cb5cec9 100644 > --- a/mm/mmap.c > +++ b/mm/mmap.c > @@ -1740,16 +1740,7 @@ int do_vma_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma, > unsigned long start, unsigned long end, struct list_head *uf, > bool unlock) > { > - struct mm_struct *mm = vma->vm_mm; > - > - /* > - * Check if memory is sealed, prevent unmapping a sealed VMA. > - * can_modify_mm assumes we have acquired the lock on MM. 
> - */ > - if (unlikely(!can_modify_mm(mm, start, end))) > - return -EPERM; > - > - return do_vmi_align_munmap(vmi, vma, mm, start, end, uf, unlock); > + return do_vmi_align_munmap(vmi, vma, vma->vm_mm, start, end, uf, unlock); > } > > /* > diff --git a/mm/vma.c b/mm/vma.c > index 84965f2cd580..5850f7c0949b 100644 > --- a/mm/vma.c > +++ b/mm/vma.c > @@ -712,6 +712,12 @@ do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma, > if (end < vma->vm_end && mm->map_count >= sysctl_max_map_count) > goto map_count_exceeded; > > + /* Don't bother splitting the VMA if we can't unmap it anyway */ > + if (!can_modify_vma(vma)) { > + error = -EPERM; > + goto start_split_failed; > + } > + > error = __split_vma(vmi, vma, start, 1); > if (error) > goto start_split_failed; > @@ -723,6 +729,11 @@ do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma, > */ > next = vma; > do { > + if (!can_modify_vma(next)) { > + error = -EPERM; > + goto modify_vma_failed; > + } > + > /* Does it split the end? */ > if (next->vm_end > end) { > error = __split_vma(vmi, next, end, 0); > @@ -815,6 +826,7 @@ do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma, > __mt_destroy(&mt_detach); > return 0; > > +modify_vma_failed: > clear_tree_failed: > userfaultfd_error: > munmap_gather_failed: > @@ -860,13 +872,6 @@ int do_vmi_munmap(struct vma_iterator *vmi, struct mm_struct *mm, > if (end == start) > return -EINVAL; > > - /* > - * Check if memory is sealed, prevent unmapping a sealed VMA. > - * can_modify_mm assumes we have acquired the lock on MM. > - */ > - if (unlikely(!can_modify_mm(mm, start, end))) > - return -EPERM; > - > /* Find the first overlapping VMA */ > vma = vma_find(vmi, end); > if (!vma) { > > -- > 2.46.0 >
On Sat, Aug 17, 2024 at 01:18:29AM GMT, Pedro Falcato wrote: > We were doing an extra mmap tree traversal just to check if the entire > range is modifiable. This can be done when we iterate through the VMAs > instead. > > Signed-off-by: Pedro Falcato <pedro.falcato@gmail.com> > --- > mm/mmap.c | 11 +---------- > mm/vma.c | 19 ++++++++++++------- > 2 files changed, 13 insertions(+), 17 deletions(-) > > diff --git a/mm/mmap.c b/mm/mmap.c > index 3af256bacef3..30ae4cb5cec9 100644 > --- a/mm/mmap.c > +++ b/mm/mmap.c > @@ -1740,16 +1740,7 @@ int do_vma_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma, > unsigned long start, unsigned long end, struct list_head *uf, > bool unlock) > { > - struct mm_struct *mm = vma->vm_mm; > - > - /* > - * Check if memory is sealed, prevent unmapping a sealed VMA. > - * can_modify_mm assumes we have acquired the lock on MM. > - */ > - if (unlikely(!can_modify_mm(mm, start, end))) > - return -EPERM; > - > - return do_vmi_align_munmap(vmi, vma, mm, start, end, uf, unlock); > + return do_vmi_align_munmap(vmi, vma, vma->vm_mm, start, end, uf, unlock); > } > Oh I like this. Want more mm/mmap.c stuff to look like this, abstracting actual functionality to mm/vma.c... 
> /* > diff --git a/mm/vma.c b/mm/vma.c > index 84965f2cd580..5850f7c0949b 100644 > --- a/mm/vma.c > +++ b/mm/vma.c > @@ -712,6 +712,12 @@ do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma, > if (end < vma->vm_end && mm->map_count >= sysctl_max_map_count) > goto map_count_exceeded; > > + /* Don't bother splitting the VMA if we can't unmap it anyway */ > + if (!can_modify_vma(vma)) { > + error = -EPERM; > + goto start_split_failed; > + } > + > error = __split_vma(vmi, vma, start, 1); > if (error) > goto start_split_failed; > @@ -723,6 +729,11 @@ do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma, > */ > next = vma; > do { > + if (!can_modify_vma(next)) { > + error = -EPERM; > + goto modify_vma_failed; > + } > + > /* Does it split the end? */ > if (next->vm_end > end) { > error = __split_vma(vmi, next, end, 0); > @@ -815,6 +826,7 @@ do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma, > __mt_destroy(&mt_detach); > return 0; > > +modify_vma_failed: > clear_tree_failed: > userfaultfd_error: > munmap_gather_failed: > @@ -860,13 +872,6 @@ int do_vmi_munmap(struct vma_iterator *vmi, struct mm_struct *mm, > if (end == start) > return -EINVAL; > > - /* > - * Check if memory is sealed, prevent unmapping a sealed VMA. > - * can_modify_mm assumes we have acquired the lock on MM. > - */ > - if (unlikely(!can_modify_mm(mm, start, end))) > - return -EPERM; > - This means we will arch_unmap() first, before realising we can't unmap, however there are a number of other error conditions that would cause a similar outcome in do_vmi_align_munmap() so I don't think that's a problem. > /* Find the first overlapping VMA */ > vma = vma_find(vmi, end); > if (!vma) { > > -- > 2.46.0 > LGTM, Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
On Fri, Aug 16, 2024 at 5:18 PM Pedro Falcato <pedro.falcato@gmail.com> wrote: > > We were doing an extra mmap tree traversal just to check if the entire > range is modifiable. This can be done when we iterate through the VMAs > instead. > > Signed-off-by: Pedro Falcato <pedro.falcato@gmail.com> > --- > mm/mmap.c | 11 +---------- > mm/vma.c | 19 ++++++++++++------- > 2 files changed, 13 insertions(+), 17 deletions(-) > > diff --git a/mm/mmap.c b/mm/mmap.c > index 3af256bacef3..30ae4cb5cec9 100644 > --- a/mm/mmap.c > +++ b/mm/mmap.c > @@ -1740,16 +1740,7 @@ int do_vma_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma, > unsigned long start, unsigned long end, struct list_head *uf, > bool unlock) > { > - struct mm_struct *mm = vma->vm_mm; > - > - /* > - * Check if memory is sealed, prevent unmapping a sealed VMA. > - * can_modify_mm assumes we have acquired the lock on MM. > - */ > - if (unlikely(!can_modify_mm(mm, start, end))) > - return -EPERM; Another approach to improve perf is to clone the vmi (since it already point to the first vma), and pass the cloned vmi/vma into can_modify_mm check, that will remove the cost of re-finding the first VMA. The can_modify_mm then continues from cloned VMI/vma till the end of address range, there will be some perf cost there. However, most address ranges in the real world are within a single VMA, in practice, the perf cost is the same as checking the single VMA, 99.9% case. 
This will help preserve the nice sealing feature (if one of the vma is sealed, the entire address range is not modified) > - > - return do_vmi_align_munmap(vmi, vma, mm, start, end, uf, unlock); > + return do_vmi_align_munmap(vmi, vma, vma->vm_mm, start, end, uf, unlock); > } > > /* > diff --git a/mm/vma.c b/mm/vma.c > index 84965f2cd580..5850f7c0949b 100644 > --- a/mm/vma.c > +++ b/mm/vma.c > @@ -712,6 +712,12 @@ do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma, > if (end < vma->vm_end && mm->map_count >= sysctl_max_map_count) > goto map_count_exceeded; > > + /* Don't bother splitting the VMA if we can't unmap it anyway */ > + if (!can_modify_vma(vma)) { > + error = -EPERM; > + goto start_split_failed; > + } > + > error = __split_vma(vmi, vma, start, 1); > if (error) > goto start_split_failed; > @@ -723,6 +729,11 @@ do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma, > */ > next = vma; > do { > + if (!can_modify_vma(next)) { > + error = -EPERM; > + goto modify_vma_failed; > + } > + > /* Does it split the end? */ > if (next->vm_end > end) { > error = __split_vma(vmi, next, end, 0); > @@ -815,6 +826,7 @@ do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma, > __mt_destroy(&mt_detach); > return 0; > > +modify_vma_failed: > clear_tree_failed: > userfaultfd_error: > munmap_gather_failed: > @@ -860,13 +872,6 @@ int do_vmi_munmap(struct vma_iterator *vmi, struct mm_struct *mm, > if (end == start) > return -EINVAL; > > - /* > - * Check if memory is sealed, prevent unmapping a sealed VMA. > - * can_modify_mm assumes we have acquired the lock on MM. > - */ > - if (unlikely(!can_modify_mm(mm, start, end))) > - return -EPERM; > - > /* Find the first overlapping VMA */ > vma = vma_find(vmi, end); > if (!vma) { > > -- > 2.46.0 >
On Wed, Aug 21, 2024 at 5:16 PM Jeff Xu <jeffxu@chromium.org> wrote: > > On Fri, Aug 16, 2024 at 5:18 PM Pedro Falcato <pedro.falcato@gmail.com> wrote: > > > > We were doing an extra mmap tree traversal just to check if the entire > > range is modifiable. This can be done when we iterate through the VMAs > > instead. > > > > Signed-off-by: Pedro Falcato <pedro.falcato@gmail.com> > > --- > > mm/mmap.c | 11 +---------- > > mm/vma.c | 19 ++++++++++++------- > > 2 files changed, 13 insertions(+), 17 deletions(-) > > > > diff --git a/mm/mmap.c b/mm/mmap.c > > index 3af256bacef3..30ae4cb5cec9 100644 > > --- a/mm/mmap.c > > +++ b/mm/mmap.c > > @@ -1740,16 +1740,7 @@ int do_vma_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma, > > unsigned long start, unsigned long end, struct list_head *uf, > > bool unlock) > > { > > - struct mm_struct *mm = vma->vm_mm; > > - > > - /* > > - * Check if memory is sealed, prevent unmapping a sealed VMA. > > - * can_modify_mm assumes we have acquired the lock on MM. > > - */ > > - if (unlikely(!can_modify_mm(mm, start, end))) > > - return -EPERM; > Another approach to improve perf is to clone the vmi (since it > already point to the first vma), and pass the cloned vmi/vma into > can_modify_mm check, that will remove the cost of re-finding the first > VMA. > > The can_modify_mm then continues from cloned VMI/vma till the end of > address range, there will be some perf cost there. However, most > address ranges in the real world are within a single VMA, in > practice, the perf cost is the same as checking the single VMA, 99.9% > case. > > This will help preserve the nice sealing feature (if one of the vma is > sealed, the entire address range is not modified) Please drop it. No one wants to preserve this. Everyone is in sync when it comes to the solution except you.
On Wed, Aug 21, 2024 at 9:24 AM Pedro Falcato <pedro.falcato@gmail.com> wrote: > > On Wed, Aug 21, 2024 at 5:16 PM Jeff Xu <jeffxu@chromium.org> wrote: > > > > On Fri, Aug 16, 2024 at 5:18 PM Pedro Falcato <pedro.falcato@gmail.com> wrote: > > > > > > We were doing an extra mmap tree traversal just to check if the entire > > > range is modifiable. This can be done when we iterate through the VMAs > > > instead. > > > > > > Signed-off-by: Pedro Falcato <pedro.falcato@gmail.com> > > > --- > > > mm/mmap.c | 11 +---------- > > > mm/vma.c | 19 ++++++++++++------- > > > 2 files changed, 13 insertions(+), 17 deletions(-) > > > > > > diff --git a/mm/mmap.c b/mm/mmap.c > > > index 3af256bacef3..30ae4cb5cec9 100644 > > > --- a/mm/mmap.c > > > +++ b/mm/mmap.c > > > @@ -1740,16 +1740,7 @@ int do_vma_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma, > > > unsigned long start, unsigned long end, struct list_head *uf, > > > bool unlock) > > > { > > > - struct mm_struct *mm = vma->vm_mm; > > > - > > > - /* > > > - * Check if memory is sealed, prevent unmapping a sealed VMA. > > > - * can_modify_mm assumes we have acquired the lock on MM. > > > - */ > > > - if (unlikely(!can_modify_mm(mm, start, end))) > > > - return -EPERM; > > Another approach to improve perf is to clone the vmi (since it > > already point to the first vma), and pass the cloned vmi/vma into > > can_modify_mm check, that will remove the cost of re-finding the first > > VMA. > > > > The can_modify_mm then continues from cloned VMI/vma till the end of > > address range, there will be some perf cost there. However, most > > address ranges in the real world are within a single VMA, in > > practice, the perf cost is the same as checking the single VMA, 99.9% > > case. > > > > This will help preserve the nice sealing feature (if one of the vma is > > sealed, the entire address range is not modified) > > Please drop it. No one wants to preserve this. 
Everyone is in sync > when it comes to the solution except you. Still, this is another option that will very likely address the perf issue. -Jeff > > -- > Pedro
On Wed, Aug 21, 2024 at 09:15:52AM GMT, Jeff Xu wrote: > On Fri, Aug 16, 2024 at 5:18 PM Pedro Falcato <pedro.falcato@gmail.com> wrote: > > > > We were doing an extra mmap tree traversal just to check if the entire > > range is modifiable. This can be done when we iterate through the VMAs > > instead. > > > > Signed-off-by: Pedro Falcato <pedro.falcato@gmail.com> > > --- > > mm/mmap.c | 11 +---------- > > mm/vma.c | 19 ++++++++++++------- > > 2 files changed, 13 insertions(+), 17 deletions(-) > > > > diff --git a/mm/mmap.c b/mm/mmap.c > > index 3af256bacef3..30ae4cb5cec9 100644 > > --- a/mm/mmap.c > > +++ b/mm/mmap.c > > @@ -1740,16 +1740,7 @@ int do_vma_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma, > > unsigned long start, unsigned long end, struct list_head *uf, > > bool unlock) > > { > > - struct mm_struct *mm = vma->vm_mm; > > - > > - /* > > - * Check if memory is sealed, prevent unmapping a sealed VMA. > > - * can_modify_mm assumes we have acquired the lock on MM. > > - */ > > - if (unlikely(!can_modify_mm(mm, start, end))) > > - return -EPERM; > Another approach to improve perf is to clone the vmi (since it > already point to the first vma), and pass the cloned vmi/vma into > can_modify_mm check, that will remove the cost of re-finding the first > VMA. > > The can_modify_mm then continues from cloned VMI/vma till the end of > address range, there will be some perf cost there. However, most > address ranges in the real world are within a single VMA, in > practice, the perf cost is the same as checking the single VMA, 99.9% > case. > > This will help preserve the nice sealing feature (if one of the vma is > sealed, the entire address range is not modified) This is tantamount to saying 'why not drop the entire series and try a totally different approach?' and is frankly a little rude and unprofessional to put as a comment midway through a series like this. 
I don't agree this sealed property is 'nice', every single other form of failure/inability to perform operations on a VMA is permitted to result in partial operations and an error code. If a VMA is sealed and causes an operation to fail, that's fine. We run on arguments and facts in the kernel, not feelings. Please provide this kind of generalised critique at the 0/7 patch. And try to be vaguely civil. > > > - > > - return do_vmi_align_munmap(vmi, vma, mm, start, end, uf, unlock); > > + return do_vmi_align_munmap(vmi, vma, vma->vm_mm, start, end, uf, unlock); > > } > > > > /* > > diff --git a/mm/vma.c b/mm/vma.c > > index 84965f2cd580..5850f7c0949b 100644 > > --- a/mm/vma.c > > +++ b/mm/vma.c > > @@ -712,6 +712,12 @@ do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma, > > if (end < vma->vm_end && mm->map_count >= sysctl_max_map_count) > > goto map_count_exceeded; > > > > + /* Don't bother splitting the VMA if we can't unmap it anyway */ > > + if (!can_modify_vma(vma)) { > > + error = -EPERM; > > + goto start_split_failed; > > + } > > + > > error = __split_vma(vmi, vma, start, 1); > > if (error) > > goto start_split_failed; > > @@ -723,6 +729,11 @@ do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma, > > */ > > next = vma; > > do { > > + if (!can_modify_vma(next)) { > > + error = -EPERM; > > + goto modify_vma_failed; > > + } > > + > > /* Does it split the end? */ > > if (next->vm_end > end) { > > error = __split_vma(vmi, next, end, 0); > > @@ -815,6 +826,7 @@ do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma, > > __mt_destroy(&mt_detach); > > return 0; > > > > +modify_vma_failed: > > clear_tree_failed: > > userfaultfd_error: > > munmap_gather_failed: > > @@ -860,13 +872,6 @@ int do_vmi_munmap(struct vma_iterator *vmi, struct mm_struct *mm, > > if (end == start) > > return -EINVAL; > > > > - /* > > - * Check if memory is sealed, prevent unmapping a sealed VMA. 
> > - * can_modify_mm assumes we have acquired the lock on MM. > > - */ > > - if (unlikely(!can_modify_mm(mm, start, end))) > > - return -EPERM; > > - > > /* Find the first overlapping VMA */ > > vma = vma_find(vmi, end); > > if (!vma) { > > > > -- > > 2.46.0 > >
On Wed, Aug 21, 2024 at 09:33:06AM GMT, Jeff Xu wrote: > On Wed, Aug 21, 2024 at 9:24 AM Pedro Falcato <pedro.falcato@gmail.com> wrote: > > > > On Wed, Aug 21, 2024 at 5:16 PM Jeff Xu <jeffxu@chromium.org> wrote: > > > > > > On Fri, Aug 16, 2024 at 5:18 PM Pedro Falcato <pedro.falcato@gmail.com> wrote: > > > > > > > > We were doing an extra mmap tree traversal just to check if the entire > > > > range is modifiable. This can be done when we iterate through the VMAs > > > > instead. > > > > > > > > Signed-off-by: Pedro Falcato <pedro.falcato@gmail.com> > > > > --- > > > > mm/mmap.c | 11 +---------- > > > > mm/vma.c | 19 ++++++++++++------- > > > > 2 files changed, 13 insertions(+), 17 deletions(-) > > > > > > > > diff --git a/mm/mmap.c b/mm/mmap.c > > > > index 3af256bacef3..30ae4cb5cec9 100644 > > > > --- a/mm/mmap.c > > > > +++ b/mm/mmap.c > > > > @@ -1740,16 +1740,7 @@ int do_vma_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma, > > > > unsigned long start, unsigned long end, struct list_head *uf, > > > > bool unlock) > > > > { > > > > - struct mm_struct *mm = vma->vm_mm; > > > > - > > > > - /* > > > > - * Check if memory is sealed, prevent unmapping a sealed VMA. > > > > - * can_modify_mm assumes we have acquired the lock on MM. > > > > - */ > > > > - if (unlikely(!can_modify_mm(mm, start, end))) > > > > - return -EPERM; > > > Another approach to improve perf is to clone the vmi (since it > > > already point to the first vma), and pass the cloned vmi/vma into > > > can_modify_mm check, that will remove the cost of re-finding the first > > > VMA. > > > > > > The can_modify_mm then continues from cloned VMI/vma till the end of > > > address range, there will be some perf cost there. However, most > > > address ranges in the real world are within a single VMA, in > > > practice, the perf cost is the same as checking the single VMA, 99.9% > > > case. 
> > > > > > This will help preserve the nice sealing feature (if one of the vma is > > > sealed, the entire address range is not modified) > > > > Please drop it. No one wants to preserve this. Everyone is in sync > > when it comes to the solution except you. > > Still, this is another option that will very likely address the perf issue. Nack to your approach. Feel free to send a follow-up series replacing Pedro's with yours for review if you feel differently, and stop stalling things. Thanks. > > -Jeff > > > > > -- > > Pedro
* Jeff Xu <jeffxu@chromium.org> [240821 12:33]: > On Wed, Aug 21, 2024 at 9:24 AM Pedro Falcato <pedro.falcato@gmail.com> wrote: > > > > On Wed, Aug 21, 2024 at 5:16 PM Jeff Xu <jeffxu@chromium.org> wrote: > > > > > > On Fri, Aug 16, 2024 at 5:18 PM Pedro Falcato <pedro.falcato@gmail.com> wrote: > > > > > > > > We were doing an extra mmap tree traversal just to check if the entire > > > > range is modifiable. This can be done when we iterate through the VMAs > > > > instead. > > > > > > > > Signed-off-by: Pedro Falcato <pedro.falcato@gmail.com> > > > > --- > > > > mm/mmap.c | 11 +---------- > > > > mm/vma.c | 19 ++++++++++++------- > > > > 2 files changed, 13 insertions(+), 17 deletions(-) > > > > > > > > diff --git a/mm/mmap.c b/mm/mmap.c > > > > index 3af256bacef3..30ae4cb5cec9 100644 > > > > --- a/mm/mmap.c > > > > +++ b/mm/mmap.c > > > > @@ -1740,16 +1740,7 @@ int do_vma_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma, > > > > unsigned long start, unsigned long end, struct list_head *uf, > > > > bool unlock) > > > > { > > > > - struct mm_struct *mm = vma->vm_mm; > > > > - > > > > - /* > > > > - * Check if memory is sealed, prevent unmapping a sealed VMA. > > > > - * can_modify_mm assumes we have acquired the lock on MM. > > > > - */ > > > > - if (unlikely(!can_modify_mm(mm, start, end))) > > > > - return -EPERM; > > > Another approach to improve perf is to clone the vmi (since it > > > already point to the first vma), and pass the cloned vmi/vma into > > > can_modify_mm check, that will remove the cost of re-finding the first > > > VMA. > > > > > > The can_modify_mm then continues from cloned VMI/vma till the end of > > > address range, there will be some perf cost there. However, most > > > address ranges in the real world are within a single VMA, in > > > practice, the perf cost is the same as checking the single VMA, 99.9% > > > case. 
> > > > > > This will help preserve the nice sealing feature (if one of the vma is > > > sealed, the entire address range is not modified) > > > > Please drop it. No one wants to preserve this. Everyone is in sync > > when it comes to the solution except you. > > Still, this is another option that will very likely address the perf issue. The cost of cloning the vmi is a memory copy, while the cost of not cloning the vmi is a re-walk of the tree. Neither of these is free. Both can be avoided by checking the vma flag during the existing loop, which is what is done in this patch set. This is obviously lower cost than either of the above options since they do extra work and still have to check the vma flag(s). I think you are confusing the behaviour of the munmap() call when you state 'partial success' with a potential split operation that may happen prior to aborting a munmap() call. What could happen in the failure scenario is that you'd end up with two vmas instead of one mapping a particular area - but the mseal flag is checked prior to allowing a split to happen, so it'll only split non-mseal()'ed vmas. So what mseal() used to avoid is a call that could potentially split a vma that isn't mseal()'ed, while this patch will allow it to be split. This is how it has been for a very long time and it's okay. Considering how this affects the security model of mseal(), it means the attacker could still accomplish the same feat of splitting that first vma by altering the call to munmap() to avoid an mseal()'ed vma, so there isn't much lost or gained here security-wise - if any. Thanks, Liam
diff --git a/mm/mmap.c b/mm/mmap.c index 3af256bacef3..30ae4cb5cec9 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1740,16 +1740,7 @@ int do_vma_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma, unsigned long start, unsigned long end, struct list_head *uf, bool unlock) { - struct mm_struct *mm = vma->vm_mm; - - /* - * Check if memory is sealed, prevent unmapping a sealed VMA. - * can_modify_mm assumes we have acquired the lock on MM. - */ - if (unlikely(!can_modify_mm(mm, start, end))) - return -EPERM; - - return do_vmi_align_munmap(vmi, vma, mm, start, end, uf, unlock); + return do_vmi_align_munmap(vmi, vma, vma->vm_mm, start, end, uf, unlock); } /* diff --git a/mm/vma.c b/mm/vma.c index 84965f2cd580..5850f7c0949b 100644 --- a/mm/vma.c +++ b/mm/vma.c @@ -712,6 +712,12 @@ do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma, if (end < vma->vm_end && mm->map_count >= sysctl_max_map_count) goto map_count_exceeded; + /* Don't bother splitting the VMA if we can't unmap it anyway */ + if (!can_modify_vma(vma)) { + error = -EPERM; + goto start_split_failed; + } + error = __split_vma(vmi, vma, start, 1); if (error) goto start_split_failed; @@ -723,6 +729,11 @@ do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma, */ next = vma; do { + if (!can_modify_vma(next)) { + error = -EPERM; + goto modify_vma_failed; + } + /* Does it split the end? */ if (next->vm_end > end) { error = __split_vma(vmi, next, end, 0); @@ -815,6 +826,7 @@ do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma, __mt_destroy(&mt_detach); return 0; +modify_vma_failed: clear_tree_failed: userfaultfd_error: munmap_gather_failed: @@ -860,13 +872,6 @@ int do_vmi_munmap(struct vma_iterator *vmi, struct mm_struct *mm, if (end == start) return -EINVAL; - /* - * Check if memory is sealed, prevent unmapping a sealed VMA. - * can_modify_mm assumes we have acquired the lock on MM. 
- */ - if (unlikely(!can_modify_mm(mm, start, end))) - return -EPERM; - /* Find the first overlapping VMA */ vma = vma_find(vmi, end); if (!vma) {
We were doing an extra mmap tree traversal just to check if the entire range is modifiable. This can be done when we iterate through the VMAs instead. Signed-off-by: Pedro Falcato <pedro.falcato@gmail.com> --- mm/mmap.c | 11 +---------- mm/vma.c | 19 ++++++++++++------- 2 files changed, 13 insertions(+), 17 deletions(-)