diff mbox series

[v3,4/5] mm: userfaultfd: don't separate addr + len arguments

Message ID 20230306225024.264858-5-axelrasmussen@google.com (mailing list archive)
State New
Headers show
Series mm: userfaultfd: refactor and add UFFDIO_CONTINUE_MODE_WP | expand

Commit Message

Axel Rasmussen March 6, 2023, 10:50 p.m. UTC
We have a lot of functions which take an address + length pair,
currently passed as separate arguments. However, in our userspace API we
already have struct uffdio_range, which is exactly this pair, and this
is what we get from userspace when ioctls are called.

Instead of splitting the struct up into two separate arguments, just
plumb the struct through to the functions which use it (once we get to
the mfill_atomic_pte level, we're dealing with single (huge)pages, so we
don't need both parts).

Relatedly, for waking, just re-use this existing structure instead of
defining a new "struct uffdio_wake_range".

Signed-off-by: Axel Rasmussen <axelrasmussen@google.com>
---
 fs/userfaultfd.c              | 107 +++++++++++++---------------------
 include/linux/userfaultfd_k.h |  17 +++---
 mm/userfaultfd.c              |  92 ++++++++++++++---------------
 3 files changed, 96 insertions(+), 120 deletions(-)

Comments

Peter Xu March 7, 2023, 1:19 a.m. UTC | #1
On Mon, Mar 06, 2023 at 02:50:23PM -0800, Axel Rasmussen wrote:
> We have a lot of functions which take an address + length pair,
> currently passed as separate arguments. However, in our userspace API we
> already have struct uffdio_range, which is exactly this pair, and this
> is what we get from userspace when ioctls are called.
> 
> Instead of splitting the struct up into two separate arguments, just
> plumb the struct through to the functions which use it (once we get to
> the mfill_atomic_pte level, we're dealing with single (huge)pages, so we
> don't need both parts).
> 
> Relatedly, for waking, just re-use this existing structure instead of
> defining a new "struct uffdio_wake_range".
> 
> Signed-off-by: Axel Rasmussen <axelrasmussen@google.com>
> ---
>  fs/userfaultfd.c              | 107 +++++++++++++---------------------
>  include/linux/userfaultfd_k.h |  17 +++---
>  mm/userfaultfd.c              |  92 ++++++++++++++---------------
>  3 files changed, 96 insertions(+), 120 deletions(-)
> 
> diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
> index b8e328123b71..984b63b0fc75 100644
> --- a/fs/userfaultfd.c
> +++ b/fs/userfaultfd.c
> @@ -95,11 +95,6 @@ struct userfaultfd_wait_queue {
>  	bool waken;
>  };
>  
> -struct userfaultfd_wake_range {
> -	unsigned long start;
> -	unsigned long len;
> -};

Would there still be a difference on e.g. 32 bits systems?

[...]

>  static __always_inline int validate_range(struct mm_struct *mm,
> -					  __u64 start, __u64 len)
> +					  const struct uffdio_range *range)
>  {
>  	__u64 task_size = mm->task_size;
>  
> -	if (start & ~PAGE_MASK)
> +	if (range->start & ~PAGE_MASK)
>  		return -EINVAL;
> -	if (len & ~PAGE_MASK)
> +	if (range->len & ~PAGE_MASK)
>  		return -EINVAL;
> -	if (!len)
> +	if (!range->len)
>  		return -EINVAL;
> -	if (start < mmap_min_addr)
> +	if (range->start < mmap_min_addr)
>  		return -EINVAL;
> -	if (start >= task_size)
> +	if (range->start >= task_size)
>  		return -EINVAL;
> -	if (len > task_size - start)
> +	if (range->len > task_size - range->start)
>  		return -EINVAL;
>  	return 0;
>  }

Personally I don't like a lot on such a change. :( It avoids one parameter
being passed over but it can add a lot indirections.

Do you strongly suggest this?  Shall we move on without this so to not
block the last patch (which I assume is the one you're looking for)?

Thanks,
Nadav Amit March 7, 2023, 1:29 a.m. UTC | #2
> On Mar 6, 2023, at 5:19 PM, Peter Xu <peterx@redhat.com> wrote:
> 
> !! External Email
> 
> On Mon, Mar 06, 2023 at 02:50:23PM -0800, Axel Rasmussen wrote:
>> We have a lot of functions which take an address + length pair,
>> currently passed as separate arguments. However, in our userspace API we
>> already have struct uffdio_range, which is exactly this pair, and this
>> is what we get from userspace when ioctls are called.
>> 
>> Instead of splitting the struct up into two separate arguments, just
>> plumb the struct through to the functions which use it (once we get to
>> the mfill_atomic_pte level, we're dealing with single (huge)pages, so we
>> don't need both parts).
>> 
>> Relatedly, for waking, just re-use this existing structure instead of
>> defining a new "struct uffdio_wake_range".
>> 
>> Signed-off-by: Axel Rasmussen <axelrasmussen@google.com>
>> ---
>> fs/userfaultfd.c              | 107 +++++++++++++---------------------
>> include/linux/userfaultfd_k.h |  17 +++---
>> mm/userfaultfd.c              |  92 ++++++++++++++---------------
>> 3 files changed, 96 insertions(+), 120 deletions(-)
>> 
>> diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
>> index b8e328123b71..984b63b0fc75 100644
>> --- a/fs/userfaultfd.c
>> +++ b/fs/userfaultfd.c
>> @@ -95,11 +95,6 @@ struct userfaultfd_wait_queue {
>>      bool waken;
>> };
>> 
>> -struct userfaultfd_wake_range {
>> -     unsigned long start;
>> -     unsigned long len;
>> -};
> 
> Would there still be a difference on e.g. 32 bits systems?
> 
> [...]
> 
>> static __always_inline int validate_range(struct mm_struct *mm,
>> -                                       __u64 start, __u64 len)
>> +                                       const struct uffdio_range *range)
>> {
>>      __u64 task_size = mm->task_size;
>> 
>> -     if (start & ~PAGE_MASK)
>> +     if (range->start & ~PAGE_MASK)
>>              return -EINVAL;
>> -     if (len & ~PAGE_MASK)
>> +     if (range->len & ~PAGE_MASK)
>>              return -EINVAL;
>> -     if (!len)
>> +     if (!range->len)
>>              return -EINVAL;
>> -     if (start < mmap_min_addr)
>> +     if (range->start < mmap_min_addr)
>>              return -EINVAL;
>> -     if (start >= task_size)
>> +     if (range->start >= task_size)
>>              return -EINVAL;
>> -     if (len > task_size - start)
>> +     if (range->len > task_size - range->start)
>>              return -EINVAL;
>>      return 0;
>> }
> 
> Personally I don't like a lot on such a change. :( It avoids one parameter
> being passed over but it can add a lot indirections.
> 
> Do you strongly suggest this?  Shall we move on without this so to not
> block the last patch (which I assume is the one you're looking for)?

Just in case you missed, it is __always_inline, so I presume that from a
generated code point-of-view it is the same.

Having said that, small assignments to local start, let and range variables
would make the code easier to read and reduce the change-set.
Axel Rasmussen March 7, 2023, 6:52 p.m. UTC | #3
On Mon, Mar 6, 2023 at 5:30 PM Nadav Amit <namit@vmware.com> wrote:

>
>
> > On Mar 6, 2023, at 5:19 PM, Peter Xu <peterx@redhat.com> wrote:
> >
> > !! External Email
> >
> > On Mon, Mar 06, 2023 at 02:50:23PM -0800, Axel Rasmussen wrote:
> >> We have a lot of functions which take an address + length pair,
> >> currently passed as separate arguments. However, in our userspace API we
> >> already have struct uffdio_range, which is exactly this pair, and this
> >> is what we get from userspace when ioctls are called.
> >>
> >> Instead of splitting the struct up into two separate arguments, just
> >> plumb the struct through to the functions which use it (once we get to
> >> the mfill_atomic_pte level, we're dealing with single (huge)pages, so we
> >> don't need both parts).
> >>
> >> Relatedly, for waking, just re-use this existing structure instead of
> >> defining a new "struct uffdio_wake_range".
> >>
> >> Signed-off-by: Axel Rasmussen <axelrasmussen@google.com>
> >> ---
> >> fs/userfaultfd.c              | 107 +++++++++++++---------------------
> >> include/linux/userfaultfd_k.h |  17 +++---
> >> mm/userfaultfd.c              |  92 ++++++++++++++---------------
> >> 3 files changed, 96 insertions(+), 120 deletions(-)
> >>
> >> diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
> >> index b8e328123b71..984b63b0fc75 100644
> >> --- a/fs/userfaultfd.c
> >> +++ b/fs/userfaultfd.c
> >> @@ -95,11 +95,6 @@ struct userfaultfd_wait_queue {
> >>      bool waken;
> >> };
> >>
> >> -struct userfaultfd_wake_range {
> >> -     unsigned long start;
> >> -     unsigned long len;
> >> -};
> >
> > Would there still be a difference on e.g. 32 bits systems?
>

My assumption is that __u64 is at least 64 bits wide on all platforms, and
so it is sufficient. I believe the standard allows unsigned long to be
32-bits, so __u64 may be overkill on some platforms, but to me the cost is
small enough I'd prefer to avoid defining a second almost-identical
structure.

Then again though as I say below, I don't feel strongly about this refactor.


> >
> > [...]
> >
> >> static __always_inline int validate_range(struct mm_struct *mm,
> >> -                                       __u64 start, __u64 len)
> >> +                                       const struct uffdio_range
> *range)
> >> {
> >>      __u64 task_size = mm->task_size;
> >>
> >> -     if (start & ~PAGE_MASK)
> >> +     if (range->start & ~PAGE_MASK)
> >>              return -EINVAL;
> >> -     if (len & ~PAGE_MASK)
> >> +     if (range->len & ~PAGE_MASK)
> >>              return -EINVAL;
> >> -     if (!len)
> >> +     if (!range->len)
> >>              return -EINVAL;
> >> -     if (start < mmap_min_addr)
> >> +     if (range->start < mmap_min_addr)
> >>              return -EINVAL;
> >> -     if (start >= task_size)
> >> +     if (range->start >= task_size)
> >>              return -EINVAL;
> >> -     if (len > task_size - start)
> >> +     if (range->len > task_size - range->start)
> >>              return -EINVAL;
> >>      return 0;
> >> }
> >
> > Personally I don't like a lot on such a change. :( It avoids one
> parameter
> > being passed over but it can add a lot indirections.
> >
> > Do you strongly suggest this?  Shall we move on without this so to not
> > block the last patch (which I assume is the one you're looking for)?
>

I don't feel strongly, I'm fine with dropping this patch. I'll make that
change in a v4 (I think there will be some conflicts to resolve in the
patches after this one, so I'll post a new version to avoid troubling
anyone).


>
> Just in case you missed, it is __always_inline, so I presume that from a
> generated code point-of-view it is the same.
>
> Having said that, small assignments to local start, let and range variables
> would make the code easier to read and reduce the change-set.
>
>
kernel test robot March 8, 2023, 9:51 a.m. UTC | #4
Hi Axel,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on linus/master]
[also build test ERROR on v6.3-rc1]
[cannot apply to akpm-mm/mm-everything next-20230308]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Axel-Rasmussen/mm-userfaultfd-rename-functions-for-clarity-consistency/20230307-065203
patch link:    https://lore.kernel.org/r/20230306225024.264858-5-axelrasmussen%40google.com
patch subject: [PATCH v3 4/5] mm: userfaultfd: don't separate addr + len arguments
config: x86_64-randconfig-a011-20230306 (https://download.01.org/0day-ci/archive/20230308/202303081703.nwxAgIVH-lkp@intel.com/config)
compiler: clang version 14.0.6 (https://github.com/llvm/llvm-project f28c006a5895fc0e329fe15fead81e37457cb1d1)
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/intel-lab-lkp/linux/commit/cee642b93be3ae01c7cc737c0176cbc16074a25a
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review Axel-Rasmussen/mm-userfaultfd-rename-functions-for-clarity-consistency/20230307-065203
        git checkout cee642b93be3ae01c7cc737c0176cbc16074a25a
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=x86_64 olddefconfig
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=x86_64 SHELL=/bin/bash

If you fix the issue, kindly add following tag where applicable
| Reported-by: kernel test robot <lkp@intel.com>
| Link: https://lore.kernel.org/oe-kbuild-all/202303081703.nwxAgIVH-lkp@intel.com/

All errors (new ones prefixed by >>):

>> mm/userfaultfd.c:577:52: error: passing 'const struct uffdio_range *' to parameter of incompatible type 'struct uffdio_range'
                   return  mfill_atomic_hugetlb(dst_vma, src_start, dst, flags);
                                                                    ^~~
   mm/userfaultfd.c:463:29: note: passing argument to parameter 'dst' here
                                       struct uffdio_range dst,
                                                           ^
   1 error generated.


vim +577 mm/userfaultfd.c

   508	
   509	static __always_inline ssize_t mfill_atomic(struct mm_struct *dst_mm,
   510						    unsigned long src_start,
   511						    const struct uffdio_range *dst,
   512						    atomic_t *mmap_changing,
   513						    uffd_flags_t flags)
   514	{
   515		struct vm_area_struct *dst_vma;
   516		ssize_t err;
   517		pmd_t *dst_pmd;
   518		unsigned long src_addr, dst_addr;
   519		long copied;
   520		struct page *page;
   521	
   522		/*
   523		 * Sanitize the command parameters:
   524		 */
   525		BUG_ON(dst->start & ~PAGE_MASK);
   526		BUG_ON(dst->len & ~PAGE_MASK);
   527	
   528		/* Does the address range wrap, or is the span zero-sized? */
   529		BUG_ON(src_start + dst->len <= src_start);
   530		BUG_ON(dst->start + dst->len <= dst->start);
   531	
   532		src_addr = src_start;
   533		dst_addr = dst->start;
   534		copied = 0;
   535		page = NULL;
   536	retry:
   537		mmap_read_lock(dst_mm);
   538	
   539		/*
   540		 * If memory mappings are changing because of non-cooperative
   541		 * operation (e.g. mremap) running in parallel, bail out and
   542		 * request the user to retry later
   543		 */
   544		err = -EAGAIN;
   545		if (mmap_changing && atomic_read(mmap_changing))
   546			goto out_unlock;
   547	
   548		/*
   549		 * Make sure the vma is not shared, that the dst range is
   550		 * both valid and fully within a single existing vma.
   551		 */
   552		err = -ENOENT;
   553		dst_vma = find_dst_vma(dst_mm, dst);
   554		if (!dst_vma)
   555			goto out_unlock;
   556	
   557		err = -EINVAL;
   558		/*
   559		 * shmem_zero_setup is invoked in mmap for MAP_ANONYMOUS|MAP_SHARED but
   560		 * it will overwrite vm_ops, so vma_is_anonymous must return false.
   561		 */
   562		if (WARN_ON_ONCE(vma_is_anonymous(dst_vma) &&
   563		    dst_vma->vm_flags & VM_SHARED))
   564			goto out_unlock;
   565	
   566		/*
   567		 * validate 'mode' now that we know the dst_vma: don't allow
   568		 * a wrprotect copy if the userfaultfd didn't register as WP.
   569		 */
   570		if ((flags & MFILL_ATOMIC_WP) && !(dst_vma->vm_flags & VM_UFFD_WP))
   571			goto out_unlock;
   572	
   573		/*
   574		 * If this is a HUGETLB vma, pass off to appropriate routine
   575		 */
   576		if (is_vm_hugetlb_page(dst_vma))
 > 577			return  mfill_atomic_hugetlb(dst_vma, src_start, dst, flags);
   578	
   579		if (!vma_is_anonymous(dst_vma) && !vma_is_shmem(dst_vma))
   580			goto out_unlock;
   581		if (!vma_is_shmem(dst_vma) &&
   582		    (flags & MFILL_ATOMIC_MODE_MASK) == MFILL_ATOMIC_CONTINUE)
   583			goto out_unlock;
   584	
   585		/*
   586		 * Ensure the dst_vma has a anon_vma or this page
   587		 * would get a NULL anon_vma when moved in the
   588		 * dst_vma.
   589		 */
   590		err = -ENOMEM;
   591		if (!(dst_vma->vm_flags & VM_SHARED) &&
   592		    unlikely(anon_vma_prepare(dst_vma)))
   593			goto out_unlock;
   594	
   595		while (src_addr < src_start + dst->len) {
   596			pmd_t dst_pmdval;
   597	
   598			BUG_ON(dst_addr >= dst->start + dst->len);
   599	
   600			dst_pmd = mm_alloc_pmd(dst_mm, dst_addr);
   601			if (unlikely(!dst_pmd)) {
   602				err = -ENOMEM;
   603				break;
   604			}
   605	
   606			dst_pmdval = pmdp_get_lockless(dst_pmd);
   607			/*
   608			 * If the dst_pmd is mapped as THP don't
   609			 * override it and just be strict.
   610			 */
   611			if (unlikely(pmd_trans_huge(dst_pmdval))) {
   612				err = -EEXIST;
   613				break;
   614			}
   615			if (unlikely(pmd_none(dst_pmdval)) &&
   616			    unlikely(__pte_alloc(dst_mm, dst_pmd))) {
   617				err = -ENOMEM;
   618				break;
   619			}
   620			/* If an huge pmd materialized from under us fail */
   621			if (unlikely(pmd_trans_huge(*dst_pmd))) {
   622				err = -EFAULT;
   623				break;
   624			}
   625	
   626			BUG_ON(pmd_none(*dst_pmd));
   627			BUG_ON(pmd_trans_huge(*dst_pmd));
   628	
   629			err = mfill_atomic_pte(dst_pmd, dst_vma, dst_addr,
   630					       src_addr, &page, flags);
   631			cond_resched();
   632	
   633			if (unlikely(err == -ENOENT)) {
   634				void *page_kaddr;
   635	
   636				mmap_read_unlock(dst_mm);
   637				BUG_ON(!page);
   638	
   639				page_kaddr = kmap_local_page(page);
   640				err = copy_from_user(page_kaddr,
   641						     (const void __user *) src_addr,
   642						     PAGE_SIZE);
   643				kunmap_local(page_kaddr);
   644				if (unlikely(err)) {
   645					err = -EFAULT;
   646					goto out;
   647				}
   648				flush_dcache_page(page);
   649				goto retry;
   650			} else
   651				BUG_ON(page);
   652	
   653			if (!err) {
   654				dst_addr += PAGE_SIZE;
   655				src_addr += PAGE_SIZE;
   656				copied += PAGE_SIZE;
   657	
   658				if (fatal_signal_pending(current))
   659					err = -EINTR;
   660			}
   661			if (err)
   662				break;
   663		}
   664	
   665	out_unlock:
   666		mmap_read_unlock(dst_mm);
   667	out:
   668		if (page)
   669			put_page(page);
   670		BUG_ON(copied < 0);
   671		BUG_ON(err > 0);
   672		BUG_ON(!copied && !err);
   673		return copied ? copied : err;
   674	}
   675
Axel Rasmussen March 8, 2023, 6:48 p.m. UTC | #5
On Wed, Mar 8, 2023 at 1:52 AM kernel test robot <lkp@intel.com> wrote:
>
> Hi Axel,
>
> Thank you for the patch! Yet something to improve:
>
> [auto build test ERROR on linus/master]
> [also build test ERROR on v6.3-rc1]
> [cannot apply to akpm-mm/mm-everything next-20230308]
> [If your patch is applied to the wrong git tree, kindly drop us a note.
> And when submitting patch, we suggest to use '--base' as documented in
> https://git-scm.com/docs/git-format-patch#_base_tree_information]
>
> url:    https://github.com/intel-lab-lkp/linux/commits/Axel-Rasmussen/mm-userfaultfd-rename-functions-for-clarity-consistency/20230307-065203
> patch link:    https://lore.kernel.org/r/20230306225024.264858-5-axelrasmussen%40google.com
> patch subject: [PATCH v3 4/5] mm: userfaultfd: don't separate addr + len arguments
> config: x86_64-randconfig-a011-20230306 (https://download.01.org/0day-ci/archive/20230308/202303081703.nwxAgIVH-lkp@intel.com/config)
> compiler: clang version 14.0.6 (https://github.com/llvm/llvm-project f28c006a5895fc0e329fe15fead81e37457cb1d1)
> reproduce (this is a W=1 build):
>         wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
>         chmod +x ~/bin/make.cross
>         # https://github.com/intel-lab-lkp/linux/commit/cee642b93be3ae01c7cc737c0176cbc16074a25a
>         git remote add linux-review https://github.com/intel-lab-lkp/linux
>         git fetch --no-tags linux-review Axel-Rasmussen/mm-userfaultfd-rename-functions-for-clarity-consistency/20230307-065203
>         git checkout cee642b93be3ae01c7cc737c0176cbc16074a25a
>         # save the config file
>         mkdir build_dir && cp config build_dir/.config
>         COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=x86_64 olddefconfig
>         COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=x86_64 SHELL=/bin/bash
>
> If you fix the issue, kindly add following tag where applicable
> | Reported-by: kernel test robot <lkp@intel.com>
> | Link: https://lore.kernel.org/oe-kbuild-all/202303081703.nwxAgIVH-lkp@intel.com/
>
> All errors (new ones prefixed by >>):
>
> >> mm/userfaultfd.c:577:52: error: passing 'const struct uffdio_range *' to parameter of incompatible type 'struct uffdio_range'
>                    return  mfill_atomic_hugetlb(dst_vma, src_start, dst, flags);
>                                                                     ^~~
>    mm/userfaultfd.c:463:29: note: passing argument to parameter 'dst' here
>                                        struct uffdio_range dst,
>                                                            ^
>    1 error generated.

Whoops. :) I admittedly didn't test with !CONFIG_HUGETLB_PAGE.

The next version of this series will drop this patch as per discussion
though, so the issue is moot.

>
>
> vim +577 mm/userfaultfd.c
>
>    508
>    509  static __always_inline ssize_t mfill_atomic(struct mm_struct *dst_mm,
>    510                                              unsigned long src_start,
>    511                                              const struct uffdio_range *dst,
>    512                                              atomic_t *mmap_changing,
>    513                                              uffd_flags_t flags)
>    514  {
>    515          struct vm_area_struct *dst_vma;
>    516          ssize_t err;
>    517          pmd_t *dst_pmd;
>    518          unsigned long src_addr, dst_addr;
>    519          long copied;
>    520          struct page *page;
>    521
>    522          /*
>    523           * Sanitize the command parameters:
>    524           */
>    525          BUG_ON(dst->start & ~PAGE_MASK);
>    526          BUG_ON(dst->len & ~PAGE_MASK);
>    527
>    528          /* Does the address range wrap, or is the span zero-sized? */
>    529          BUG_ON(src_start + dst->len <= src_start);
>    530          BUG_ON(dst->start + dst->len <= dst->start);
>    531
>    532          src_addr = src_start;
>    533          dst_addr = dst->start;
>    534          copied = 0;
>    535          page = NULL;
>    536  retry:
>    537          mmap_read_lock(dst_mm);
>    538
>    539          /*
>    540           * If memory mappings are changing because of non-cooperative
>    541           * operation (e.g. mremap) running in parallel, bail out and
>    542           * request the user to retry later
>    543           */
>    544          err = -EAGAIN;
>    545          if (mmap_changing && atomic_read(mmap_changing))
>    546                  goto out_unlock;
>    547
>    548          /*
>    549           * Make sure the vma is not shared, that the dst range is
>    550           * both valid and fully within a single existing vma.
>    551           */
>    552          err = -ENOENT;
>    553          dst_vma = find_dst_vma(dst_mm, dst);
>    554          if (!dst_vma)
>    555                  goto out_unlock;
>    556
>    557          err = -EINVAL;
>    558          /*
>    559           * shmem_zero_setup is invoked in mmap for MAP_ANONYMOUS|MAP_SHARED but
>    560           * it will overwrite vm_ops, so vma_is_anonymous must return false.
>    561           */
>    562          if (WARN_ON_ONCE(vma_is_anonymous(dst_vma) &&
>    563              dst_vma->vm_flags & VM_SHARED))
>    564                  goto out_unlock;
>    565
>    566          /*
>    567           * validate 'mode' now that we know the dst_vma: don't allow
>    568           * a wrprotect copy if the userfaultfd didn't register as WP.
>    569           */
>    570          if ((flags & MFILL_ATOMIC_WP) && !(dst_vma->vm_flags & VM_UFFD_WP))
>    571                  goto out_unlock;
>    572
>    573          /*
>    574           * If this is a HUGETLB vma, pass off to appropriate routine
>    575           */
>    576          if (is_vm_hugetlb_page(dst_vma))
>  > 577                  return  mfill_atomic_hugetlb(dst_vma, src_start, dst, flags);
>    578
>    579          if (!vma_is_anonymous(dst_vma) && !vma_is_shmem(dst_vma))
>    580                  goto out_unlock;
>    581          if (!vma_is_shmem(dst_vma) &&
>    582              (flags & MFILL_ATOMIC_MODE_MASK) == MFILL_ATOMIC_CONTINUE)
>    583                  goto out_unlock;
>    584
>    585          /*
>    586           * Ensure the dst_vma has a anon_vma or this page
>    587           * would get a NULL anon_vma when moved in the
>    588           * dst_vma.
>    589           */
>    590          err = -ENOMEM;
>    591          if (!(dst_vma->vm_flags & VM_SHARED) &&
>    592              unlikely(anon_vma_prepare(dst_vma)))
>    593                  goto out_unlock;
>    594
>    595          while (src_addr < src_start + dst->len) {
>    596                  pmd_t dst_pmdval;
>    597
>    598                  BUG_ON(dst_addr >= dst->start + dst->len);
>    599
>    600                  dst_pmd = mm_alloc_pmd(dst_mm, dst_addr);
>    601                  if (unlikely(!dst_pmd)) {
>    602                          err = -ENOMEM;
>    603                          break;
>    604                  }
>    605
>    606                  dst_pmdval = pmdp_get_lockless(dst_pmd);
>    607                  /*
>    608                   * If the dst_pmd is mapped as THP don't
>    609                   * override it and just be strict.
>    610                   */
>    611                  if (unlikely(pmd_trans_huge(dst_pmdval))) {
>    612                          err = -EEXIST;
>    613                          break;
>    614                  }
>    615                  if (unlikely(pmd_none(dst_pmdval)) &&
>    616                      unlikely(__pte_alloc(dst_mm, dst_pmd))) {
>    617                          err = -ENOMEM;
>    618                          break;
>    619                  }
>    620                  /* If an huge pmd materialized from under us fail */
>    621                  if (unlikely(pmd_trans_huge(*dst_pmd))) {
>    622                          err = -EFAULT;
>    623                          break;
>    624                  }
>    625
>    626                  BUG_ON(pmd_none(*dst_pmd));
>    627                  BUG_ON(pmd_trans_huge(*dst_pmd));
>    628
>    629                  err = mfill_atomic_pte(dst_pmd, dst_vma, dst_addr,
>    630                                         src_addr, &page, flags);
>    631                  cond_resched();
>    632
>    633                  if (unlikely(err == -ENOENT)) {
>    634                          void *page_kaddr;
>    635
>    636                          mmap_read_unlock(dst_mm);
>    637                          BUG_ON(!page);
>    638
>    639                          page_kaddr = kmap_local_page(page);
>    640                          err = copy_from_user(page_kaddr,
>    641                                               (const void __user *) src_addr,
>    642                                               PAGE_SIZE);
>    643                          kunmap_local(page_kaddr);
>    644                          if (unlikely(err)) {
>    645                                  err = -EFAULT;
>    646                                  goto out;
>    647                          }
>    648                          flush_dcache_page(page);
>    649                          goto retry;
>    650                  } else
>    651                          BUG_ON(page);
>    652
>    653                  if (!err) {
>    654                          dst_addr += PAGE_SIZE;
>    655                          src_addr += PAGE_SIZE;
>    656                          copied += PAGE_SIZE;
>    657
>    658                          if (fatal_signal_pending(current))
>    659                                  err = -EINTR;
>    660                  }
>    661                  if (err)
>    662                          break;
>    663          }
>    664
>    665  out_unlock:
>    666          mmap_read_unlock(dst_mm);
>    667  out:
>    668          if (page)
>    669                  put_page(page);
>    670          BUG_ON(copied < 0);
>    671          BUG_ON(err > 0);
>    672          BUG_ON(!copied && !err);
>    673          return copied ? copied : err;
>    674  }
>    675
>
> --
> 0-DAY CI Kernel Test Service
> https://github.com/intel/lkp-tests
diff mbox series

Patch

diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index b8e328123b71..984b63b0fc75 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -95,11 +95,6 @@  struct userfaultfd_wait_queue {
 	bool waken;
 };
 
-struct userfaultfd_wake_range {
-	unsigned long start;
-	unsigned long len;
-};
-
 /* internal indication that UFFD_API ioctl was successfully executed */
 #define UFFD_FEATURE_INITIALIZED		(1u << 31)
 
@@ -126,7 +121,7 @@  static void userfaultfd_set_vm_flags(struct vm_area_struct *vma,
 static int userfaultfd_wake_function(wait_queue_entry_t *wq, unsigned mode,
 				     int wake_flags, void *key)
 {
-	struct userfaultfd_wake_range *range = key;
+	struct uffdio_range *range = key;
 	int ret;
 	struct userfaultfd_wait_queue *uwq;
 	unsigned long start, len;
@@ -881,7 +876,7 @@  static int userfaultfd_release(struct inode *inode, struct file *file)
 	struct mm_struct *mm = ctx->mm;
 	struct vm_area_struct *vma, *prev;
 	/* len == 0 means wake all */
-	struct userfaultfd_wake_range range = { .len = 0, };
+	struct uffdio_range range = {0};
 	unsigned long new_flags;
 	VMA_ITERATOR(vmi, mm, 0);
 
@@ -1226,7 +1221,7 @@  static ssize_t userfaultfd_read(struct file *file, char __user *buf,
 }
 
 static void __wake_userfault(struct userfaultfd_ctx *ctx,
-			     struct userfaultfd_wake_range *range)
+			     struct uffdio_range *range)
 {
 	spin_lock_irq(&ctx->fault_pending_wqh.lock);
 	/* wake all in the range and autoremove */
@@ -1239,7 +1234,7 @@  static void __wake_userfault(struct userfaultfd_ctx *ctx,
 }
 
 static __always_inline void wake_userfault(struct userfaultfd_ctx *ctx,
-					   struct userfaultfd_wake_range *range)
+					   struct uffdio_range *range)
 {
 	unsigned seq;
 	bool need_wakeup;
@@ -1270,21 +1265,21 @@  static __always_inline void wake_userfault(struct userfaultfd_ctx *ctx,
 }
 
 static __always_inline int validate_range(struct mm_struct *mm,
-					  __u64 start, __u64 len)
+					  const struct uffdio_range *range)
 {
 	__u64 task_size = mm->task_size;
 
-	if (start & ~PAGE_MASK)
+	if (range->start & ~PAGE_MASK)
 		return -EINVAL;
-	if (len & ~PAGE_MASK)
+	if (range->len & ~PAGE_MASK)
 		return -EINVAL;
-	if (!len)
+	if (!range->len)
 		return -EINVAL;
-	if (start < mmap_min_addr)
+	if (range->start < mmap_min_addr)
 		return -EINVAL;
-	if (start >= task_size)
+	if (range->start >= task_size)
 		return -EINVAL;
-	if (len > task_size - start)
+	if (range->len > task_size - range->start)
 		return -EINVAL;
 	return 0;
 }
@@ -1331,8 +1326,7 @@  static int userfaultfd_register(struct userfaultfd_ctx *ctx,
 		vm_flags |= VM_UFFD_MINOR;
 	}
 
-	ret = validate_range(mm, uffdio_register.range.start,
-			     uffdio_register.range.len);
+	ret = validate_range(mm, &uffdio_register.range);
 	if (ret)
 		goto out;
 
@@ -1538,11 +1532,11 @@  static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
 	if (copy_from_user(&uffdio_unregister, buf, sizeof(uffdio_unregister)))
 		goto out;
 
-	ret = validate_range(mm, uffdio_unregister.start,
-			     uffdio_unregister.len);
+	ret = validate_range(mm, &uffdio_unregister);
 	if (ret)
 		goto out;
 
+	/* Get rid of start + end in favor of range *? */
 	start = uffdio_unregister.start;
 	end = start + uffdio_unregister.len;
 
@@ -1597,6 +1591,7 @@  static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
 	prev = vma_prev(&vmi);
 	ret = 0;
 	for_each_vma_range(vmi, vma, end) {
+		struct uffdio_range range;
 		cond_resched();
 
 		BUG_ON(!vma_can_userfault(vma, vma->vm_flags));
@@ -1614,6 +1609,8 @@  static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
 			start = vma->vm_start;
 		vma_end = min(end, vma->vm_end);
 
+		range.start = start;
+		range.len = vma_end - start;
 		if (userfaultfd_missing(vma)) {
 			/*
 			 * Wake any concurrent pending userfault while
@@ -1621,15 +1618,12 @@  static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
 			 * permanently and it avoids userland to call
 			 * UFFDIO_WAKE explicitly.
 			 */
-			struct userfaultfd_wake_range range;
-			range.start = start;
-			range.len = vma_end - start;
 			wake_userfault(vma->vm_userfaultfd_ctx.ctx, &range);
 		}
 
 		/* Reset ptes for the whole vma range if wr-protected */
 		if (userfaultfd_wp(vma))
-			uffd_wp_range(vma, start, vma_end - start, false);
+			uffd_wp_range(vma, &range, false);
 
 		new_flags = vma->vm_flags & ~__VM_UFFD_FLAGS;
 		prev = vma_merge(&vmi, mm, prev, start, vma_end, new_flags,
@@ -1680,27 +1674,23 @@  static int userfaultfd_wake(struct userfaultfd_ctx *ctx,
 {
 	int ret;
 	struct uffdio_range uffdio_wake;
-	struct userfaultfd_wake_range range;
 	const void __user *buf = (void __user *)arg;
 
 	ret = -EFAULT;
 	if (copy_from_user(&uffdio_wake, buf, sizeof(uffdio_wake)))
 		goto out;
 
-	ret = validate_range(ctx->mm, uffdio_wake.start, uffdio_wake.len);
+	ret = validate_range(ctx->mm, &uffdio_wake);
 	if (ret)
 		goto out;
 
-	range.start = uffdio_wake.start;
-	range.len = uffdio_wake.len;
-
 	/*
 	 * len == 0 means wake all and we don't want to wake all here,
 	 * so check it again to be sure.
 	 */
-	VM_BUG_ON(!range.len);
+	VM_BUG_ON(!uffdio_wake.len);
 
-	wake_userfault(ctx, &range);
+	wake_userfault(ctx, &uffdio_wake);
 	ret = 0;
 
 out:
@@ -1713,7 +1703,7 @@  static int userfaultfd_copy(struct userfaultfd_ctx *ctx,
 	__s64 ret;
 	struct uffdio_copy uffdio_copy;
 	struct uffdio_copy __user *user_uffdio_copy;
-	struct userfaultfd_wake_range range;
+	struct uffdio_range range;
 	int flags = 0;
 
 	user_uffdio_copy = (struct uffdio_copy __user *) arg;
@@ -1728,7 +1718,9 @@  static int userfaultfd_copy(struct userfaultfd_ctx *ctx,
 			   sizeof(uffdio_copy)-sizeof(__s64)))
 		goto out;
 
-	ret = validate_range(ctx->mm, uffdio_copy.dst, uffdio_copy.len);
+	range.start = uffdio_copy.dst;
+	range.len = uffdio_copy.len;
+	ret = validate_range(ctx->mm, &range);
 	if (ret)
 		goto out;
 	/*
@@ -1744,9 +1736,8 @@  static int userfaultfd_copy(struct userfaultfd_ctx *ctx,
 	if (uffdio_copy.mode & UFFDIO_COPY_MODE_WP)
 		flags |= MFILL_ATOMIC_WP;
 	if (mmget_not_zero(ctx->mm)) {
-		ret = mfill_atomic_copy(ctx->mm, uffdio_copy.dst, uffdio_copy.src,
-					uffdio_copy.len, &ctx->mmap_changing,
-					flags);
+		ret = mfill_atomic_copy(ctx->mm, uffdio_copy.src, &range,
+					&ctx->mmap_changing, flags);
 		mmput(ctx->mm);
 	} else {
 		return -ESRCH;
@@ -1758,10 +1749,8 @@  static int userfaultfd_copy(struct userfaultfd_ctx *ctx,
 	BUG_ON(!ret);
 	/* len == 0 would wake all */
 	range.len = ret;
-	if (!(uffdio_copy.mode & UFFDIO_COPY_MODE_DONTWAKE)) {
-		range.start = uffdio_copy.dst;
+	if (!(uffdio_copy.mode & UFFDIO_COPY_MODE_DONTWAKE))
 		wake_userfault(ctx, &range);
-	}
 	ret = range.len == uffdio_copy.len ? 0 : -EAGAIN;
 out:
 	return ret;
@@ -1773,7 +1762,7 @@  static int userfaultfd_zeropage(struct userfaultfd_ctx *ctx,
 	__s64 ret;
 	struct uffdio_zeropage uffdio_zeropage;
 	struct uffdio_zeropage __user *user_uffdio_zeropage;
-	struct userfaultfd_wake_range range;
+	struct uffdio_range range;
 
 	user_uffdio_zeropage = (struct uffdio_zeropage __user *) arg;
 
@@ -1787,8 +1776,8 @@  static int userfaultfd_zeropage(struct userfaultfd_ctx *ctx,
 			   sizeof(uffdio_zeropage)-sizeof(__s64)))
 		goto out;
 
-	ret = validate_range(ctx->mm, uffdio_zeropage.range.start,
-			     uffdio_zeropage.range.len);
+	range = uffdio_zeropage.range;
+	ret = validate_range(ctx->mm, &range);
 	if (ret)
 		goto out;
 	ret = -EINVAL;
@@ -1796,8 +1785,7 @@  static int userfaultfd_zeropage(struct userfaultfd_ctx *ctx,
 		goto out;
 
 	if (mmget_not_zero(ctx->mm)) {
-		ret = mfill_atomic_zeropage(ctx->mm, uffdio_zeropage.range.start,
-					   uffdio_zeropage.range.len,
+		ret = mfill_atomic_zeropage(ctx->mm, &uffdio_zeropage.range,
 					   &ctx->mmap_changing);
 		mmput(ctx->mm);
 	} else {
@@ -1811,7 +1799,6 @@  static int userfaultfd_zeropage(struct userfaultfd_ctx *ctx,
 	BUG_ON(!ret);
 	range.len = ret;
 	if (!(uffdio_zeropage.mode & UFFDIO_ZEROPAGE_MODE_DONTWAKE)) {
-		range.start = uffdio_zeropage.range.start;
 		wake_userfault(ctx, &range);
 	}
 	ret = range.len == uffdio_zeropage.range.len ? 0 : -EAGAIN;
@@ -1825,7 +1812,6 @@  static int userfaultfd_writeprotect(struct userfaultfd_ctx *ctx,
 	int ret;
 	struct uffdio_writeprotect uffdio_wp;
 	struct uffdio_writeprotect __user *user_uffdio_wp;
-	struct userfaultfd_wake_range range;
 	bool mode_wp, mode_dontwake;
 
 	if (atomic_read(&ctx->mmap_changing))
@@ -1837,8 +1823,7 @@  static int userfaultfd_writeprotect(struct userfaultfd_ctx *ctx,
 			   sizeof(struct uffdio_writeprotect)))
 		return -EFAULT;
 
-	ret = validate_range(ctx->mm, uffdio_wp.range.start,
-			     uffdio_wp.range.len);
+	ret = validate_range(ctx->mm, &uffdio_wp.range);
 	if (ret)
 		return ret;
 
@@ -1853,9 +1838,8 @@  static int userfaultfd_writeprotect(struct userfaultfd_ctx *ctx,
 		return -EINVAL;
 
 	if (mmget_not_zero(ctx->mm)) {
-		ret = mwriteprotect_range(ctx->mm, uffdio_wp.range.start,
-					  uffdio_wp.range.len, mode_wp,
-					  &ctx->mmap_changing);
+		ret = mwriteprotect_range(ctx->mm, &uffdio_wp.range,
+					  mode_wp, &ctx->mmap_changing);
 		mmput(ctx->mm);
 	} else {
 		return -ESRCH;
@@ -1864,11 +1848,8 @@  static int userfaultfd_writeprotect(struct userfaultfd_ctx *ctx,
 	if (ret)
 		return ret;
 
-	if (!mode_wp && !mode_dontwake) {
-		range.start = uffdio_wp.range.start;
-		range.len = uffdio_wp.range.len;
-		wake_userfault(ctx, &range);
-	}
+	if (!mode_wp && !mode_dontwake)
+		wake_userfault(ctx, &uffdio_wp.range);
 	return ret;
 }
 
@@ -1877,7 +1858,7 @@  static int userfaultfd_continue(struct userfaultfd_ctx *ctx, unsigned long arg)
 	__s64 ret;
 	struct uffdio_continue uffdio_continue;
 	struct uffdio_continue __user *user_uffdio_continue;
-	struct userfaultfd_wake_range range;
+	struct uffdio_range range;
 
 	user_uffdio_continue = (struct uffdio_continue __user *)arg;
 
@@ -1891,23 +1872,20 @@  static int userfaultfd_continue(struct userfaultfd_ctx *ctx, unsigned long arg)
 			   sizeof(uffdio_continue) - (sizeof(__s64))))
 		goto out;
 
-	ret = validate_range(ctx->mm, uffdio_continue.range.start,
-			     uffdio_continue.range.len);
+	range = uffdio_continue.range;
+	ret = validate_range(ctx->mm, &range);
 	if (ret)
 		goto out;
 
 	ret = -EINVAL;
 	/* double check for wraparound just in case. */
-	if (uffdio_continue.range.start + uffdio_continue.range.len <=
-	    uffdio_continue.range.start) {
+	if (range.start + range.len <= range.start)
 		goto out;
-	}
 	if (uffdio_continue.mode & ~UFFDIO_CONTINUE_MODE_DONTWAKE)
 		goto out;
 
 	if (mmget_not_zero(ctx->mm)) {
-		ret = mfill_atomic_continue(ctx->mm, uffdio_continue.range.start,
-					    uffdio_continue.range.len,
+		ret = mfill_atomic_continue(ctx->mm, &range,
 					    &ctx->mmap_changing);
 		mmput(ctx->mm);
 	} else {
@@ -1923,7 +1901,6 @@  static int userfaultfd_continue(struct userfaultfd_ctx *ctx, unsigned long arg)
 	BUG_ON(!ret);
 	range.len = ret;
 	if (!(uffdio_continue.mode & UFFDIO_CONTINUE_MODE_DONTWAKE)) {
-		range.start = uffdio_continue.range.start;
 		wake_userfault(ctx, &range);
 	}
 	ret = range.len == uffdio_continue.range.len ? 0 : -EAGAIN;
diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h
index a45c1b42e500..fcd95e3d3dcd 100644
--- a/include/linux/userfaultfd_k.h
+++ b/include/linux/userfaultfd_k.h
@@ -63,20 +63,21 @@  extern int mfill_atomic_install_pte(pmd_t *dst_pmd,
 				    unsigned long dst_addr, struct page *page,
 				    bool newly_allocated, uffd_flags_t flags);
 
-extern ssize_t mfill_atomic_copy(struct mm_struct *dst_mm, unsigned long dst_start,
-				 unsigned long src_start, unsigned long len,
+extern ssize_t mfill_atomic_copy(struct mm_struct *dst_mm, unsigned long src_start,
+				 const struct uffdio_range *dst,
 				 atomic_t *mmap_changing, uffd_flags_t flags);
 extern ssize_t mfill_atomic_zeropage(struct mm_struct *dst_mm,
-				     unsigned long dst_start,
-				     unsigned long len,
+				     const struct uffdio_range *dst,
 				     atomic_t *mmap_changing);
-extern ssize_t mfill_atomic_continue(struct mm_struct *dst_mm, unsigned long dst_start,
-				     unsigned long len, atomic_t *mmap_changing);
+extern ssize_t mfill_atomic_continue(struct mm_struct *dst_mm,
+				     const struct uffdio_range *dst,
+				    atomic_t *mmap_changing);
 extern int mwriteprotect_range(struct mm_struct *dst_mm,
-			       unsigned long start, unsigned long len,
+			       const struct uffdio_range *range,
 			       bool enable_wp, atomic_t *mmap_changing);
 extern long uffd_wp_range(struct vm_area_struct *vma,
-			  unsigned long start, unsigned long len, bool enable_wp);
+			  const struct uffdio_range *range,
+			  bool enable_wp);
 
 /* mm helpers */
 static inline bool is_mergeable_vm_userfaultfd_ctx(struct vm_area_struct *vma,
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index c0d061acc069..870e7489e8d1 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -21,8 +21,7 @@ 
 
 static __always_inline
 struct vm_area_struct *find_dst_vma(struct mm_struct *dst_mm,
-				    unsigned long dst_start,
-				    unsigned long len)
+				    const struct uffdio_range *dst)
 {
 	/*
 	 * Make sure that the dst range is both valid and fully within a
@@ -30,12 +29,12 @@  struct vm_area_struct *find_dst_vma(struct mm_struct *dst_mm,
 	 */
 	struct vm_area_struct *dst_vma;
 
-	dst_vma = find_vma(dst_mm, dst_start);
+	dst_vma = find_vma(dst_mm, dst->start);
 	if (!dst_vma)
 		return NULL;
 
-	if (dst_start < dst_vma->vm_start ||
-	    dst_start + len > dst_vma->vm_end)
+	if (dst->start < dst_vma->vm_start ||
+	    dst->start + dst->len > dst_vma->vm_end)
 		return NULL;
 
 	/*
@@ -309,9 +308,8 @@  static pmd_t *mm_alloc_pmd(struct mm_struct *mm, unsigned long address)
  */
 static __always_inline ssize_t mfill_atomic_hugetlb(
 					      struct vm_area_struct *dst_vma,
-					      unsigned long dst_start,
 					      unsigned long src_start,
-					      unsigned long len,
+					      const struct uffdio_range *dst,
 					      uffd_flags_t flags)
 {
 	int mode = flags & MFILL_ATOMIC_MODE_MASK;
@@ -339,7 +337,7 @@  static __always_inline ssize_t mfill_atomic_hugetlb(
 	}
 
 	src_addr = src_start;
-	dst_addr = dst_start;
+	dst_addr = dst->start;
 	copied = 0;
 	page = NULL;
 	vma_hpagesize = vma_kernel_pagesize(dst_vma);
@@ -348,7 +346,7 @@  static __always_inline ssize_t mfill_atomic_hugetlb(
 	 * Validate alignment based on huge page size
 	 */
 	err = -EINVAL;
-	if (dst_start & (vma_hpagesize - 1) || len & (vma_hpagesize - 1))
+	if (dst->start & (vma_hpagesize - 1) || dst->len & (vma_hpagesize - 1))
 		goto out_unlock;
 
 retry:
@@ -358,7 +356,7 @@  static __always_inline ssize_t mfill_atomic_hugetlb(
 	 */
 	if (!dst_vma) {
 		err = -ENOENT;
-		dst_vma = find_dst_vma(dst_mm, dst_start, len);
+		dst_vma = find_dst_vma(dst_mm, dst);
 		if (!dst_vma || !is_vm_hugetlb_page(dst_vma))
 			goto out_unlock;
 
@@ -378,8 +376,8 @@  static __always_inline ssize_t mfill_atomic_hugetlb(
 			goto out_unlock;
 	}
 
-	while (src_addr < src_start + len) {
-		BUG_ON(dst_addr >= dst_start + len);
+	while (src_addr < src_start + dst->len) {
+		BUG_ON(dst_addr >= dst->start + dst->len);
 
 		/*
 		 * Serialize via vma_lock and hugetlb_fault_mutex.
@@ -461,10 +459,9 @@  static __always_inline ssize_t mfill_atomic_hugetlb(
 #else /* !CONFIG_HUGETLB_PAGE */
 /* fail at build time if gcc attempts to use this */
 extern ssize_t mfill_atomic_hugetlb(struct vm_area_struct *dst_vma,
-				    unsigned long dst_start,
 				    unsigned long src_start,
-				    unsigned long len,
-				    uffd_flags_t flags);
+				    struct uffdio_range dst,
+				    uffd_flags_t mode_flags);
 #endif /* CONFIG_HUGETLB_PAGE */
 
 static __always_inline ssize_t mfill_atomic_pte(pmd_t *dst_pmd,
@@ -510,9 +507,8 @@  static __always_inline ssize_t mfill_atomic_pte(pmd_t *dst_pmd,
 }
 
 static __always_inline ssize_t mfill_atomic(struct mm_struct *dst_mm,
-					    unsigned long dst_start,
 					    unsigned long src_start,
-					    unsigned long len,
+					    const struct uffdio_range *dst,
 					    atomic_t *mmap_changing,
 					    uffd_flags_t flags)
 {
@@ -526,15 +522,15 @@  static __always_inline ssize_t mfill_atomic(struct mm_struct *dst_mm,
 	/*
 	 * Sanitize the command parameters:
 	 */
-	BUG_ON(dst_start & ~PAGE_MASK);
-	BUG_ON(len & ~PAGE_MASK);
+	BUG_ON(dst->start & ~PAGE_MASK);
+	BUG_ON(dst->len & ~PAGE_MASK);
 
 	/* Does the address range wrap, or is the span zero-sized? */
-	BUG_ON(src_start + len <= src_start);
-	BUG_ON(dst_start + len <= dst_start);
+	BUG_ON(src_start + dst->len <= src_start);
+	BUG_ON(dst->start + dst->len <= dst->start);
 
 	src_addr = src_start;
-	dst_addr = dst_start;
+	dst_addr = dst->start;
 	copied = 0;
 	page = NULL;
 retry:
@@ -554,7 +550,7 @@  static __always_inline ssize_t mfill_atomic(struct mm_struct *dst_mm,
 	 * both valid and fully within a single existing vma.
 	 */
 	err = -ENOENT;
-	dst_vma = find_dst_vma(dst_mm, dst_start, len);
+	dst_vma = find_dst_vma(dst_mm, dst);
 	if (!dst_vma)
 		goto out_unlock;
 
@@ -578,8 +574,7 @@  static __always_inline ssize_t mfill_atomic(struct mm_struct *dst_mm,
 	 * If this is a HUGETLB vma, pass off to appropriate routine
 	 */
 	if (is_vm_hugetlb_page(dst_vma))
-		return  mfill_atomic_hugetlb(dst_vma, dst_start,
-					     src_start, len, flags);
+		return  mfill_atomic_hugetlb(dst_vma, src_start, dst, flags);
 
 	if (!vma_is_anonymous(dst_vma) && !vma_is_shmem(dst_vma))
 		goto out_unlock;
@@ -597,10 +592,10 @@  static __always_inline ssize_t mfill_atomic(struct mm_struct *dst_mm,
 	    unlikely(anon_vma_prepare(dst_vma)))
 		goto out_unlock;
 
-	while (src_addr < src_start + len) {
+	while (src_addr < src_start + dst->len) {
 		pmd_t dst_pmdval;
 
-		BUG_ON(dst_addr >= dst_start + len);
+		BUG_ON(dst_addr >= dst->start + dst->len);
 
 		dst_pmd = mm_alloc_pmd(dst_mm, dst_addr);
 		if (unlikely(!dst_pmd)) {
@@ -678,30 +673,32 @@  static __always_inline ssize_t mfill_atomic(struct mm_struct *dst_mm,
 	return copied ? copied : err;
 }
 
-ssize_t mfill_atomic_copy(struct mm_struct *dst_mm, unsigned long dst_start,
-			  unsigned long src_start, unsigned long len,
+ssize_t mfill_atomic_copy(struct mm_struct *dst_mm, unsigned long src_start,
+			  const struct uffdio_range *dst,
 			  atomic_t *mmap_changing, uffd_flags_t flags)
 {
-	return mfill_atomic(dst_mm, dst_start, src_start, len,
+	return mfill_atomic(dst_mm, src_start, dst,
 			    mmap_changing, flags | MFILL_ATOMIC_COPY);
 }
 
-ssize_t mfill_atomic_zeropage(struct mm_struct *dst_mm, unsigned long start,
-			      unsigned long len, atomic_t *mmap_changing)
+ssize_t mfill_atomic_zeropage(struct mm_struct *dst_mm,
+			      const struct uffdio_range *dst,
+			      atomic_t *mmap_changing)
 {
-	return mfill_atomic(dst_mm, start, 0, len,
+	return mfill_atomic(dst_mm, 0, dst,
 			    mmap_changing, MFILL_ATOMIC_ZEROPAGE);
 }
 
-ssize_t mfill_atomic_continue(struct mm_struct *dst_mm, unsigned long start,
-			      unsigned long len, atomic_t *mmap_changing)
+ssize_t mfill_atomic_continue(struct mm_struct *dst_mm,
+			      const struct uffdio_range *dst,
+			      atomic_t *mmap_changing)
 {
-	return mfill_atomic(dst_mm, start, 0, len,
+	return mfill_atomic(dst_mm, 0, dst,
 			    mmap_changing, MFILL_ATOMIC_CONTINUE);
 }
 
 long uffd_wp_range(struct vm_area_struct *dst_vma,
-		   unsigned long start, unsigned long len, bool enable_wp)
+		   const struct uffdio_range *range, bool enable_wp)
 {
 	unsigned int mm_cp_flags;
 	struct mmu_gather tlb;
@@ -721,15 +718,16 @@  long uffd_wp_range(struct vm_area_struct *dst_vma,
 	if (!enable_wp && vma_wants_manual_pte_write_upgrade(dst_vma))
 		mm_cp_flags |= MM_CP_TRY_CHANGE_WRITABLE;
 	tlb_gather_mmu(&tlb, dst_vma->vm_mm);
-	ret = change_protection(&tlb, dst_vma, start, start + len, mm_cp_flags);
+	ret = change_protection(&tlb, dst_vma, range->start,
+				range->start + range->len, mm_cp_flags);
 	tlb_finish_mmu(&tlb);
 
 	return ret;
 }
 
-int mwriteprotect_range(struct mm_struct *dst_mm, unsigned long start,
-			unsigned long len, bool enable_wp,
-			atomic_t *mmap_changing)
+int mwriteprotect_range(struct mm_struct *dst_mm,
+			const struct uffdio_range *dst,
+			bool enable_wp, atomic_t *mmap_changing)
 {
 	struct vm_area_struct *dst_vma;
 	unsigned long page_mask;
@@ -738,11 +736,11 @@  int mwriteprotect_range(struct mm_struct *dst_mm, unsigned long start,
 	/*
 	 * Sanitize the command parameters:
 	 */
-	BUG_ON(start & ~PAGE_MASK);
-	BUG_ON(len & ~PAGE_MASK);
+	BUG_ON(dst->start & ~PAGE_MASK);
+	BUG_ON(dst->len & ~PAGE_MASK);
 
 	/* Does the address range wrap, or is the span zero-sized? */
-	BUG_ON(start + len <= start);
+	BUG_ON(dst->start + dst->len <= dst->start);
 
 	mmap_read_lock(dst_mm);
 
@@ -756,7 +754,7 @@  int mwriteprotect_range(struct mm_struct *dst_mm, unsigned long start,
 		goto out_unlock;
 
 	err = -ENOENT;
-	dst_vma = find_dst_vma(dst_mm, start, len);
+	dst_vma = find_dst_vma(dst_mm, dst);
 
 	if (!dst_vma)
 		goto out_unlock;
@@ -768,11 +766,11 @@  int mwriteprotect_range(struct mm_struct *dst_mm, unsigned long start,
 	if (is_vm_hugetlb_page(dst_vma)) {
 		err = -EINVAL;
 		page_mask = vma_kernel_pagesize(dst_vma) - 1;
-		if ((start & page_mask) || (len & page_mask))
+		if ((dst->start & page_mask) || (dst->len & page_mask))
 			goto out_unlock;
 	}
 
-	err = uffd_wp_range(dst_vma, start, len, enable_wp);
+	err = uffd_wp_range(dst_vma, dst, enable_wp);
 
 	/* Return 0 on success, <0 on failures */
 	if (err > 0)