[hmm,15/15] mm/hmm: remove hmm_mirror and related
diff mbox series

Message ID 20191015181242.8343-16-jgg@ziepe.ca
State Superseded
Headers show
Series
  • Consolidate the mmu notifier interval_tree and locking
Related show

Commit Message

Jason Gunthorpe Oct. 15, 2019, 6:12 p.m. UTC
From: Jason Gunthorpe <jgg@mellanox.com>

The only two users of this are now converted to use mmu_range_notifier,
delete all the code and update hmm.rst.

Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 Documentation/vm/hmm.rst | 105 ++++-----------
 include/linux/hmm.h      | 183 +------------------------
 mm/Kconfig               |   1 -
 mm/hmm.c                 | 284 +--------------------------------------
 4 files changed, 33 insertions(+), 540 deletions(-)

Comments

Jerome Glisse Oct. 21, 2019, 6:38 p.m. UTC | #1
On Tue, Oct 15, 2019 at 03:12:42PM -0300, Jason Gunthorpe wrote:
> From: Jason Gunthorpe <jgg@mellanox.com>
> 
> The only two users of this are now converted to use mmu_range_notifier,
> delete all the code and update hmm.rst.

I guess I should point out that the reasons for hmm_mirror and hmm
were:
    1) Maybe define a common API for userspace to provide memory
       placement hints (NUMA for GPU)
    2) multi-devices sharing same mirror page table

But support for multi-GPU in nouveau is way behind, and I guess such
optimizations will have to re-materialize whatever is necessary once that
happens.

Note this patch should also update kernel/fork.c and the mm_struct
definition AFAICT. With those changes you can add my:

Reviewed-by: Jérôme Glisse <jglisse@redhat.com>

> 
> Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
> ---
>  Documentation/vm/hmm.rst | 105 ++++-----------
>  include/linux/hmm.h      | 183 +------------------------
>  mm/Kconfig               |   1 -
>  mm/hmm.c                 | 284 +--------------------------------------
>  4 files changed, 33 insertions(+), 540 deletions(-)
> 
> diff --git a/Documentation/vm/hmm.rst b/Documentation/vm/hmm.rst
> index 0a5960beccf76d..a247643035c4e2 100644
> --- a/Documentation/vm/hmm.rst
> +++ b/Documentation/vm/hmm.rst
> @@ -147,49 +147,16 @@ Address space mirroring implementation and API
>  Address space mirroring's main objective is to allow duplication of a range of
>  CPU page table into a device page table; HMM helps keep both synchronized. A
>  device driver that wants to mirror a process address space must start with the
> -registration of an hmm_mirror struct::
> -
> - int hmm_mirror_register(struct hmm_mirror *mirror,
> -                         struct mm_struct *mm);
> -
> -The mirror struct has a set of callbacks that are used
> -to propagate CPU page tables::
> -
> - struct hmm_mirror_ops {
> -     /* release() - release hmm_mirror
> -      *
> -      * @mirror: pointer to struct hmm_mirror
> -      *
> -      * This is called when the mm_struct is being released.  The callback
> -      * must ensure that all access to any pages obtained from this mirror
> -      * is halted before the callback returns. All future access should
> -      * fault.
> -      */
> -     void (*release)(struct hmm_mirror *mirror);
> -
> -     /* sync_cpu_device_pagetables() - synchronize page tables
> -      *
> -      * @mirror: pointer to struct hmm_mirror
> -      * @update: update information (see struct mmu_notifier_range)
> -      * Return: -EAGAIN if update.blockable false and callback need to
> -      *         block, 0 otherwise.
> -      *
> -      * This callback ultimately originates from mmu_notifiers when the CPU
> -      * page table is updated. The device driver must update its page table
> -      * in response to this callback. The update argument tells what action
> -      * to perform.
> -      *
> -      * The device driver must not return from this callback until the device
> -      * page tables are completely updated (TLBs flushed, etc); this is a
> -      * synchronous call.
> -      */
> -     int (*sync_cpu_device_pagetables)(struct hmm_mirror *mirror,
> -                                       const struct hmm_update *update);
> - };
> -
> -The device driver must perform the update action to the range (mark range
> -read only, or fully unmap, etc.). The device must complete the update before
> -the driver callback returns.
> +registration of a mmu_range_notifier::
> +
> + mrn->ops = &driver_ops;
> + int mmu_range_notifier_insert(struct mmu_range_notifier *mrn,
> +			      unsigned long start, unsigned long length,
> +			      struct mm_struct *mm);
> +
> +During the driver_ops->invalidate() callback the device driver must perform
> +the update action to the range (mark range read only, or fully unmap,
> +etc.). The device must complete the update before the driver callback returns.
>  
>  When the device driver wants to populate a range of virtual addresses, it can
>  use::
> @@ -216,70 +183,46 @@ The usage pattern is::
>        struct hmm_range range;
>        ...
>  
> +      range.notifier = &mrn;
>        range.start = ...;
>        range.end = ...;
>        range.pfns = ...;
>        range.flags = ...;
>        range.values = ...;
>        range.pfn_shift = ...;
> -      hmm_range_register(&range, mirror);
>  
> -      /*
> -       * Just wait for range to be valid, safe to ignore return value as we
> -       * will use the return value of hmm_range_fault() below under the
> -       * mmap_sem to ascertain the validity of the range.
> -       */
> -      hmm_range_wait_until_valid(&range, TIMEOUT_IN_MSEC);
> +      if (!mmget_not_zero(mrn->notifier.mm))
> +          return -EFAULT;
>  
>   again:
> +      range.notifier_seq = mmu_range_read_begin(&mrn);
>        down_read(&mm->mmap_sem);
>        ret = hmm_range_fault(&range, HMM_RANGE_SNAPSHOT);
>        if (ret) {
>            up_read(&mm->mmap_sem);
> -          if (ret == -EBUSY) {
> -            /*
> -             * No need to check hmm_range_wait_until_valid() return value
> -             * on retry we will get proper error with hmm_range_fault()
> -             */
> -            hmm_range_wait_until_valid(&range, TIMEOUT_IN_MSEC);
> -            goto again;
> -          }
> -          hmm_range_unregister(&range);
> +          if (ret == -EBUSY)
> +                 goto again;
>            return ret;
>        }
> +      up_read(&mm->mmap_sem);
> +
>        take_lock(driver->update);
> -      if (!hmm_range_valid(&range)) {
> +      if (mmu_range_read_retry(&mrn, range.notifier_seq) {
>            release_lock(driver->update);
> -          up_read(&mm->mmap_sem);
>            goto again;
>        }
>  
> -      // Use pfns array content to update device page table
> +      /* Use pfns array content to update device page table,
> +       * under the update lock */
>  
> -      hmm_range_unregister(&range);
>        release_lock(driver->update);
> -      up_read(&mm->mmap_sem);
>        return 0;
>   }
>  
>  The driver->update lock is the same lock that the driver takes inside its
> -sync_cpu_device_pagetables() callback. That lock must be held before calling
> -hmm_range_valid() to avoid any race with a concurrent CPU page table update.
> -
> -HMM implements all this on top of the mmu_notifier API because we wanted a
> -simpler API and also to be able to perform optimizations latter on like doing
> -concurrent device updates in multi-devices scenario.
> -
> -HMM also serves as an impedance mismatch between how CPU page table updates
> -are done (by CPU write to the page table and TLB flushes) and how devices
> -update their own page table. Device updates are a multi-step process. First,
> -appropriate commands are written to a buffer, then this buffer is scheduled for
> -execution on the device. It is only once the device has executed commands in
> -the buffer that the update is done. Creating and scheduling the update command
> -buffer can happen concurrently for multiple devices. Waiting for each device to
> -report commands as executed is serialized (there is no point in doing this
> -concurrently).
> -
> +invalidate() callback. That lock must be held before calling
> +mmu_range_read_retry() to avoid any race with a concurrent CPU page table
> +update.
>  
>  Leverage default_flags and pfn_flags_mask
>  =========================================
> diff --git a/include/linux/hmm.h b/include/linux/hmm.h
> index 2666eb08a40615..b4af5173523232 100644
> --- a/include/linux/hmm.h
> +++ b/include/linux/hmm.h
> @@ -68,29 +68,6 @@
>  #include <linux/completion.h>
>  #include <linux/mmu_notifier.h>
>  
> -
> -/*
> - * struct hmm - HMM per mm struct
> - *
> - * @mm: mm struct this HMM struct is bound to
> - * @lock: lock protecting ranges list
> - * @ranges: list of range being snapshotted
> - * @mirrors: list of mirrors for this mm
> - * @mmu_notifier: mmu notifier to track updates to CPU page table
> - * @mirrors_sem: read/write semaphore protecting the mirrors list
> - * @wq: wait queue for user waiting on a range invalidation
> - * @notifiers: count of active mmu notifiers
> - */
> -struct hmm {
> -	struct mmu_notifier	mmu_notifier;
> -	spinlock_t		ranges_lock;
> -	struct list_head	ranges;
> -	struct list_head	mirrors;
> -	struct rw_semaphore	mirrors_sem;
> -	wait_queue_head_t	wq;
> -	long			notifiers;
> -};
> -
>  /*
>   * hmm_pfn_flag_e - HMM flag enums
>   *
> @@ -143,9 +120,8 @@ enum hmm_pfn_value_e {
>  /*
>   * struct hmm_range - track invalidation lock on virtual address range
>   *
> - * @notifier: an optional mmu_range_notifier
> - * @notifier_seq: when notifier is used this is the result of
> - *                mmu_range_read_begin()
> + * @notifier: a mmu_range_notifier that includes the start/end
> + * @notifier_seq: result of mmu_range_read_begin()
>   * @hmm: the core HMM structure this range is active against
>   * @vma: the vm area struct for the range
>   * @list: all range lock are on a list
> @@ -162,8 +138,6 @@ enum hmm_pfn_value_e {
>  struct hmm_range {
>  	struct mmu_range_notifier *notifier;
>  	unsigned long		notifier_seq;
> -	struct hmm		*hmm;
> -	struct list_head	list;
>  	unsigned long		start;
>  	unsigned long		end;
>  	uint64_t		*pfns;
> @@ -172,32 +146,8 @@ struct hmm_range {
>  	uint64_t		default_flags;
>  	uint64_t		pfn_flags_mask;
>  	uint8_t			pfn_shift;
> -	bool			valid;
>  };
>  
> -/*
> - * hmm_range_wait_until_valid() - wait for range to be valid
> - * @range: range affected by invalidation to wait on
> - * @timeout: time out for wait in ms (ie abort wait after that period of time)
> - * Return: true if the range is valid, false otherwise.
> - */
> -static inline bool hmm_range_wait_until_valid(struct hmm_range *range,
> -					      unsigned long timeout)
> -{
> -	return wait_event_timeout(range->hmm->wq, range->valid,
> -				  msecs_to_jiffies(timeout)) != 0;
> -}
> -
> -/*
> - * hmm_range_valid() - test if a range is valid or not
> - * @range: range
> - * Return: true if the range is valid, false otherwise.
> - */
> -static inline bool hmm_range_valid(struct hmm_range *range)
> -{
> -	return range->valid;
> -}
> -
>  /*
>   * hmm_device_entry_to_page() - return struct page pointed to by a device entry
>   * @range: range use to decode device entry value
> @@ -267,111 +217,6 @@ static inline uint64_t hmm_device_entry_from_pfn(const struct hmm_range *range,
>  		range->flags[HMM_PFN_VALID];
>  }
>  
> -/*
> - * Mirroring: how to synchronize device page table with CPU page table.
> - *
> - * A device driver that is participating in HMM mirroring must always
> - * synchronize with CPU page table updates. For this, device drivers can either
> - * directly use mmu_notifier APIs or they can use the hmm_mirror API. Device
> - * drivers can decide to register one mirror per device per process, or just
> - * one mirror per process for a group of devices. The pattern is:
> - *
> - *      int device_bind_address_space(..., struct mm_struct *mm, ...)
> - *      {
> - *          struct device_address_space *das;
> - *
> - *          // Device driver specific initialization, and allocation of das
> - *          // which contains an hmm_mirror struct as one of its fields.
> - *          ...
> - *
> - *          ret = hmm_mirror_register(&das->mirror, mm, &device_mirror_ops);
> - *          if (ret) {
> - *              // Cleanup on error
> - *              return ret;
> - *          }
> - *
> - *          // Other device driver specific initialization
> - *          ...
> - *      }
> - *
> - * Once an hmm_mirror is registered for an address space, the device driver
> - * will get callbacks through sync_cpu_device_pagetables() operation (see
> - * hmm_mirror_ops struct).
> - *
> - * Device driver must not free the struct containing the hmm_mirror struct
> - * before calling hmm_mirror_unregister(). The expected usage is to do that when
> - * the device driver is unbinding from an address space.
> - *
> - *
> - *      void device_unbind_address_space(struct device_address_space *das)
> - *      {
> - *          // Device driver specific cleanup
> - *          ...
> - *
> - *          hmm_mirror_unregister(&das->mirror);
> - *
> - *          // Other device driver specific cleanup, and now das can be freed
> - *          ...
> - *      }
> - */
> -
> -struct hmm_mirror;
> -
> -/*
> - * struct hmm_mirror_ops - HMM mirror device operations callback
> - *
> - * @update: callback to update range on a device
> - */
> -struct hmm_mirror_ops {
> -	/* release() - release hmm_mirror
> -	 *
> -	 * @mirror: pointer to struct hmm_mirror
> -	 *
> -	 * This is called when the mm_struct is being released.  The callback
> -	 * must ensure that all access to any pages obtained from this mirror
> -	 * is halted before the callback returns. All future access should
> -	 * fault.
> -	 */
> -	void (*release)(struct hmm_mirror *mirror);
> -
> -	/* sync_cpu_device_pagetables() - synchronize page tables
> -	 *
> -	 * @mirror: pointer to struct hmm_mirror
> -	 * @update: update information (see struct mmu_notifier_range)
> -	 * Return: -EAGAIN if mmu_notifier_range_blockable(update) is false
> -	 * and callback needs to block, 0 otherwise.
> -	 *
> -	 * This callback ultimately originates from mmu_notifiers when the CPU
> -	 * page table is updated. The device driver must update its page table
> -	 * in response to this callback. The update argument tells what action
> -	 * to perform.
> -	 *
> -	 * The device driver must not return from this callback until the device
> -	 * page tables are completely updated (TLBs flushed, etc); this is a
> -	 * synchronous call.
> -	 */
> -	int (*sync_cpu_device_pagetables)(
> -		struct hmm_mirror *mirror,
> -		const struct mmu_notifier_range *update);
> -};
> -
> -/*
> - * struct hmm_mirror - mirror struct for a device driver
> - *
> - * @hmm: pointer to struct hmm (which is unique per mm_struct)
> - * @ops: device driver callback for HMM mirror operations
> - * @list: for list of mirrors of a given mm
> - *
> - * Each address space (mm_struct) being mirrored by a device must register one
> - * instance of an hmm_mirror struct with HMM. HMM will track the list of all
> - * mirrors for each mm_struct.
> - */
> -struct hmm_mirror {
> -	struct hmm			*hmm;
> -	const struct hmm_mirror_ops	*ops;
> -	struct list_head		list;
> -};
> -
>  /*
>   * Retry fault if non-blocking, drop mmap_sem and return -EAGAIN in that case.
>   */
> @@ -381,15 +226,9 @@ struct hmm_mirror {
>  #define HMM_FAULT_SNAPSHOT		(1 << 1)
>  
>  #ifdef CONFIG_HMM_MIRROR
> -int hmm_mirror_register(struct hmm_mirror *mirror, struct mm_struct *mm);
> -void hmm_mirror_unregister(struct hmm_mirror *mirror);
> -
>  /*
>   * Please see Documentation/vm/hmm.rst for how to use the range API.
>   */
> -int hmm_range_register(struct hmm_range *range, struct hmm_mirror *mirror);
> -void hmm_range_unregister(struct hmm_range *range);
> -
>  long hmm_range_fault(struct hmm_range *range, unsigned int flags);
>  
>  long hmm_range_dma_map(struct hmm_range *range,
> @@ -401,24 +240,6 @@ long hmm_range_dma_unmap(struct hmm_range *range,
>  			 dma_addr_t *daddrs,
>  			 bool dirty);
>  #else
> -int hmm_mirror_register(struct hmm_mirror *mirror, struct mm_struct *mm)
> -{
> -	return -EOPNOTSUPP;
> -}
> -
> -void hmm_mirror_unregister(struct hmm_mirror *mirror)
> -{
> -}
> -
> -int hmm_range_register(struct hmm_range *range, struct hmm_mirror *mirror)
> -{
> -	return -EOPNOTSUPP;
> -}
> -
> -void hmm_range_unregister(struct hmm_range *range)
> -{
> -}
> -
>  static inline long hmm_range_fault(struct hmm_range *range, unsigned int flags)
>  {
>  	return -EOPNOTSUPP;
> diff --git a/mm/Kconfig b/mm/Kconfig
> index d0b5046d9aeffd..e38ff1d5968dbf 100644
> --- a/mm/Kconfig
> +++ b/mm/Kconfig
> @@ -675,7 +675,6 @@ config DEV_PAGEMAP_OPS
>  config HMM_MIRROR
>  	bool
>  	depends on MMU
> -	depends on MMU_NOTIFIER
>  
>  config DEVICE_PRIVATE
>  	bool "Unaddressable device memory (GPU memory, ...)"
> diff --git a/mm/hmm.c b/mm/hmm.c
> index 22ac3595771feb..75d15a820e182e 100644
> --- a/mm/hmm.c
> +++ b/mm/hmm.c
> @@ -26,193 +26,6 @@
>  #include <linux/mmu_notifier.h>
>  #include <linux/memory_hotplug.h>
>  
> -static struct mmu_notifier *hmm_alloc_notifier(struct mm_struct *mm)
> -{
> -	struct hmm *hmm;
> -
> -	hmm = kzalloc(sizeof(*hmm), GFP_KERNEL);
> -	if (!hmm)
> -		return ERR_PTR(-ENOMEM);
> -
> -	init_waitqueue_head(&hmm->wq);
> -	INIT_LIST_HEAD(&hmm->mirrors);
> -	init_rwsem(&hmm->mirrors_sem);
> -	INIT_LIST_HEAD(&hmm->ranges);
> -	spin_lock_init(&hmm->ranges_lock);
> -	hmm->notifiers = 0;
> -	return &hmm->mmu_notifier;
> -}
> -
> -static void hmm_free_notifier(struct mmu_notifier *mn)
> -{
> -	struct hmm *hmm = container_of(mn, struct hmm, mmu_notifier);
> -
> -	WARN_ON(!list_empty(&hmm->ranges));
> -	WARN_ON(!list_empty(&hmm->mirrors));
> -	kfree(hmm);
> -}
> -
> -static void hmm_release(struct mmu_notifier *mn, struct mm_struct *mm)
> -{
> -	struct hmm *hmm = container_of(mn, struct hmm, mmu_notifier);
> -	struct hmm_mirror *mirror;
> -
> -	/*
> -	 * Since hmm_range_register() holds the mmget() lock hmm_release() is
> -	 * prevented as long as a range exists.
> -	 */
> -	WARN_ON(!list_empty_careful(&hmm->ranges));
> -
> -	down_read(&hmm->mirrors_sem);
> -	list_for_each_entry(mirror, &hmm->mirrors, list) {
> -		/*
> -		 * Note: The driver is not allowed to trigger
> -		 * hmm_mirror_unregister() from this thread.
> -		 */
> -		if (mirror->ops->release)
> -			mirror->ops->release(mirror);
> -	}
> -	up_read(&hmm->mirrors_sem);
> -}
> -
> -static void notifiers_decrement(struct hmm *hmm)
> -{
> -	unsigned long flags;
> -
> -	spin_lock_irqsave(&hmm->ranges_lock, flags);
> -	hmm->notifiers--;
> -	if (!hmm->notifiers) {
> -		struct hmm_range *range;
> -
> -		list_for_each_entry(range, &hmm->ranges, list) {
> -			if (range->valid)
> -				continue;
> -			range->valid = true;
> -		}
> -		wake_up_all(&hmm->wq);
> -	}
> -	spin_unlock_irqrestore(&hmm->ranges_lock, flags);
> -}
> -
> -static int hmm_invalidate_range_start(struct mmu_notifier *mn,
> -			const struct mmu_notifier_range *nrange)
> -{
> -	struct hmm *hmm = container_of(mn, struct hmm, mmu_notifier);
> -	struct hmm_mirror *mirror;
> -	struct hmm_range *range;
> -	unsigned long flags;
> -	int ret = 0;
> -
> -	spin_lock_irqsave(&hmm->ranges_lock, flags);
> -	hmm->notifiers++;
> -	list_for_each_entry(range, &hmm->ranges, list) {
> -		if (nrange->end < range->start || nrange->start >= range->end)
> -			continue;
> -
> -		range->valid = false;
> -	}
> -	spin_unlock_irqrestore(&hmm->ranges_lock, flags);
> -
> -	if (mmu_notifier_range_blockable(nrange))
> -		down_read(&hmm->mirrors_sem);
> -	else if (!down_read_trylock(&hmm->mirrors_sem)) {
> -		ret = -EAGAIN;
> -		goto out;
> -	}
> -
> -	list_for_each_entry(mirror, &hmm->mirrors, list) {
> -		int rc;
> -
> -		rc = mirror->ops->sync_cpu_device_pagetables(mirror, nrange);
> -		if (rc) {
> -			if (WARN_ON(mmu_notifier_range_blockable(nrange) ||
> -			    rc != -EAGAIN))
> -				continue;
> -			ret = -EAGAIN;
> -			break;
> -		}
> -	}
> -	up_read(&hmm->mirrors_sem);
> -
> -out:
> -	if (ret)
> -		notifiers_decrement(hmm);
> -	return ret;
> -}
> -
> -static void hmm_invalidate_range_end(struct mmu_notifier *mn,
> -			const struct mmu_notifier_range *nrange)
> -{
> -	struct hmm *hmm = container_of(mn, struct hmm, mmu_notifier);
> -
> -	notifiers_decrement(hmm);
> -}
> -
> -static const struct mmu_notifier_ops hmm_mmu_notifier_ops = {
> -	.release		= hmm_release,
> -	.invalidate_range_start	= hmm_invalidate_range_start,
> -	.invalidate_range_end	= hmm_invalidate_range_end,
> -	.alloc_notifier		= hmm_alloc_notifier,
> -	.free_notifier		= hmm_free_notifier,
> -};
> -
> -/*
> - * hmm_mirror_register() - register a mirror against an mm
> - *
> - * @mirror: new mirror struct to register
> - * @mm: mm to register against
> - * Return: 0 on success, -ENOMEM if no memory, -EINVAL if invalid arguments
> - *
> - * To start mirroring a process address space, the device driver must register
> - * an HMM mirror struct.
> - *
> - * The caller cannot unregister the hmm_mirror while any ranges are
> - * registered.
> - *
> - * Callers using this function must put a call to mmu_notifier_synchronize()
> - * in their module exit functions.
> - */
> -int hmm_mirror_register(struct hmm_mirror *mirror, struct mm_struct *mm)
> -{
> -	struct mmu_notifier *mn;
> -
> -	lockdep_assert_held_write(&mm->mmap_sem);
> -
> -	/* Sanity check */
> -	if (!mm || !mirror || !mirror->ops)
> -		return -EINVAL;
> -
> -	mn = mmu_notifier_get_locked(&hmm_mmu_notifier_ops, mm);
> -	if (IS_ERR(mn))
> -		return PTR_ERR(mn);
> -	mirror->hmm = container_of(mn, struct hmm, mmu_notifier);
> -
> -	down_write(&mirror->hmm->mirrors_sem);
> -	list_add(&mirror->list, &mirror->hmm->mirrors);
> -	up_write(&mirror->hmm->mirrors_sem);
> -
> -	return 0;
> -}
> -EXPORT_SYMBOL(hmm_mirror_register);
> -
> -/*
> - * hmm_mirror_unregister() - unregister a mirror
> - *
> - * @mirror: mirror struct to unregister
> - *
> - * Stop mirroring a process address space, and cleanup.
> - */
> -void hmm_mirror_unregister(struct hmm_mirror *mirror)
> -{
> -	struct hmm *hmm = mirror->hmm;
> -
> -	down_write(&hmm->mirrors_sem);
> -	list_del(&mirror->list);
> -	up_write(&hmm->mirrors_sem);
> -	mmu_notifier_put(&hmm->mmu_notifier);
> -}
> -EXPORT_SYMBOL(hmm_mirror_unregister);
> -
>  struct hmm_vma_walk {
>  	struct hmm_range	*range;
>  	struct dev_pagemap	*pgmap;
> @@ -779,87 +592,6 @@ static void hmm_pfns_clear(struct hmm_range *range,
>  		*pfns = range->values[HMM_PFN_NONE];
>  }
>  
> -/*
> - * hmm_range_register() - start tracking change to CPU page table over a range
> - * @range: range
> - * @mm: the mm struct for the range of virtual address
> - *
> - * Return: 0 on success, -EFAULT if the address space is no longer valid
> - *
> - * Track updates to the CPU page table see include/linux/hmm.h
> - */
> -int hmm_range_register(struct hmm_range *range, struct hmm_mirror *mirror)
> -{
> -	struct hmm *hmm = mirror->hmm;
> -	unsigned long flags;
> -
> -	range->valid = false;
> -	range->hmm = NULL;
> -
> -	if ((range->start & (PAGE_SIZE - 1)) || (range->end & (PAGE_SIZE - 1)))
> -		return -EINVAL;
> -	if (range->start >= range->end)
> -		return -EINVAL;
> -
> -	/* Prevent hmm_release() from running while the range is valid */
> -	if (!mmget_not_zero(hmm->mmu_notifier.mm))
> -		return -EFAULT;
> -
> -	/* Initialize range to track CPU page table updates. */
> -	spin_lock_irqsave(&hmm->ranges_lock, flags);
> -
> -	range->hmm = hmm;
> -	list_add(&range->list, &hmm->ranges);
> -
> -	/*
> -	 * If there are any concurrent notifiers we have to wait for them for
> -	 * the range to be valid (see hmm_range_wait_until_valid()).
> -	 */
> -	if (!hmm->notifiers)
> -		range->valid = true;
> -	spin_unlock_irqrestore(&hmm->ranges_lock, flags);
> -
> -	return 0;
> -}
> -EXPORT_SYMBOL(hmm_range_register);
> -
> -/*
> - * hmm_range_unregister() - stop tracking change to CPU page table over a range
> - * @range: range
> - *
> - * Range struct is used to track updates to the CPU page table after a call to
> - * hmm_range_register(). See include/linux/hmm.h for how to use it.
> - */
> -void hmm_range_unregister(struct hmm_range *range)
> -{
> -	struct hmm *hmm = range->hmm;
> -	unsigned long flags;
> -
> -	spin_lock_irqsave(&hmm->ranges_lock, flags);
> -	list_del_init(&range->list);
> -	spin_unlock_irqrestore(&hmm->ranges_lock, flags);
> -
> -	/* Drop reference taken by hmm_range_register() */
> -	mmput(hmm->mmu_notifier.mm);
> -
> -	/*
> -	 * The range is now invalid and the ref on the hmm is dropped, so
> -	 * poison the pointer.  Leave other fields in place, for the caller's
> -	 * use.
> -	 */
> -	range->valid = false;
> -	memset(&range->hmm, POISON_INUSE, sizeof(range->hmm));
> -}
> -EXPORT_SYMBOL(hmm_range_unregister);
> -
> -static bool needs_retry(struct hmm_range *range)
> -{
> -	if (range->notifier)
> -		return mmu_range_check_retry(range->notifier,
> -					     range->notifier_seq);
> -	return !range->valid;
> -}
> -
>  static const struct mm_walk_ops hmm_walk_ops = {
>  	.pud_entry	= hmm_vma_walk_pud,
>  	.pmd_entry	= hmm_vma_walk_pmd,
> @@ -900,20 +632,15 @@ long hmm_range_fault(struct hmm_range *range, unsigned int flags)
>  	const unsigned long device_vma = VM_IO | VM_PFNMAP | VM_MIXEDMAP;
>  	unsigned long start = range->start, end;
>  	struct hmm_vma_walk hmm_vma_walk;
> -	struct mm_struct *mm;
> +	struct mm_struct *mm = range->notifier->mm;
>  	struct vm_area_struct *vma;
>  	int ret;
>  
> -	if (range->notifier)
> -		mm = range->notifier->mm;
> -	else
> -		mm = range->hmm->mmu_notifier.mm;
> -
>  	lockdep_assert_held(&mm->mmap_sem);
>  
>  	do {
>  		/* If range is no longer valid force retry. */
> -		if (needs_retry(range))
> +		if (mmu_range_check_retry(range->notifier, range->notifier_seq))
>  			return -EBUSY;
>  
>  		vma = find_vma(mm, start);
> @@ -946,7 +673,9 @@ long hmm_range_fault(struct hmm_range *range, unsigned int flags)
>  			start = hmm_vma_walk.last;
>  
>  			/* Keep trying while the range is valid. */
> -		} while (ret == -EBUSY && !needs_retry(range));
> +		} while (ret == -EBUSY &&
> +			 !mmu_range_check_retry(range->notifier,
> +						range->notifier_seq));
>  
>  		if (ret) {
>  			unsigned long i;
> @@ -1004,7 +733,8 @@ long hmm_range_dma_map(struct hmm_range *range, struct device *device,
>  			continue;
>  
>  		/* Check if range is being invalidated */
> -		if (needs_retry(range)) {
> +		if (mmu_range_check_retry(range->notifier,
> +					  range->notifier_seq)) {
>  			ret = -EBUSY;
>  			goto unmap;
>  		}
> -- 
> 2.23.0
>
Jason Gunthorpe Oct. 21, 2019, 6:57 p.m. UTC | #2
On Mon, Oct 21, 2019 at 02:38:24PM -0400, Jerome Glisse wrote:
> On Tue, Oct 15, 2019 at 03:12:42PM -0300, Jason Gunthorpe wrote:
> > From: Jason Gunthorpe <jgg@mellanox.com>
> > 
> > The only two users of this are now converted to use mmu_range_notifier,
> > delete all the code and update hmm.rst.
> 
> I guess i should point out that the reasons for hmm_mirror and hmm
> was for:
>     1) Maybe define a common API for userspace to provide memory
>        placement hints (NUMA for GPU)

Do you think this needs special code in the notifiers?

>     2) multi-devices sharing same mirror page table

Oh neat, but I think this just means the GPU driver has to register a
single notifier for multiple GPUs??

> But support for multi-GPU in nouveau is way behind and i guess such
> optimization will have to re-materialize what is necessary once that
> happens.

Sure, it will be easier to understand what is needed with a bit of
code!

> Note this patch should also update kernel/fork.c and the mm_struct
> definition AFAICT. With those changes you can add my:

Can you please elaborate what updates you mean? I'm not sure. 

Maybe I already got the things you are thinking of with the get/put
changes?

Thanks,
Jason
Jerome Glisse Oct. 21, 2019, 7:19 p.m. UTC | #3
On Mon, Oct 21, 2019 at 06:57:42PM +0000, Jason Gunthorpe wrote:
> On Mon, Oct 21, 2019 at 02:38:24PM -0400, Jerome Glisse wrote:
> > On Tue, Oct 15, 2019 at 03:12:42PM -0300, Jason Gunthorpe wrote:
> > > From: Jason Gunthorpe <jgg@mellanox.com>
> > > 
> > > The only two users of this are now converted to use mmu_range_notifier,
> > > delete all the code and update hmm.rst.
> > 
> > I guess i should point out that the reasons for hmm_mirror and hmm
> > was for:
> >     1) Maybe define a common API for userspace to provide memory
> >        placement hints (NUMA for GPU)
> 
> Do you think this needs special code in the notifiers?

We just need a place to hang the userspace policy hints; the hmm_range
was the prime suspect. I need to revisit this once the nouveau userspace
is in better shape.

> 
> >     2) multi-devices sharing same mirror page table
> 
> Oh neat, but I think this just means the GPU driver has to register a
> single notifier for multiple GPUs??

Yes, that was the idea: a single notifier with a shared page table. But
at this time the code is nonexistent, so there is no need to hinder this
change just for the sake of it.

> 
> > But support for multi-GPU in nouveau is way behind and i guess such
> > optimization will have to re-materialize what is necessary once that
> > happens.
> 
> Sure, it will be easier to understand what is needed with a bit of
> code!
> 
> > Note this patch should also update kernel/fork.c and the mm_struct
> > definition AFAICT. With those changes you can add my:
> 
> Can you please elaborate what updates you mean? I'm not sure. 
> 
> Maybe I already got the things you are thinking of with the get/put
> changes?

Oh, I forgot that this was already taken care of by those changes. So yes,
all is fine:

Reviewed-by: Jérôme Glisse <jglisse@redhat.com>

Patch
diff mbox series

diff --git a/Documentation/vm/hmm.rst b/Documentation/vm/hmm.rst
index 0a5960beccf76d..a247643035c4e2 100644
--- a/Documentation/vm/hmm.rst
+++ b/Documentation/vm/hmm.rst
@@ -147,49 +147,16 @@  Address space mirroring implementation and API
 Address space mirroring's main objective is to allow duplication of a range of
 CPU page table into a device page table; HMM helps keep both synchronized. A
 device driver that wants to mirror a process address space must start with the
-registration of an hmm_mirror struct::
-
- int hmm_mirror_register(struct hmm_mirror *mirror,
-                         struct mm_struct *mm);
-
-The mirror struct has a set of callbacks that are used
-to propagate CPU page tables::
-
- struct hmm_mirror_ops {
-     /* release() - release hmm_mirror
-      *
-      * @mirror: pointer to struct hmm_mirror
-      *
-      * This is called when the mm_struct is being released.  The callback
-      * must ensure that all access to any pages obtained from this mirror
-      * is halted before the callback returns. All future access should
-      * fault.
-      */
-     void (*release)(struct hmm_mirror *mirror);
-
-     /* sync_cpu_device_pagetables() - synchronize page tables
-      *
-      * @mirror: pointer to struct hmm_mirror
-      * @update: update information (see struct mmu_notifier_range)
-      * Return: -EAGAIN if update.blockable false and callback need to
-      *         block, 0 otherwise.
-      *
-      * This callback ultimately originates from mmu_notifiers when the CPU
-      * page table is updated. The device driver must update its page table
-      * in response to this callback. The update argument tells what action
-      * to perform.
-      *
-      * The device driver must not return from this callback until the device
-      * page tables are completely updated (TLBs flushed, etc); this is a
-      * synchronous call.
-      */
-     int (*sync_cpu_device_pagetables)(struct hmm_mirror *mirror,
-                                       const struct hmm_update *update);
- };
-
-The device driver must perform the update action to the range (mark range
-read only, or fully unmap, etc.). The device must complete the update before
-the driver callback returns.
+registration of a mmu_range_notifier::
+
+ mrn->ops = &driver_ops;
+ int mmu_range_notifier_insert(struct mmu_range_notifier *mrn,
+			      unsigned long start, unsigned long length,
+			      struct mm_struct *mm);
+
+During the driver_ops->invalidate() callback the device driver must perform
+the update action to the range (mark range read only, or fully unmap,
+etc.). The device must complete the update before the driver callback returns.
 
 When the device driver wants to populate a range of virtual addresses, it can
 use::
@@ -216,70 +183,46 @@  The usage pattern is::
       struct hmm_range range;
       ...
 
+      range.notifier = &mrn;
       range.start = ...;
       range.end = ...;
       range.pfns = ...;
       range.flags = ...;
       range.values = ...;
       range.pfn_shift = ...;
-      hmm_range_register(&range, mirror);
 
-      /*
-       * Just wait for range to be valid, safe to ignore return value as we
-       * will use the return value of hmm_range_fault() below under the
-       * mmap_sem to ascertain the validity of the range.
-       */
-      hmm_range_wait_until_valid(&range, TIMEOUT_IN_MSEC);
+      if (!mmget_not_zero(mrn.mm))
+          return -EFAULT;
 
  again:
+      range.notifier_seq = mmu_range_read_begin(&mrn);
       down_read(&mm->mmap_sem);
       ret = hmm_range_fault(&range, HMM_RANGE_SNAPSHOT);
       if (ret) {
           up_read(&mm->mmap_sem);
-          if (ret == -EBUSY) {
-            /*
-             * No need to check hmm_range_wait_until_valid() return value
-             * on retry we will get proper error with hmm_range_fault()
-             */
-            hmm_range_wait_until_valid(&range, TIMEOUT_IN_MSEC);
-            goto again;
-          }
-          hmm_range_unregister(&range);
+          if (ret == -EBUSY)
+                 goto again;
           return ret;
       }
+      up_read(&mm->mmap_sem);
+
       take_lock(driver->update);
-      if (!hmm_range_valid(&range)) {
+      if (mmu_range_read_retry(&mrn, range.notifier_seq)) {
           release_lock(driver->update);
-          up_read(&mm->mmap_sem);
           goto again;
       }
 
-      // Use pfns array content to update device page table
+      /* Use pfns array content to update device page table,
+       * under the update lock */
 
-      hmm_range_unregister(&range);
       release_lock(driver->update);
-      up_read(&mm->mmap_sem);
       return 0;
  }
 
 The driver->update lock is the same lock that the driver takes inside its
-sync_cpu_device_pagetables() callback. That lock must be held before calling
-hmm_range_valid() to avoid any race with a concurrent CPU page table update.
-
-HMM implements all this on top of the mmu_notifier API because we wanted a
-simpler API and also to be able to perform optimizations latter on like doing
-concurrent device updates in multi-devices scenario.
-
-HMM also serves as an impedance mismatch between how CPU page table updates
-are done (by CPU write to the page table and TLB flushes) and how devices
-update their own page table. Device updates are a multi-step process. First,
-appropriate commands are written to a buffer, then this buffer is scheduled for
-execution on the device. It is only once the device has executed commands in
-the buffer that the update is done. Creating and scheduling the update command
-buffer can happen concurrently for multiple devices. Waiting for each device to
-report commands as executed is serialized (there is no point in doing this
-concurrently).
-
+invalidate() callback. That lock must be held before calling
+mmu_range_read_retry() to avoid any race with a concurrent CPU page table
+update.
 
 Leverage default_flags and pfn_flags_mask
 =========================================
diff --git a/include/linux/hmm.h b/include/linux/hmm.h
index 2666eb08a40615..b4af5173523232 100644
--- a/include/linux/hmm.h
+++ b/include/linux/hmm.h
@@ -68,29 +68,6 @@ 
 #include <linux/completion.h>
 #include <linux/mmu_notifier.h>
 
-
-/*
- * struct hmm - HMM per mm struct
- *
- * @mm: mm struct this HMM struct is bound to
- * @lock: lock protecting ranges list
- * @ranges: list of range being snapshotted
- * @mirrors: list of mirrors for this mm
- * @mmu_notifier: mmu notifier to track updates to CPU page table
- * @mirrors_sem: read/write semaphore protecting the mirrors list
- * @wq: wait queue for user waiting on a range invalidation
- * @notifiers: count of active mmu notifiers
- */
-struct hmm {
-	struct mmu_notifier	mmu_notifier;
-	spinlock_t		ranges_lock;
-	struct list_head	ranges;
-	struct list_head	mirrors;
-	struct rw_semaphore	mirrors_sem;
-	wait_queue_head_t	wq;
-	long			notifiers;
-};
-
 /*
  * hmm_pfn_flag_e - HMM flag enums
  *
@@ -143,9 +120,8 @@  enum hmm_pfn_value_e {
 /*
  * struct hmm_range - track invalidation lock on virtual address range
  *
- * @notifier: an optional mmu_range_notifier
- * @notifier_seq: when notifier is used this is the result of
- *                mmu_range_read_begin()
+ * @notifier: a mmu_range_notifier that includes the start/end
+ * @notifier_seq: result of mmu_range_read_begin()
  * @hmm: the core HMM structure this range is active against
  * @vma: the vm area struct for the range
  * @list: all range lock are on a list
@@ -162,8 +138,6 @@  enum hmm_pfn_value_e {
 struct hmm_range {
 	struct mmu_range_notifier *notifier;
 	unsigned long		notifier_seq;
-	struct hmm		*hmm;
-	struct list_head	list;
 	unsigned long		start;
 	unsigned long		end;
 	uint64_t		*pfns;
@@ -172,32 +146,8 @@  struct hmm_range {
 	uint64_t		default_flags;
 	uint64_t		pfn_flags_mask;
 	uint8_t			pfn_shift;
-	bool			valid;
 };
 
-/*
- * hmm_range_wait_until_valid() - wait for range to be valid
- * @range: range affected by invalidation to wait on
- * @timeout: time out for wait in ms (ie abort wait after that period of time)
- * Return: true if the range is valid, false otherwise.
- */
-static inline bool hmm_range_wait_until_valid(struct hmm_range *range,
-					      unsigned long timeout)
-{
-	return wait_event_timeout(range->hmm->wq, range->valid,
-				  msecs_to_jiffies(timeout)) != 0;
-}
-
-/*
- * hmm_range_valid() - test if a range is valid or not
- * @range: range
- * Return: true if the range is valid, false otherwise.
- */
-static inline bool hmm_range_valid(struct hmm_range *range)
-{
-	return range->valid;
-}
-
 /*
  * hmm_device_entry_to_page() - return struct page pointed to by a device entry
  * @range: range use to decode device entry value
@@ -267,111 +217,6 @@  static inline uint64_t hmm_device_entry_from_pfn(const struct hmm_range *range,
 		range->flags[HMM_PFN_VALID];
 }
 
-/*
- * Mirroring: how to synchronize device page table with CPU page table.
- *
- * A device driver that is participating in HMM mirroring must always
- * synchronize with CPU page table updates. For this, device drivers can either
- * directly use mmu_notifier APIs or they can use the hmm_mirror API. Device
- * drivers can decide to register one mirror per device per process, or just
- * one mirror per process for a group of devices. The pattern is:
- *
- *      int device_bind_address_space(..., struct mm_struct *mm, ...)
- *      {
- *          struct device_address_space *das;
- *
- *          // Device driver specific initialization, and allocation of das
- *          // which contains an hmm_mirror struct as one of its fields.
- *          ...
- *
- *          ret = hmm_mirror_register(&das->mirror, mm, &device_mirror_ops);
- *          if (ret) {
- *              // Cleanup on error
- *              return ret;
- *          }
- *
- *          // Other device driver specific initialization
- *          ...
- *      }
- *
- * Once an hmm_mirror is registered for an address space, the device driver
- * will get callbacks through sync_cpu_device_pagetables() operation (see
- * hmm_mirror_ops struct).
- *
- * Device driver must not free the struct containing the hmm_mirror struct
- * before calling hmm_mirror_unregister(). The expected usage is to do that when
- * the device driver is unbinding from an address space.
- *
- *
- *      void device_unbind_address_space(struct device_address_space *das)
- *      {
- *          // Device driver specific cleanup
- *          ...
- *
- *          hmm_mirror_unregister(&das->mirror);
- *
- *          // Other device driver specific cleanup, and now das can be freed
- *          ...
- *      }
- */
-
-struct hmm_mirror;
-
-/*
- * struct hmm_mirror_ops - HMM mirror device operations callback
- *
- * @update: callback to update range on a device
- */
-struct hmm_mirror_ops {
-	/* release() - release hmm_mirror
-	 *
-	 * @mirror: pointer to struct hmm_mirror
-	 *
-	 * This is called when the mm_struct is being released.  The callback
-	 * must ensure that all access to any pages obtained from this mirror
-	 * is halted before the callback returns. All future access should
-	 * fault.
-	 */
-	void (*release)(struct hmm_mirror *mirror);
-
-	/* sync_cpu_device_pagetables() - synchronize page tables
-	 *
-	 * @mirror: pointer to struct hmm_mirror
-	 * @update: update information (see struct mmu_notifier_range)
-	 * Return: -EAGAIN if mmu_notifier_range_blockable(update) is false
-	 * and callback needs to block, 0 otherwise.
-	 *
-	 * This callback ultimately originates from mmu_notifiers when the CPU
-	 * page table is updated. The device driver must update its page table
-	 * in response to this callback. The update argument tells what action
-	 * to perform.
-	 *
-	 * The device driver must not return from this callback until the device
-	 * page tables are completely updated (TLBs flushed, etc); this is a
-	 * synchronous call.
-	 */
-	int (*sync_cpu_device_pagetables)(
-		struct hmm_mirror *mirror,
-		const struct mmu_notifier_range *update);
-};
-
-/*
- * struct hmm_mirror - mirror struct for a device driver
- *
- * @hmm: pointer to struct hmm (which is unique per mm_struct)
- * @ops: device driver callback for HMM mirror operations
- * @list: for list of mirrors of a given mm
- *
- * Each address space (mm_struct) being mirrored by a device must register one
- * instance of an hmm_mirror struct with HMM. HMM will track the list of all
- * mirrors for each mm_struct.
- */
-struct hmm_mirror {
-	struct hmm			*hmm;
-	const struct hmm_mirror_ops	*ops;
-	struct list_head		list;
-};
-
 /*
  * Retry fault if non-blocking, drop mmap_sem and return -EAGAIN in that case.
  */
@@ -381,15 +226,9 @@  struct hmm_mirror {
 #define HMM_FAULT_SNAPSHOT		(1 << 1)
 
 #ifdef CONFIG_HMM_MIRROR
-int hmm_mirror_register(struct hmm_mirror *mirror, struct mm_struct *mm);
-void hmm_mirror_unregister(struct hmm_mirror *mirror);
-
 /*
  * Please see Documentation/vm/hmm.rst for how to use the range API.
  */
-int hmm_range_register(struct hmm_range *range, struct hmm_mirror *mirror);
-void hmm_range_unregister(struct hmm_range *range);
-
 long hmm_range_fault(struct hmm_range *range, unsigned int flags);
 
 long hmm_range_dma_map(struct hmm_range *range,
@@ -401,24 +240,6 @@  long hmm_range_dma_unmap(struct hmm_range *range,
 			 dma_addr_t *daddrs,
 			 bool dirty);
 #else
-int hmm_mirror_register(struct hmm_mirror *mirror, struct mm_struct *mm)
-{
-	return -EOPNOTSUPP;
-}
-
-void hmm_mirror_unregister(struct hmm_mirror *mirror)
-{
-}
-
-int hmm_range_register(struct hmm_range *range, struct hmm_mirror *mirror)
-{
-	return -EOPNOTSUPP;
-}
-
-void hmm_range_unregister(struct hmm_range *range)
-{
-}
-
 static inline long hmm_range_fault(struct hmm_range *range, unsigned int flags)
 {
 	return -EOPNOTSUPP;
diff --git a/mm/Kconfig b/mm/Kconfig
index d0b5046d9aeffd..e38ff1d5968dbf 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -675,7 +675,6 @@  config DEV_PAGEMAP_OPS
 config HMM_MIRROR
 	bool
 	depends on MMU
-	depends on MMU_NOTIFIER
 
 config DEVICE_PRIVATE
 	bool "Unaddressable device memory (GPU memory, ...)"
diff --git a/mm/hmm.c b/mm/hmm.c
index 22ac3595771feb..75d15a820e182e 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -26,193 +26,6 @@ 
 #include <linux/mmu_notifier.h>
 #include <linux/memory_hotplug.h>
 
-static struct mmu_notifier *hmm_alloc_notifier(struct mm_struct *mm)
-{
-	struct hmm *hmm;
-
-	hmm = kzalloc(sizeof(*hmm), GFP_KERNEL);
-	if (!hmm)
-		return ERR_PTR(-ENOMEM);
-
-	init_waitqueue_head(&hmm->wq);
-	INIT_LIST_HEAD(&hmm->mirrors);
-	init_rwsem(&hmm->mirrors_sem);
-	INIT_LIST_HEAD(&hmm->ranges);
-	spin_lock_init(&hmm->ranges_lock);
-	hmm->notifiers = 0;
-	return &hmm->mmu_notifier;
-}
-
-static void hmm_free_notifier(struct mmu_notifier *mn)
-{
-	struct hmm *hmm = container_of(mn, struct hmm, mmu_notifier);
-
-	WARN_ON(!list_empty(&hmm->ranges));
-	WARN_ON(!list_empty(&hmm->mirrors));
-	kfree(hmm);
-}
-
-static void hmm_release(struct mmu_notifier *mn, struct mm_struct *mm)
-{
-	struct hmm *hmm = container_of(mn, struct hmm, mmu_notifier);
-	struct hmm_mirror *mirror;
-
-	/*
-	 * Since hmm_range_register() holds the mmget() lock hmm_release() is
-	 * prevented as long as a range exists.
-	 */
-	WARN_ON(!list_empty_careful(&hmm->ranges));
-
-	down_read(&hmm->mirrors_sem);
-	list_for_each_entry(mirror, &hmm->mirrors, list) {
-		/*
-		 * Note: The driver is not allowed to trigger
-		 * hmm_mirror_unregister() from this thread.
-		 */
-		if (mirror->ops->release)
-			mirror->ops->release(mirror);
-	}
-	up_read(&hmm->mirrors_sem);
-}
-
-static void notifiers_decrement(struct hmm *hmm)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&hmm->ranges_lock, flags);
-	hmm->notifiers--;
-	if (!hmm->notifiers) {
-		struct hmm_range *range;
-
-		list_for_each_entry(range, &hmm->ranges, list) {
-			if (range->valid)
-				continue;
-			range->valid = true;
-		}
-		wake_up_all(&hmm->wq);
-	}
-	spin_unlock_irqrestore(&hmm->ranges_lock, flags);
-}
-
-static int hmm_invalidate_range_start(struct mmu_notifier *mn,
-			const struct mmu_notifier_range *nrange)
-{
-	struct hmm *hmm = container_of(mn, struct hmm, mmu_notifier);
-	struct hmm_mirror *mirror;
-	struct hmm_range *range;
-	unsigned long flags;
-	int ret = 0;
-
-	spin_lock_irqsave(&hmm->ranges_lock, flags);
-	hmm->notifiers++;
-	list_for_each_entry(range, &hmm->ranges, list) {
-		if (nrange->end < range->start || nrange->start >= range->end)
-			continue;
-
-		range->valid = false;
-	}
-	spin_unlock_irqrestore(&hmm->ranges_lock, flags);
-
-	if (mmu_notifier_range_blockable(nrange))
-		down_read(&hmm->mirrors_sem);
-	else if (!down_read_trylock(&hmm->mirrors_sem)) {
-		ret = -EAGAIN;
-		goto out;
-	}
-
-	list_for_each_entry(mirror, &hmm->mirrors, list) {
-		int rc;
-
-		rc = mirror->ops->sync_cpu_device_pagetables(mirror, nrange);
-		if (rc) {
-			if (WARN_ON(mmu_notifier_range_blockable(nrange) ||
-			    rc != -EAGAIN))
-				continue;
-			ret = -EAGAIN;
-			break;
-		}
-	}
-	up_read(&hmm->mirrors_sem);
-
-out:
-	if (ret)
-		notifiers_decrement(hmm);
-	return ret;
-}
-
-static void hmm_invalidate_range_end(struct mmu_notifier *mn,
-			const struct mmu_notifier_range *nrange)
-{
-	struct hmm *hmm = container_of(mn, struct hmm, mmu_notifier);
-
-	notifiers_decrement(hmm);
-}
-
-static const struct mmu_notifier_ops hmm_mmu_notifier_ops = {
-	.release		= hmm_release,
-	.invalidate_range_start	= hmm_invalidate_range_start,
-	.invalidate_range_end	= hmm_invalidate_range_end,
-	.alloc_notifier		= hmm_alloc_notifier,
-	.free_notifier		= hmm_free_notifier,
-};
-
-/*
- * hmm_mirror_register() - register a mirror against an mm
- *
- * @mirror: new mirror struct to register
- * @mm: mm to register against
- * Return: 0 on success, -ENOMEM if no memory, -EINVAL if invalid arguments
- *
- * To start mirroring a process address space, the device driver must register
- * an HMM mirror struct.
- *
- * The caller cannot unregister the hmm_mirror while any ranges are
- * registered.
- *
- * Callers using this function must put a call to mmu_notifier_synchronize()
- * in their module exit functions.
- */
-int hmm_mirror_register(struct hmm_mirror *mirror, struct mm_struct *mm)
-{
-	struct mmu_notifier *mn;
-
-	lockdep_assert_held_write(&mm->mmap_sem);
-
-	/* Sanity check */
-	if (!mm || !mirror || !mirror->ops)
-		return -EINVAL;
-
-	mn = mmu_notifier_get_locked(&hmm_mmu_notifier_ops, mm);
-	if (IS_ERR(mn))
-		return PTR_ERR(mn);
-	mirror->hmm = container_of(mn, struct hmm, mmu_notifier);
-
-	down_write(&mirror->hmm->mirrors_sem);
-	list_add(&mirror->list, &mirror->hmm->mirrors);
-	up_write(&mirror->hmm->mirrors_sem);
-
-	return 0;
-}
-EXPORT_SYMBOL(hmm_mirror_register);
-
-/*
- * hmm_mirror_unregister() - unregister a mirror
- *
- * @mirror: mirror struct to unregister
- *
- * Stop mirroring a process address space, and cleanup.
- */
-void hmm_mirror_unregister(struct hmm_mirror *mirror)
-{
-	struct hmm *hmm = mirror->hmm;
-
-	down_write(&hmm->mirrors_sem);
-	list_del(&mirror->list);
-	up_write(&hmm->mirrors_sem);
-	mmu_notifier_put(&hmm->mmu_notifier);
-}
-EXPORT_SYMBOL(hmm_mirror_unregister);
-
 struct hmm_vma_walk {
 	struct hmm_range	*range;
 	struct dev_pagemap	*pgmap;
@@ -779,87 +592,6 @@  static void hmm_pfns_clear(struct hmm_range *range,
 		*pfns = range->values[HMM_PFN_NONE];
 }
 
-/*
- * hmm_range_register() - start tracking change to CPU page table over a range
- * @range: range
- * @mm: the mm struct for the range of virtual address
- *
- * Return: 0 on success, -EFAULT if the address space is no longer valid
- *
- * Track updates to the CPU page table see include/linux/hmm.h
- */
-int hmm_range_register(struct hmm_range *range, struct hmm_mirror *mirror)
-{
-	struct hmm *hmm = mirror->hmm;
-	unsigned long flags;
-
-	range->valid = false;
-	range->hmm = NULL;
-
-	if ((range->start & (PAGE_SIZE - 1)) || (range->end & (PAGE_SIZE - 1)))
-		return -EINVAL;
-	if (range->start >= range->end)
-		return -EINVAL;
-
-	/* Prevent hmm_release() from running while the range is valid */
-	if (!mmget_not_zero(hmm->mmu_notifier.mm))
-		return -EFAULT;
-
-	/* Initialize range to track CPU page table updates. */
-	spin_lock_irqsave(&hmm->ranges_lock, flags);
-
-	range->hmm = hmm;
-	list_add(&range->list, &hmm->ranges);
-
-	/*
-	 * If there are any concurrent notifiers we have to wait for them for
-	 * the range to be valid (see hmm_range_wait_until_valid()).
-	 */
-	if (!hmm->notifiers)
-		range->valid = true;
-	spin_unlock_irqrestore(&hmm->ranges_lock, flags);
-
-	return 0;
-}
-EXPORT_SYMBOL(hmm_range_register);
-
-/*
- * hmm_range_unregister() - stop tracking change to CPU page table over a range
- * @range: range
- *
- * Range struct is used to track updates to the CPU page table after a call to
- * hmm_range_register(). See include/linux/hmm.h for how to use it.
- */
-void hmm_range_unregister(struct hmm_range *range)
-{
-	struct hmm *hmm = range->hmm;
-	unsigned long flags;
-
-	spin_lock_irqsave(&hmm->ranges_lock, flags);
-	list_del_init(&range->list);
-	spin_unlock_irqrestore(&hmm->ranges_lock, flags);
-
-	/* Drop reference taken by hmm_range_register() */
-	mmput(hmm->mmu_notifier.mm);
-
-	/*
-	 * The range is now invalid and the ref on the hmm is dropped, so
-	 * poison the pointer.  Leave other fields in place, for the caller's
-	 * use.
-	 */
-	range->valid = false;
-	memset(&range->hmm, POISON_INUSE, sizeof(range->hmm));
-}
-EXPORT_SYMBOL(hmm_range_unregister);
-
-static bool needs_retry(struct hmm_range *range)
-{
-	if (range->notifier)
-		return mmu_range_check_retry(range->notifier,
-					     range->notifier_seq);
-	return !range->valid;
-}
-
 static const struct mm_walk_ops hmm_walk_ops = {
 	.pud_entry	= hmm_vma_walk_pud,
 	.pmd_entry	= hmm_vma_walk_pmd,
@@ -900,20 +632,15 @@  long hmm_range_fault(struct hmm_range *range, unsigned int flags)
 	const unsigned long device_vma = VM_IO | VM_PFNMAP | VM_MIXEDMAP;
 	unsigned long start = range->start, end;
 	struct hmm_vma_walk hmm_vma_walk;
-	struct mm_struct *mm;
+	struct mm_struct *mm = range->notifier->mm;
 	struct vm_area_struct *vma;
 	int ret;
 
-	if (range->notifier)
-		mm = range->notifier->mm;
-	else
-		mm = range->hmm->mmu_notifier.mm;
-
 	lockdep_assert_held(&mm->mmap_sem);
 
 	do {
 		/* If range is no longer valid force retry. */
-		if (needs_retry(range))
+		if (mmu_range_check_retry(range->notifier, range->notifier_seq))
 			return -EBUSY;
 
 		vma = find_vma(mm, start);
@@ -946,7 +673,9 @@  long hmm_range_fault(struct hmm_range *range, unsigned int flags)
 			start = hmm_vma_walk.last;
 
 			/* Keep trying while the range is valid. */
-		} while (ret == -EBUSY && !needs_retry(range));
+		} while (ret == -EBUSY &&
+			 !mmu_range_check_retry(range->notifier,
+						range->notifier_seq));
 
 		if (ret) {
 			unsigned long i;
@@ -1004,7 +733,8 @@  long hmm_range_dma_map(struct hmm_range *range, struct device *device,
 			continue;
 
 		/* Check if range is being invalidated */
-		if (needs_retry(range)) {
+		if (mmu_range_check_retry(range->notifier,
+					  range->notifier_seq)) {
 			ret = -EBUSY;
 			goto unmap;
 		}