
[v1,2/4] mm/mmu_notifier: drop owner from MMU_NOTIFY_EXCLUSIVE

Message ID 20250129115803.2084769-3-david@redhat.com
State New
Series mm: cleanups for device-exclusive entries (hmm)

Commit Message

David Hildenbrand Jan. 29, 2025, 11:58 a.m. UTC
We no longer get an MMU_NOTIFY_EXCLUSIVE on conversion with the owner
set that one has to filter out: if there already *is* a device-exclusive
entry (e.g., from another device; we don't have that information), GUP
will convert it back to an ordinary PTE and notify via
remove_device_exclusive_entry().

Signed-off-by: David Hildenbrand <david@redhat.com>
---
 drivers/gpu/drm/nouveau/nouveau_svm.c | 6 +-----
 include/linux/mmu_notifier.h          | 4 +---
 include/linux/rmap.h                  | 2 +-
 lib/test_hmm.c                        | 2 +-
 mm/rmap.c                             | 3 +--
 5 files changed, 5 insertions(+), 12 deletions(-)

Comments

Alistair Popple Jan. 30, 2025, 5:34 a.m. UTC | #1
On Wed, Jan 29, 2025 at 12:58:00PM +0100, David Hildenbrand wrote:
> We no longer get an MMU_NOTIFY_EXCLUSIVE on conversion with the owner
> set that one has to filter out: if there already *is* a device-exclusive
> entry (e.g., from another device; we don't have that information), GUP
> will convert it back to an ordinary PTE and notify via
> remove_device_exclusive_entry().

What tree is this against? I tried applying to v6.13 and Linus' current
master, but neither applied cleanly.
 
> Signed-off-by: David Hildenbrand <david@redhat.com>
> ---
>  drivers/gpu/drm/nouveau/nouveau_svm.c | 6 +-----
>  include/linux/mmu_notifier.h          | 4 +---
>  include/linux/rmap.h                  | 2 +-
>  lib/test_hmm.c                        | 2 +-
>  mm/rmap.c                             | 3 +--
>  5 files changed, 5 insertions(+), 12 deletions(-)
> 
> diff --git a/drivers/gpu/drm/nouveau/nouveau_svm.c b/drivers/gpu/drm/nouveau/nouveau_svm.c
> index 39e3740980bb..4758fee182b4 100644
> --- a/drivers/gpu/drm/nouveau/nouveau_svm.c
> +++ b/drivers/gpu/drm/nouveau/nouveau_svm.c
> @@ -510,10 +510,6 @@ static bool nouveau_svm_range_invalidate(struct mmu_interval_notifier *mni,
>  	struct svm_notifier *sn =
>  		container_of(mni, struct svm_notifier, notifier);
>  
> -	if (range->event == MMU_NOTIFY_EXCLUSIVE &&
> -	    range->owner == sn->svmm->vmm->cli->drm->dev)
> -		return true;

I think this will cause a live-lock, because make_device_exclusive_range()
will call the notifier, which, without the filtering, will increment the
sequence count and cause endless retries of the loop in
nouveau_atomic_range_fault(). The notifier needs to be able to figure out
whether it was called in response to something this thread did
(i.e., make_device_exclusive_range()) and can therefore ignore the
invalidation, or from some other thread.
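
For reference, a condensed sketch (not the verbatim driver code; timeout
and error handling elided) of the retry pattern in
nouveau_atomic_range_fault(). Without the filter, the
MMU_NOTIFY_EXCLUSIVE invalidation triggered by our own
make_device_exclusive() call bumps the sequence count that was just
read, so the retry never succeeds:

	while (true) {
		unsigned long seq = mmu_interval_read_begin(&notifier->notifier);

		mmap_read_lock(mm);
		page = make_device_exclusive(mm, start, &folio);
		mmap_read_unlock(mm);
		if (IS_ERR(page))
			return PTR_ERR(page);

		mutex_lock(&svmm->mutex);
		if (!mmu_interval_read_retry(&notifier->notifier, seq))
			break;	/* sequence unchanged: PTE still valid */
		/* invalidated in the meantime: drop everything and retry */
		mutex_unlock(&svmm->mutex);
		folio_unlock(folio);
		folio_put(folio);
	}
	/* still under svmm->mutex: program the device PTE, then unlock */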

Looking at hmm_test I see that it doesn't use the sequence counter to
ensure the PTE remains valid whilst it is mapped. I think that is
probably wrong, so apologies if that led you astray.

>  	/*
>  	 * serializes the update to mni->invalidate_seq done by caller and
>  	 * prevents invalidation of the PTE from progressing while HW is being
> @@ -609,7 +605,7 @@ static int nouveau_atomic_range_fault(struct nouveau_svmm *svmm,
>  
>  		notifier_seq = mmu_interval_read_begin(&notifier->notifier);
>  		mmap_read_lock(mm);
> -		page = make_device_exclusive(mm, start, drm->dev, &folio);
> +		page = make_device_exclusive(mm, start, &folio);
>  		mmap_read_unlock(mm);
>  		if (IS_ERR(page)) {
>  			ret = -EINVAL;
> diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h
> index d4e714661826..bac2385099dd 100644
> --- a/include/linux/mmu_notifier.h
> +++ b/include/linux/mmu_notifier.h
> @@ -44,9 +44,7 @@ struct mmu_interval_notifier;
>   * owner field matches the driver's device private pgmap owner.
>   *
>   * @MMU_NOTIFY_EXCLUSIVE: to signal a device driver that the device will no
> - * longer have exclusive access to the page. When sent during creation of an
> - * exclusive range the owner will be initialised to the value provided by the
> - * caller of make_device_exclusive(), otherwise the owner will be NULL.
> + * longer have exclusive access to the page.
>   */
>  enum mmu_notifier_event {
>  	MMU_NOTIFY_UNMAP = 0,
> diff --git a/include/linux/rmap.h b/include/linux/rmap.h
> index 86425d42c1a9..3b216b91d2e5 100644
> --- a/include/linux/rmap.h
> +++ b/include/linux/rmap.h
> @@ -664,7 +664,7 @@ void try_to_migrate(struct folio *folio, enum ttu_flags flags);
>  void try_to_unmap(struct folio *, enum ttu_flags flags);
>  
>  struct page *make_device_exclusive(struct mm_struct *mm, unsigned long addr,
> -		void *owner, struct folio **foliop);
> +		struct folio **foliop);
>  
>  /* Avoid racy checks */
>  #define PVMW_SYNC		(1 << 0)
> diff --git a/lib/test_hmm.c b/lib/test_hmm.c
> index 1c0a58279db9..8520c1d1b21b 100644
> --- a/lib/test_hmm.c
> +++ b/lib/test_hmm.c
> @@ -786,7 +786,7 @@ static int dmirror_exclusive(struct dmirror *dmirror,
>  		struct folio *folio;
>  		struct page *page;
>  
> -		page = make_device_exclusive(mm, addr, NULL, &folio);
> +		page = make_device_exclusive(mm, addr, &folio);
>  		if (IS_ERR(page)) {
>  			ret = PTR_ERR(page);
>  			break;
> diff --git a/mm/rmap.c b/mm/rmap.c
> index 4acc9f6d743a..d99dbf59adc6 100644
> --- a/mm/rmap.c
> +++ b/mm/rmap.c
> @@ -2397,7 +2397,6 @@ void try_to_migrate(struct folio *folio, enum ttu_flags flags)
>   * make_device_exclusive() - Mark an address for exclusive use by a device
>   * @mm: mm_struct of associated target process
>   * @addr: the virtual address to mark for exclusive device access
> - * @owner: passed to MMU_NOTIFY_EXCLUSIVE range notifier to allow filtering
>   * @foliop: folio pointer will be stored here on success.
>   *
>   * This function looks up the page mapped at the given address, grabs a
> @@ -2421,7 +2420,7 @@ void try_to_migrate(struct folio *folio, enum ttu_flags flags)
>   * Returns: pointer to mapped page on success, otherwise a negative error.
>   */
>  struct page *make_device_exclusive(struct mm_struct *mm, unsigned long addr,
> -		void *owner, struct folio **foliop)
> +		struct folio **foliop)
>  {
>  	struct folio *folio, *fw_folio;
>  	struct vm_area_struct *vma;
> -- 
> 2.48.1
>
David Hildenbrand Jan. 30, 2025, 9:28 a.m. UTC | #2
On 30.01.25 06:34, Alistair Popple wrote:
> On Wed, Jan 29, 2025 at 12:58:00PM +0100, David Hildenbrand wrote:
>> We no longer get an MMU_NOTIFY_EXCLUSIVE on conversion with the owner
>> set that one has to filter out: if there already *is* a device-exclusive
>> entry (e.g., from another device; we don't have that information), GUP
>> will convert it back to an ordinary PTE and notify via
>> remove_device_exclusive_entry().
> 
> What tree is this against? I tried applying to v6.13 and Linus' current
> master, but neither applied cleanly.

See the cover letter. This is on top of the fixes series, which is based 
on mm-unstable from yesterday.

>   
>> Signed-off-by: David Hildenbrand <david@redhat.com>
>> ---
>>   drivers/gpu/drm/nouveau/nouveau_svm.c | 6 +-----
>>   include/linux/mmu_notifier.h          | 4 +---
>>   include/linux/rmap.h                  | 2 +-
>>   lib/test_hmm.c                        | 2 +-
>>   mm/rmap.c                             | 3 +--
>>   5 files changed, 5 insertions(+), 12 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/nouveau/nouveau_svm.c b/drivers/gpu/drm/nouveau/nouveau_svm.c
>> index 39e3740980bb..4758fee182b4 100644
>> --- a/drivers/gpu/drm/nouveau/nouveau_svm.c
>> +++ b/drivers/gpu/drm/nouveau/nouveau_svm.c
>> @@ -510,10 +510,6 @@ static bool nouveau_svm_range_invalidate(struct mmu_interval_notifier *mni,
>>   	struct svm_notifier *sn =
>>   		container_of(mni, struct svm_notifier, notifier);
>>   
>> -	if (range->event == MMU_NOTIFY_EXCLUSIVE &&
>> -	    range->owner == sn->svmm->vmm->cli->drm->dev)
>> -		return true;
> 
> I think this will cause a live-lock, because make_device_exclusive_range()
> will call the notifier, which, without the filtering, will increment the
> sequence count and cause endless retries of the loop in
> nouveau_atomic_range_fault(). The notifier needs to be able to figure out
> whether it was called in response to something this thread did
> (i.e., make_device_exclusive_range()) and can therefore ignore the
> invalidation, or from some other thread.

Yes, as discussed in the other patch, this must stay to inform secondary 
MMUs about the conversion *to* device exclusive.
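
For context, the filter being discussed is the hunk this patch removes
from nouveau_svm_range_invalidate(); it is what lets the driver tell an
invalidation triggered by its own make_device_exclusive() call apart
from one coming from another thread:

	if (range->event == MMU_NOTIFY_EXCLUSIVE &&
	    range->owner == sn->svmm->vmm->cli->drm->dev)
		return true;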

> 
> Looking at hmm_test I see that it doesn't use the sequence counter to
> ensure the PTE remains valid whilst it is mapped. I think that is
> probably wrong, so apologies if that led you astray.

Yes, the hmm_test does not completely follow the same model the nouveau
implementation does, so it might not be completely correct.
Simona Vetter Jan. 30, 2025, 1:29 p.m. UTC | #3
On Thu, Jan 30, 2025 at 10:28:00AM +0100, David Hildenbrand wrote:
> On 30.01.25 06:34, Alistair Popple wrote:
> > Looking at hmm_test I see that it doesn't use the sequence counter to
> > ensure the PTE remains valid whilst it is mapped. I think that is
> > probably wrong, so apologies if that led you astray.
> 
> Yes, the hmm_test does not completely follow the same model the nouveau
> implementation does, so it might not be completely correct.

A bit unrelated, but it just crossed my mind:

I guess another crucial difference is that the hw (probably, not sure)
will restart the fault if we don't repair it to its liking. So the
hmm-test does need some kind of retry loop somewhere too, to match that.
But it might be good to also still land some of the other improvements
discussed in these threads, to make make_device_exclusive() a bit more
reliable instead of relying on busy-looping through the hw fault handler
for everything.
-Sima
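
A hypothetical sketch (not in the tree) of what such a retry loop in
dmirror_exclusive() could look like, modelled on the nouveau pattern
above; the single-page dmirror_atomic_map(addr, page, dmirror) helper
and its signature are assumed for illustration, not taken from the
thread:

	struct folio *folio;
	struct page *page;
	unsigned long seq;
	int ret;

	for (;;) {
		seq = mmu_interval_read_begin(&dmirror->notifier);

		mmap_read_lock(mm);
		page = make_device_exclusive(mm, addr, &folio);
		mmap_read_unlock(mm);
		if (IS_ERR(page))
			return PTR_ERR(page);

		mutex_lock(&dmirror->mutex);
		if (!mmu_interval_read_retry(&dmirror->notifier, seq))
			break;	/* still valid: safe to update the mirror */
		mutex_unlock(&dmirror->mutex);
		folio_unlock(folio);
		folio_put(folio);
	}
	ret = dmirror_atomic_map(addr, page, dmirror);
	mutex_unlock(&dmirror->mutex);
	folio_unlock(folio);
	folio_put(folio);
	return ret;
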
David Hildenbrand Jan. 30, 2025, 3:26 p.m. UTC | #4
On 30.01.25 14:29, Simona Vetter wrote:
> On Thu, Jan 30, 2025 at 10:28:00AM +0100, David Hildenbrand wrote:
>> On 30.01.25 06:34, Alistair Popple wrote:
>>> Looking at hmm_test I see that it doesn't use the sequence counter to
>>> ensure the PTE remains valid whilst it is mapped. I think that is
>>> probably wrong, so apologies if that led you astray.
>>
>> Yes, the hmm_test does not completely follow the same model the nouveau
>> implementation does, so it might not be completely correct.
> 
> A bit unrelated, but it just crossed my mind:
> 
> I guess another crucial difference is that the hw (probably, not sure)
> will restart the fault if we don't repair it to its liking. So the
> hmm-test does need some kind of retry loop somewhere too, to match that.

Yes. Especially for the folio lock, spinning is a rather suboptimal
approach. So we likely want the option to just lock it instead of
try-locking it (or to get rid of it entirely :) ).
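
In other words, a minimal sketch of the difference (assuming the caller
may sleep; the -EBUSY is illustrative, not the actual return value):

	/* today: try-lock, and push the retry onto the caller */
	if (!folio_trylock(folio))
		return ERR_PTR(-EBUSY);

	/* the suggested option: sleep until the folio lock is free */
	folio_lock(folio);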

> But it might be good to also still land some of the other improvements
> discussed in these threads, to make make_device_exclusive() a bit more
> reliable instead of relying on busy-looping through the hw fault handler
> for everything.

Right.

Patch

diff --git a/drivers/gpu/drm/nouveau/nouveau_svm.c b/drivers/gpu/drm/nouveau/nouveau_svm.c
index 39e3740980bb..4758fee182b4 100644
--- a/drivers/gpu/drm/nouveau/nouveau_svm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_svm.c
@@ -510,10 +510,6 @@  static bool nouveau_svm_range_invalidate(struct mmu_interval_notifier *mni,
 	struct svm_notifier *sn =
 		container_of(mni, struct svm_notifier, notifier);
 
-	if (range->event == MMU_NOTIFY_EXCLUSIVE &&
-	    range->owner == sn->svmm->vmm->cli->drm->dev)
-		return true;
-
 	/*
 	 * serializes the update to mni->invalidate_seq done by caller and
 	 * prevents invalidation of the PTE from progressing while HW is being
@@ -609,7 +605,7 @@  static int nouveau_atomic_range_fault(struct nouveau_svmm *svmm,
 
 		notifier_seq = mmu_interval_read_begin(&notifier->notifier);
 		mmap_read_lock(mm);
-		page = make_device_exclusive(mm, start, drm->dev, &folio);
+		page = make_device_exclusive(mm, start, &folio);
 		mmap_read_unlock(mm);
 		if (IS_ERR(page)) {
 			ret = -EINVAL;
diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h
index d4e714661826..bac2385099dd 100644
--- a/include/linux/mmu_notifier.h
+++ b/include/linux/mmu_notifier.h
@@ -44,9 +44,7 @@  struct mmu_interval_notifier;
  * owner field matches the driver's device private pgmap owner.
  *
  * @MMU_NOTIFY_EXCLUSIVE: to signal a device driver that the device will no
- * longer have exclusive access to the page. When sent during creation of an
- * exclusive range the owner will be initialised to the value provided by the
- * caller of make_device_exclusive(), otherwise the owner will be NULL.
+ * longer have exclusive access to the page.
  */
 enum mmu_notifier_event {
 	MMU_NOTIFY_UNMAP = 0,
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 86425d42c1a9..3b216b91d2e5 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -664,7 +664,7 @@  void try_to_migrate(struct folio *folio, enum ttu_flags flags);
 void try_to_unmap(struct folio *, enum ttu_flags flags);
 
 struct page *make_device_exclusive(struct mm_struct *mm, unsigned long addr,
-		void *owner, struct folio **foliop);
+		struct folio **foliop);
 
 /* Avoid racy checks */
 #define PVMW_SYNC		(1 << 0)
diff --git a/lib/test_hmm.c b/lib/test_hmm.c
index 1c0a58279db9..8520c1d1b21b 100644
--- a/lib/test_hmm.c
+++ b/lib/test_hmm.c
@@ -786,7 +786,7 @@  static int dmirror_exclusive(struct dmirror *dmirror,
 		struct folio *folio;
 		struct page *page;
 
-		page = make_device_exclusive(mm, addr, NULL, &folio);
+		page = make_device_exclusive(mm, addr, &folio);
 		if (IS_ERR(page)) {
 			ret = PTR_ERR(page);
 			break;
diff --git a/mm/rmap.c b/mm/rmap.c
index 4acc9f6d743a..d99dbf59adc6 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -2397,7 +2397,6 @@  void try_to_migrate(struct folio *folio, enum ttu_flags flags)
  * make_device_exclusive() - Mark an address for exclusive use by a device
  * @mm: mm_struct of associated target process
  * @addr: the virtual address to mark for exclusive device access
- * @owner: passed to MMU_NOTIFY_EXCLUSIVE range notifier to allow filtering
  * @foliop: folio pointer will be stored here on success.
  *
  * This function looks up the page mapped at the given address, grabs a
@@ -2421,7 +2420,7 @@  void try_to_migrate(struct folio *folio, enum ttu_flags flags)
  * Returns: pointer to mapped page on success, otherwise a negative error.
  */
 struct page *make_device_exclusive(struct mm_struct *mm, unsigned long addr,
-		void *owner, struct folio **foliop)
+		struct folio **foliop)
 {
 	struct folio *folio, *fw_folio;
 	struct vm_area_struct *vma;