diff mbox series

[v2,3/5] mm/notifier: add migration invalidation type

Message ID 20200713172149.2310-4-rcampbell@nvidia.com (mailing list archive)
State Superseded
Delegated to: Jason Gunthorpe
Headers show
Series mm/migrate: avoid device private invalidations | expand

Commit Message

Ralph Campbell July 13, 2020, 5:21 p.m. UTC
Currently migrate_vma_setup() calls mmu_notifier_invalidate_range_start()
which flushes all device private page mappings whether or not a page
is being migrated to/from device private memory. In order to not disrupt
device mappings that are not being migrated, shift the responsibility
for clearing device private mappings to the device driver and leave
CPU page table unmapping handled by migrate_vma_setup(). To support
this, the caller of migrate_vma_setup() should always set struct
migrate_vma::src_owner to a non-NULL value that matches the device
private page->pgmap->owner. This value is then passed to the struct
mmu_notifier_range with a new event type which the driver's invalidation
function can use to avoid device MMU invalidations.

Signed-off-by: Ralph Campbell <rcampbell@nvidia.com>
---
 include/linux/mmu_notifier.h | 7 +++++++
 mm/migrate.c                 | 8 +++++++-
 2 files changed, 14 insertions(+), 1 deletion(-)

Comments

Jason Gunthorpe July 20, 2020, 6:40 p.m. UTC | #1
On Mon, Jul 13, 2020 at 10:21:47AM -0700, Ralph Campbell wrote:
> Currently migrate_vma_setup() calls mmu_notifier_invalidate_range_start()
> which flushes all device private page mappings whether or not a page
> is being migrated to/from device private memory. In order to not disrupt
> device mappings that are not being migrated, shift the responsibility
> for clearing device private mappings to the device driver and leave
> CPU page table unmapping handled by migrate_vma_setup(). To support
> this, the caller of migrate_vma_setup() should always set struct
> migrate_vma::src_owner to a non NULL value that matches the device
> private page->pgmap->owner. This value is then passed to the struct
> mmu_notifier_range with a new event type which the driver's invalidation
> function can use to avoid device MMU invalidations.
> 
> Signed-off-by: Ralph Campbell <rcampbell@nvidia.com>
>  include/linux/mmu_notifier.h | 7 +++++++
>  mm/migrate.c                 | 8 +++++++-
>  2 files changed, 14 insertions(+), 1 deletion(-)
> 
> diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h
> index fc68f3570e19..1921fcf6be5b 100644
> +++ b/include/linux/mmu_notifier.h
> @@ -38,6 +38,10 @@ struct mmu_interval_notifier;
>   *
>   * @MMU_NOTIFY_RELEASE: used during mmu_interval_notifier invalidate to signal
>   * that the mm refcount is zero and the range is no longer accessible.
> + *
> + * @MMU_NOTIFY_MIGRATE: used during migrate_vma_collect() invalidate to signal
> + * a device driver to possibly ignore the invalidation if the
> + * migrate_pgmap_owner field matches the driver's device private pgmap owner.
>   */
>  enum mmu_notifier_event {
>  	MMU_NOTIFY_UNMAP = 0,
> @@ -46,6 +50,7 @@ enum mmu_notifier_event {
>  	MMU_NOTIFY_PROTECTION_PAGE,
>  	MMU_NOTIFY_SOFT_DIRTY,
>  	MMU_NOTIFY_RELEASE,
> +	MMU_NOTIFY_MIGRATE,
>  };
>  
>  #define MMU_NOTIFIER_RANGE_BLOCKABLE (1 << 0)
> @@ -264,6 +269,7 @@ struct mmu_notifier_range {
>  	unsigned long end;
>  	unsigned flags;
>  	enum mmu_notifier_event event;
> +	void *migrate_pgmap_owner;
>  };
>  
>  static inline int mm_has_notifiers(struct mm_struct *mm)
> @@ -513,6 +519,7 @@ static inline void mmu_notifier_range_init(struct mmu_notifier_range *range,
>  	range->start = start;
>  	range->end = end;
>  	range->flags = flags;
> +	range->migrate_pgmap_owner = NULL;
>  }
>  
>  #define ptep_clear_flush_young_notify(__vma, __address, __ptep)		\
> diff --git a/mm/migrate.c b/mm/migrate.c
> index 2bbc5c4c672e..9b3dcb81be5f 100644
> +++ b/mm/migrate.c
> @@ -2391,8 +2391,14 @@ static void migrate_vma_collect(struct migrate_vma *migrate)
>  {
>  	struct mmu_notifier_range range;
>  
> -	mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, NULL,
> +	/*
> +	 * Note that the src_owner is passed to the mmu notifier callback so
> +	 * that the registered device driver can skip invalidating device
> +	 * private page mappings that won't be migrated.
> +	 */
> +	mmu_notifier_range_init(&range, MMU_NOTIFY_MIGRATE, 0, migrate->vma,
>  			migrate->vma->vm_mm, migrate->start, migrate->end);

So the idea is that src_owner is always set to the pgmap owner when
working with DEVICE_PRIVATE?

But then the comment in the prior patch should be fixed:

@@ -199,11 +204,12 @@  struct migrate_vma {
 
 	/*
 	 * Set to the owner value also stored in page->pgmap->owner for
+	 * migrating device private memory. The direction also needs to
+	 * be set to MIGRATE_VMA_FROM_DEVICE_PRIVATE.

It should say that the caller must always provide src_owner.

And that field should probably be renamed at this point, as there is
nothing "src" about it. It is just the pgmap_owner of the
DEVICE_PRIVATE pages the TO/FROM DEVICE migration is working on.

Jason
Ralph Campbell July 20, 2020, 7:56 p.m. UTC | #2
On 7/20/20 11:40 AM, Jason Gunthorpe wrote:
> On Mon, Jul 13, 2020 at 10:21:47AM -0700, Ralph Campbell wrote:
>> Currently migrate_vma_setup() calls mmu_notifier_invalidate_range_start()
>> which flushes all device private page mappings whether or not a page
>> is being migrated to/from device private memory. In order to not disrupt
>> device mappings that are not being migrated, shift the responsibility
>> for clearing device private mappings to the device driver and leave
>> CPU page table unmapping handled by migrate_vma_setup(). To support
>> this, the caller of migrate_vma_setup() should always set struct
>> migrate_vma::src_owner to a non NULL value that matches the device
>> private page->pgmap->owner. This value is then passed to the struct
>> mmu_notifier_range with a new event type which the driver's invalidation
>> function can use to avoid device MMU invalidations.
>>
>> Signed-off-by: Ralph Campbell <rcampbell@nvidia.com>
>>   include/linux/mmu_notifier.h | 7 +++++++
>>   mm/migrate.c                 | 8 +++++++-
>>   2 files changed, 14 insertions(+), 1 deletion(-)
>>
>> diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h
>> index fc68f3570e19..1921fcf6be5b 100644
>> +++ b/include/linux/mmu_notifier.h
>> @@ -38,6 +38,10 @@ struct mmu_interval_notifier;
>>    *
>>    * @MMU_NOTIFY_RELEASE: used during mmu_interval_notifier invalidate to signal
>>    * that the mm refcount is zero and the range is no longer accessible.
>> + *
>> + * @MMU_NOTIFY_MIGRATE: used during migrate_vma_collect() invalidate to signal
>> + * a device driver to possibly ignore the invalidation if the
>> + * migrate_pgmap_owner field matches the driver's device private pgmap owner.
>>    */
>>   enum mmu_notifier_event {
>>   	MMU_NOTIFY_UNMAP = 0,
>> @@ -46,6 +50,7 @@ enum mmu_notifier_event {
>>   	MMU_NOTIFY_PROTECTION_PAGE,
>>   	MMU_NOTIFY_SOFT_DIRTY,
>>   	MMU_NOTIFY_RELEASE,
>> +	MMU_NOTIFY_MIGRATE,
>>   };
>>   
>>   #define MMU_NOTIFIER_RANGE_BLOCKABLE (1 << 0)
>> @@ -264,6 +269,7 @@ struct mmu_notifier_range {
>>   	unsigned long end;
>>   	unsigned flags;
>>   	enum mmu_notifier_event event;
>> +	void *migrate_pgmap_owner;
>>   };
>>   
>>   static inline int mm_has_notifiers(struct mm_struct *mm)
>> @@ -513,6 +519,7 @@ static inline void mmu_notifier_range_init(struct mmu_notifier_range *range,
>>   	range->start = start;
>>   	range->end = end;
>>   	range->flags = flags;
>> +	range->migrate_pgmap_owner = NULL;
>>   }
>>   
>>   #define ptep_clear_flush_young_notify(__vma, __address, __ptep)		\
>> diff --git a/mm/migrate.c b/mm/migrate.c
>> index 2bbc5c4c672e..9b3dcb81be5f 100644
>> +++ b/mm/migrate.c
>> @@ -2391,8 +2391,14 @@ static void migrate_vma_collect(struct migrate_vma *migrate)
>>   {
>>   	struct mmu_notifier_range range;
>>   
>> -	mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, NULL,
>> +	/*
>> +	 * Note that the src_owner is passed to the mmu notifier callback so
>> +	 * that the registered device driver can skip invalidating device
>> +	 * private page mappings that won't be migrated.
>> +	 */
>> +	mmu_notifier_range_init(&range, MMU_NOTIFY_MIGRATE, 0, migrate->vma,
>>   			migrate->vma->vm_mm, migrate->start, migrate->end);
> 
> So the idea is that src_owner is always set to the pgmap owner when
> working with DEVICE_PRIVATE?
> 
> But then the comment in the prior patch should be fixed:
> 
> @@ -199,11 +204,12 @@  struct migrate_vma {
>   
>   	/*
>   	 * Set to the owner value also stored in page->pgmap->owner for
> +	 * migrating device private memory. The direction also needs to
> +	 * be set to MIGRATE_VMA_FROM_DEVICE_PRIVATE.
> 
> To say the caller must always provide src_owner.
> 
> And that field should probably be renamed at this point, as there is
> nothing "src" about it. It is just the pgmap_owner of the
> DEVICE_PRIVATE pages the TO/FROM DEVICE migration is working on.
> 
> Jason

Good point. I'll send a v3 with your suggested changes.
diff mbox series

Patch

diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h
index fc68f3570e19..1921fcf6be5b 100644
--- a/include/linux/mmu_notifier.h
+++ b/include/linux/mmu_notifier.h
@@ -38,6 +38,10 @@  struct mmu_interval_notifier;
  *
  * @MMU_NOTIFY_RELEASE: used during mmu_interval_notifier invalidate to signal
  * that the mm refcount is zero and the range is no longer accessible.
+ *
+ * @MMU_NOTIFY_MIGRATE: used during migrate_vma_collect() invalidate to signal
+ * a device driver to possibly ignore the invalidation if the
+ * migrate_pgmap_owner field matches the driver's device private pgmap owner.
  */
 enum mmu_notifier_event {
 	MMU_NOTIFY_UNMAP = 0,
@@ -46,6 +50,7 @@  enum mmu_notifier_event {
 	MMU_NOTIFY_PROTECTION_PAGE,
 	MMU_NOTIFY_SOFT_DIRTY,
 	MMU_NOTIFY_RELEASE,
+	MMU_NOTIFY_MIGRATE,
 };
 
 #define MMU_NOTIFIER_RANGE_BLOCKABLE (1 << 0)
@@ -264,6 +269,7 @@  struct mmu_notifier_range {
 	unsigned long end;
 	unsigned flags;
 	enum mmu_notifier_event event;
+	void *migrate_pgmap_owner;
 };
 
 static inline int mm_has_notifiers(struct mm_struct *mm)
@@ -513,6 +519,7 @@  static inline void mmu_notifier_range_init(struct mmu_notifier_range *range,
 	range->start = start;
 	range->end = end;
 	range->flags = flags;
+	range->migrate_pgmap_owner = NULL;
 }
 
 #define ptep_clear_flush_young_notify(__vma, __address, __ptep)		\
diff --git a/mm/migrate.c b/mm/migrate.c
index 2bbc5c4c672e..9b3dcb81be5f 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -2391,8 +2391,14 @@  static void migrate_vma_collect(struct migrate_vma *migrate)
 {
 	struct mmu_notifier_range range;
 
-	mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, NULL,
+	/*
+	 * Note that the src_owner is passed to the mmu notifier callback so
+	 * that the registered device driver can skip invalidating device
+	 * private page mappings that won't be migrated.
+	 */
+	mmu_notifier_range_init(&range, MMU_NOTIFY_MIGRATE, 0, migrate->vma,
 			migrate->vma->vm_mm, migrate->start, migrate->end);
+	range.migrate_pgmap_owner = migrate->src_owner;
 	mmu_notifier_invalidate_range_start(&range);
 
 	walk_page_range(migrate->vma->vm_mm, migrate->start, migrate->end,