diff mbox series

[10/34] drm/amdkfd: svm range eviction and restore

Message ID 20210401042228.1423-11-Felix.Kuehling@amd.com (mailing list archive)
State New, archived
Headers show
Series Add HMM-based SVM memory manager to KFD v3 | expand

Commit Message

Felix Kuehling April 1, 2021, 4:22 a.m. UTC
HMM interval notifier callback notify CPU page table will be updated,
stop process queues if the updated address belongs to svm range
registered in process svms objects tree. Scheduled restore work to
update GPU page table using new pages address in the updated svm range.

The restore worker flushes any deferred work to make sure it restores
an up-to-date svm_range_list.

Signed-off-by: Philip Yang <Philip.Yang@amd.com>
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h    |   2 +
 drivers/gpu/drm/amd/amdkfd/kfd_process.c |   1 +
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c     | 137 ++++++++++++++++++++++-
 drivers/gpu/drm/amd/amdkfd/kfd_svm.h     |   2 +
 4 files changed, 141 insertions(+), 1 deletion(-)

Comments

kernel test robot April 1, 2021, 3:19 p.m. UTC | #1
Hi Felix,

I love your patch! Perhaps something to improve:

[auto build test WARNING on next-20210331]
[cannot apply to drm-intel/for-linux-next drm-tip/drm-tip drm-exynos/exynos-drm-next tegra-drm/drm/tegra/for-next linus/master drm/drm-next v5.12-rc5 v5.12-rc4 v5.12-rc3 v5.12-rc5]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Felix-Kuehling/Add-HMM-based-SVM-memory-manager-to-KFD-v3/20210401-122712
base:    7a43c78d0573e0bbbb0456b033e2b9a895b89464
config: x86_64-randconfig-a011-20210401 (attached as .config)
compiler: gcc-9 (Debian 9.3.0-22) 9.3.0
reproduce (this is a W=1 build):
        # https://github.com/0day-ci/linux/commit/07dfb6a9dad338dae38a3a840ce14f77e7498b1f
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Felix-Kuehling/Add-HMM-based-SVM-memory-manager-to-KFD-v3/20210401-122712
        git checkout 07dfb6a9dad338dae38a3a840ce14f77e7498b1f
        # save the attached .config to linux build tree
        make W=1 ARCH=x86_64 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All warnings (new ones prefixed by >>):

   drivers/gpu/drm/amd/amdgpu/../amdkfd/kfd_svm.c:170:6: warning: no previous prototype for 'svm_range_dma_unmap' [-Wmissing-prototypes]
     170 | void svm_range_dma_unmap(struct device *dev, dma_addr_t *dma_addr,
         |      ^~~~~~~~~~~~~~~~~~~
   drivers/gpu/drm/amd/amdgpu/../amdkfd/kfd_svm.c:603:6: warning: no previous prototype for 'svm_range_add_child' [-Wmissing-prototypes]
     603 | void svm_range_add_child(struct svm_range *prange, struct mm_struct *mm,
         |      ^~~~~~~~~~~~~~~~~~~
   drivers/gpu/drm/amd/amdgpu/../amdkfd/kfd_svm.c:799:5: warning: no previous prototype for 'svm_range_reserve_bos' [-Wmissing-prototypes]
     799 | int svm_range_reserve_bos(struct svm_validate_context *ctx)
         |     ^~~~~~~~~~~~~~~~~~~~~
   drivers/gpu/drm/amd/amdgpu/../amdkfd/kfd_svm.c:853:6: warning: no previous prototype for 'svm_range_unreserve_bos' [-Wmissing-prototypes]
     853 | void svm_range_unreserve_bos(struct svm_validate_context *ctx)
         |      ^~~~~~~~~~~~~~~~~~~~~~~
   drivers/gpu/drm/amd/amdgpu/../amdkfd/kfd_svm.c: In function 'svm_range_evict':
>> drivers/gpu/drm/amd/amdgpu/../amdkfd/kfd_svm.c:1071:6: warning: variable 'invalid' set but not used [-Wunused-but-set-variable]
    1071 |  int invalid, evicted_ranges;
         |      ^~~~~~~
   drivers/gpu/drm/amd/amdgpu/../amdkfd/kfd_svm.c: At top level:
   drivers/gpu/drm/amd/amdgpu/../amdkfd/kfd_svm.c:1094:19: warning: no previous prototype for 'svm_range_clone' [-Wmissing-prototypes]
    1094 | struct svm_range *svm_range_clone(struct svm_range *old)
         |                   ^~~~~~~~~~~~~~~
   drivers/gpu/drm/amd/amdgpu/../amdkfd/kfd_svm.c:1349:1: warning: no previous prototype for 'svm_range_add_list_work' [-Wmissing-prototypes]
    1349 | svm_range_add_list_work(struct svm_range_list *svms, struct svm_range *prange,
         | ^~~~~~~~~~~~~~~~~~~~~~~
   drivers/gpu/drm/amd/amdgpu/../amdkfd/kfd_svm.c:1371:6: warning: no previous prototype for 'schedule_deferred_list_work' [-Wmissing-prototypes]
    1371 | void schedule_deferred_list_work(struct svm_range_list *svms)
         |      ^~~~~~~~~~~~~~~~~~~~~~~~~~~


vim +/invalid +1071 drivers/gpu/drm/amd/amdgpu/../amdkfd/kfd_svm.c

  1054	
  1055	/**
  1056	 * svm_range_evict - evict svm range
  1057	 *
  1058	 * Stop all queues of the process to ensure GPU doesn't access the memory, then
  1059	 * return to let CPU evict the buffer and proceed CPU pagetable update.
  1060	 *
  1061	 * Don't need use lock to sync cpu pagetable invalidation with GPU execution.
  1062	 * If invalidation happens while restore work is running, restore work will
  1063	 * restart to ensure to get the latest CPU pages mapping to GPU, then start
  1064	 * the queues.
  1065	 */
  1066	static int
  1067	svm_range_evict(struct svm_range *prange, struct mm_struct *mm,
  1068			unsigned long start, unsigned long last)
  1069	{
  1070		struct svm_range_list *svms = prange->svms;
> 1071		int invalid, evicted_ranges;
  1072		int r = 0;
  1073	
  1074		invalid = atomic_inc_return(&prange->invalid);
  1075		evicted_ranges = atomic_inc_return(&svms->evicted_ranges);
  1076		if (evicted_ranges != 1)
  1077			return r;
  1078	
  1079		pr_debug("evicting svms 0x%p range [0x%lx 0x%lx]\n",
  1080			 prange->svms, prange->start, prange->last);
  1081	
  1082		/* First eviction, stop the queues */
  1083		r = kgd2kfd_quiesce_mm(mm);
  1084		if (r)
  1085			pr_debug("failed to quiesce KFD\n");
  1086	
  1087		pr_debug("schedule to restore svm %p ranges\n", svms);
  1088		schedule_delayed_work(&svms->restore_work,
  1089			msecs_to_jiffies(AMDGPU_SVM_RANGE_RESTORE_DELAY_MS));
  1090	
  1091		return r;
  1092	}
  1093	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
diff mbox series

Patch

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 0d19a13fc227..1b829eef9e50 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -738,6 +738,8 @@  struct svm_range_list {
 	struct work_struct		deferred_list_work;
 	struct list_head		deferred_range_list;
 	spinlock_t			deferred_list_lock;
+	atomic_t			evicted_ranges;
+	struct delayed_work		restore_work;
 };
 
 /* Process data */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 1f3d4f5c64a8..3bcde43ccd70 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -1064,6 +1064,7 @@  static void kfd_process_notifier_release(struct mmu_notifier *mn,
 
 	cancel_delayed_work_sync(&p->eviction_work);
 	cancel_delayed_work_sync(&p->restore_work);
+	cancel_delayed_work_sync(&p->svms.restore_work);
 
 	mutex_lock(&p->mutex);
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 4736fe996feb..b0e0b243754c 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -21,6 +21,7 @@ 
  */
 
 #include <linux/types.h>
+#include <linux/sched/task.h>
 #include "amdgpu_sync.h"
 #include "amdgpu_object.h"
 #include "amdgpu_vm.h"
@@ -28,6 +29,8 @@ 
 #include "kfd_priv.h"
 #include "kfd_svm.h"
 
+#define AMDGPU_SVM_RANGE_RESTORE_DELAY_MS 1
+
 static bool
 svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni,
 				    const struct mmu_notifier_range *range,
@@ -250,6 +253,7 @@  svm_range *svm_range_new(struct svm_range_list *svms, uint64_t start,
 	INIT_LIST_HEAD(&prange->insert_list);
 	INIT_LIST_HEAD(&prange->deferred_list);
 	INIT_LIST_HEAD(&prange->child_list);
+	atomic_set(&prange->invalid, 0);
 	mutex_init(&prange->lock);
 	svm_range_set_default_attributes(&prange->preferred_loc,
 					 &prange->prefetch_loc,
@@ -964,6 +968,129 @@  svm_range_list_lock_and_flush_work(struct svm_range_list *svms,
 	goto retry_flush_work;
 }
 
+static void svm_range_restore_work(struct work_struct *work)
+{
+	struct delayed_work *dwork = to_delayed_work(work);
+	struct amdkfd_process_info *process_info;
+	struct svm_range_list *svms;
+	struct svm_range *prange;
+	struct kfd_process *p;
+	struct mm_struct *mm;
+	int evicted_ranges;
+	int invalid;
+	int r;
+
+	svms = container_of(dwork, struct svm_range_list, restore_work);
+	evicted_ranges = atomic_read(&svms->evicted_ranges);
+	if (!evicted_ranges)
+		return;
+
+	pr_debug("restore svm ranges\n");
+
+	/* kfd_process_notifier_release destroys this worker thread. So during
+	 * the lifetime of this thread, kfd_process and mm will be valid.
+	 */
+	p = container_of(svms, struct kfd_process, svms);
+	process_info = p->kgd_process_info;
+	mm = p->mm;
+	if (!mm)
+		return;
+
+	mutex_lock(&process_info->lock);
+	svm_range_list_lock_and_flush_work(svms, mm);
+	mutex_lock(&svms->lock);
+
+	evicted_ranges = atomic_read(&svms->evicted_ranges);
+
+	list_for_each_entry(prange, &svms->list, list) {
+		invalid = atomic_read(&prange->invalid);
+		if (!invalid)
+			continue;
+
+		pr_debug("restoring svms 0x%p prange 0x%p [0x%lx %lx] inv %d\n",
+			 prange->svms, prange, prange->start, prange->last,
+			 invalid);
+
+		r = svm_range_validate_and_map(mm, prange, MAX_GPU_INSTANCE,
+					       false, true);
+		if (r) {
+			pr_debug("failed %d to map 0x%lx to gpus\n", r,
+				 prange->start);
+			goto unlock_out;
+		}
+
+		if (atomic_cmpxchg(&prange->invalid, invalid, 0) != invalid)
+			goto unlock_out;
+	}
+
+	if (atomic_cmpxchg(&svms->evicted_ranges, evicted_ranges, 0) !=
+	    evicted_ranges)
+		goto unlock_out;
+
+	evicted_ranges = 0;
+
+	r = kgd2kfd_resume_mm(mm);
+	if (r) {
+		/* No recovery from this failure. Probably the CP is
+		 * hanging. No point trying again.
+		 */
+		pr_debug("failed %d to resume KFD\n", r);
+	}
+
+	pr_debug("restore svm ranges successfully\n");
+
+unlock_out:
+	mutex_unlock(&svms->lock);
+	mmap_write_unlock(mm);
+	mutex_unlock(&process_info->lock);
+
+	/* If validation failed, reschedule another attempt */
+	if (evicted_ranges) {
+		pr_debug("reschedule to restore svm range\n");
+		schedule_delayed_work(&svms->restore_work,
+			msecs_to_jiffies(AMDGPU_SVM_RANGE_RESTORE_DELAY_MS));
+	}
+}
+
+/**
+ * svm_range_evict - evict svm range
+ *
+ * Stop all queues of the process to ensure GPU doesn't access the memory, then
+ * return to let CPU evict the buffer and proceed CPU pagetable update.
+ *
+ * Don't need use lock to sync cpu pagetable invalidation with GPU execution.
+ * If invalidation happens while restore work is running, restore work will
+ * restart to ensure to get the latest CPU pages mapping to GPU, then start
+ * the queues.
+ */
+static int
+svm_range_evict(struct svm_range *prange, struct mm_struct *mm,
+		unsigned long start, unsigned long last)
+{
+	struct svm_range_list *svms = prange->svms;
+	int invalid, evicted_ranges;
+	int r = 0;
+
+	invalid = atomic_inc_return(&prange->invalid);
+	evicted_ranges = atomic_inc_return(&svms->evicted_ranges);
+	if (evicted_ranges != 1)
+		return r;
+
+	pr_debug("evicting svms 0x%p range [0x%lx 0x%lx]\n",
+		 prange->svms, prange->start, prange->last);
+
+	/* First eviction, stop the queues */
+	r = kgd2kfd_quiesce_mm(mm);
+	if (r)
+		pr_debug("failed to quiesce KFD\n");
+
+	pr_debug("schedule to restore svm %p ranges\n", svms);
+	schedule_delayed_work(&svms->restore_work,
+		msecs_to_jiffies(AMDGPU_SVM_RANGE_RESTORE_DELAY_MS));
+
+	return r;
+}
+
 struct svm_range *svm_range_clone(struct svm_range *old)
 {
 	struct svm_range *new;
@@ -1336,6 +1463,11 @@  svm_range_unmap_from_cpu(struct mm_struct *mm, struct svm_range *prange,
  * svm_range_cpu_invalidate_pagetables - interval notifier callback
  *
  * MMU range unmap notifier to remove svm ranges
+ *
+ * If GPU vm fault retry is not enabled, evict the svm range, then restore
+ * work will update GPU mapping.
+ * If GPU vm fault retry is enabled, unmap the svm range from GPU, vm fault
+ * will update GPU mapping.
  */
 static bool
 svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni,
@@ -1366,9 +1498,10 @@  svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni,
 		svm_range_unmap_from_cpu(mni->mm, prange, start, last);
 		break;
 	default:
+		mmu_interval_set_seq(mni, cur_seq);
+		svm_range_evict(prange, mni->mm, start, last);
 		break;
 	}
-
 	return true;
 }
 
@@ -1389,6 +1522,8 @@  int svm_range_list_init(struct kfd_process *p)
 	svms->objects = RB_ROOT_CACHED;
 	mutex_init(&svms->lock);
 	INIT_LIST_HEAD(&svms->list);
+	atomic_set(&svms->evicted_ranges, 0);
+	INIT_DELAYED_WORK(&svms->restore_work, svm_range_restore_work);
 	INIT_WORK(&svms->deferred_list_work, svm_range_deferred_list_work);
 	INIT_LIST_HEAD(&svms->deferred_range_list);
 	spin_lock_init(&svms->deferred_list_lock);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
index 1d89401f16b4..267545c5ceb9 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
@@ -66,6 +66,7 @@  struct svm_work_list_item {
  * @perfetch_loc: last prefetch location, 0 for CPU, or GPU id
  * @actual_loc: the actual location, 0 for CPU, or GPU id
  * @granularity:migration granularity, log2 num pages
+ * @invalid:    not 0 means cpu page table is invalidated
  * @notifier:   register mmu interval notifier
  * @work_item:  deferred work item information
  * @deferred_list: list header used to add range to deferred list
@@ -96,6 +97,7 @@  struct svm_range {
 	uint32_t			prefetch_loc;
 	uint32_t			actual_loc;
 	uint8_t				granularity;
+	atomic_t			invalid;
 	struct mmu_interval_notifier	notifier;
 	struct svm_work_list_item	work_item;
 	struct list_head		deferred_list;