diff mbox series

[v3,2/2] drm/i915/dgfx: Release mmap on rpm suspend

Message ID 20220909112419.26901-3-anshuman.gupta@intel.com (mailing list archive)
State New, archived
Headers show
Series DGFX mmap with rpm | expand

Commit Message

Gupta, Anshuman Sept. 9, 2022, 11:24 a.m. UTC
Release all mmap mappings for all lmem objects which are associated
with userfault such that, while the PCIe function is in D3hot, any access
to memory mappings will raise a userfault.

Runtime resume the dgpu(when gem object lies in lmem).
This will transition the dgpu graphics function to D0
state if it was in D3 in order to access the mmap memory
mappings.

v2:
- Squashes the patches. [Matt Auld]
- Add adequate locking for lmem_userfault_list addition. [Matt Auld]
- Reused obj->userfault_count to avoid double addition. [Matt Auld]
- Added i915_gem_object_lock to check
  i915_gem_object_is_lmem. [Matt Auld]

v3:
- Use i915_ttm_cpu_maps_iomem. [Matt Auld]
- Fix 'ret == 0 to ret == VM_FAULT_NOPAGE'. [Matt Auld]
- Reuse obj->userfault_count as a bool 0 or 1. [Matt Auld]
- Delete the mmaped obj from lmem_userfault_list in obj
  destruction path. [Matt Auld]
- Get a wakeref for object destruction path. [Matt Auld]
- Use intel_wakeref_auto to delay runtime PM. [Matt Auld]

PCIe Specs 5.3.1.4.1

Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/6331
Cc: Matthew Auld <matthew.auld@intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Anshuman Gupta <anshuman.gupta@intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_mman.c      | 18 ++++++--
 drivers/gpu/drm/i915/gem/i915_gem_mman.h      |  1 +
 drivers/gpu/drm/i915/gem/i915_gem_object.c    |  2 +-
 .../gpu/drm/i915/gem/i915_gem_object_types.h  |  3 +-
 drivers/gpu/drm/i915/gem/i915_gem_ttm.c       | 46 ++++++++++++++++---
 drivers/gpu/drm/i915/gt/intel_gt.c            |  2 +
 drivers/gpu/drm/i915/gt/intel_gt_types.h      |  3 ++
 drivers/gpu/drm/i915/i915_driver.c            |  1 -
 drivers/gpu/drm/i915/i915_gem.c               |  5 ++
 9 files changed, 68 insertions(+), 13 deletions(-)

Comments

Matthew Auld Sept. 9, 2022, 3:36 p.m. UTC | #1
On 09/09/2022 12:24, Anshuman Gupta wrote:
> Release all mmap mapping for all lmem objects which are associated
> with userfault such that, while pcie function in D3hot, any access
> to memory mappings will raise a userfault.
> 
> Runtime resume the dgpu(when gem object lies in lmem).
> This will transition the dgpu graphics function to D0
> state if it was in D3 in order to access the mmap memory
> mappings.
> 
> v2:
> - Squashes the patches. [Matt Auld]
> - Add adequate locking for lmem_userfault_list addition. [Matt Auld]
> - Reused obj->userfault_count to avoid double addition. [Matt Auld]
> - Added i915_gem_object_lock to check
>    i915_gem_object_is_lmem. [Matt Auld]
> 
> v3:
> - Use i915_ttm_cpu_maps_iomem. [Matt Auld]
> - Fix 'ret == 0 to ret == VM_FAULT_NOPAGE'. [Matt Auld]
> - Reuse obj->userfault_count as a bool 0 or 1. [Matt Auld]
> - Delete the mmaped obj from lmem_userfault_list in obj
>    destruction path. [Matt Auld]
> - Get a wakeref for object destruction patch. [Matt Auld]
> - Use intel_wakeref_auto to delay runtime PM. [Matt Auld]
> 
> PCIe Specs 5.3.1.4.1
> 
> Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/6331
> Cc: Matthew Auld <matthew.auld@intel.com>
> Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
> Signed-off-by: Anshuman Gupta <anshuman.gupta@intel.com>
> ---
>   drivers/gpu/drm/i915/gem/i915_gem_mman.c      | 18 ++++++--
>   drivers/gpu/drm/i915/gem/i915_gem_mman.h      |  1 +
>   drivers/gpu/drm/i915/gem/i915_gem_object.c    |  2 +-
>   .../gpu/drm/i915/gem/i915_gem_object_types.h  |  3 +-
>   drivers/gpu/drm/i915/gem/i915_gem_ttm.c       | 46 ++++++++++++++++---
>   drivers/gpu/drm/i915/gt/intel_gt.c            |  2 +
>   drivers/gpu/drm/i915/gt/intel_gt_types.h      |  3 ++
>   drivers/gpu/drm/i915/i915_driver.c            |  1 -
>   drivers/gpu/drm/i915/i915_gem.c               |  5 ++
>   9 files changed, 68 insertions(+), 13 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
> index 2be222c03c82..55a4e9fba5ba 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
> @@ -550,13 +550,10 @@ void i915_gem_object_release_mmap_gtt(struct drm_i915_gem_object *obj)
>   	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
>   }
>   
> -void i915_gem_object_release_mmap_offset(struct drm_i915_gem_object *obj)
> +void __i915_gem_object_release_mmap_offset(struct drm_i915_gem_object *obj)
>   {
>   	struct i915_mmap_offset *mmo, *mn;
>   
> -	if (obj->ops->unmap_virtual)
> -		obj->ops->unmap_virtual(obj);
> -
>   	spin_lock(&obj->mmo.lock);
>   	rbtree_postorder_for_each_entry_safe(mmo, mn,
>   					     &obj->mmo.offsets, offset) {
> @@ -573,6 +570,19 @@ void i915_gem_object_release_mmap_offset(struct drm_i915_gem_object *obj)
>   		spin_lock(&obj->mmo.lock);
>   	}
>   	spin_unlock(&obj->mmo.lock);
> +
> +	if (obj->userfault_count) {
> +		list_del(&obj->userfault_link);
> +		obj->userfault_count = 0;
> +	}
> +}
> +
> +void i915_gem_object_release_mmap_offset(struct drm_i915_gem_object *obj)
> +{
> +	if (obj->ops->unmap_virtual)
> +		obj->ops->unmap_virtual(obj);
> +
> +	__i915_gem_object_release_mmap_offset(obj);
>   }
>   
>   static struct i915_mmap_offset *
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.h b/drivers/gpu/drm/i915/gem/i915_gem_mman.h
> index efee9e0d2508..271039fdf875 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.h
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.h
> @@ -27,6 +27,7 @@ int i915_gem_dumb_mmap_offset(struct drm_file *file_priv,
>   void __i915_gem_object_release_mmap_gtt(struct drm_i915_gem_object *obj);
>   void i915_gem_object_release_mmap_gtt(struct drm_i915_gem_object *obj);
>   
> +void __i915_gem_object_release_mmap_offset(struct drm_i915_gem_object *obj);
>   void i915_gem_object_release_mmap_offset(struct drm_i915_gem_object *obj);
>   
>   #endif
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c
> index 389e9f157ca5..f6e60cc86b9e 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
> @@ -238,7 +238,7 @@ static void __i915_gem_object_free_mmaps(struct drm_i915_gem_object *obj)
>   {
>   	/* Skip serialisation and waking the device if known to be not used. */
>   
> -	if (obj->userfault_count)
> +	if (obj->userfault_count && !IS_DGFX(to_i915(obj->base.dev)))
>   		i915_gem_object_release_mmap_gtt(obj);
>   
>   	if (!RB_EMPTY_ROOT(&obj->mmo.offsets)) {
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> index 9f6b14ec189a..40305e2bcd49 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> @@ -298,7 +298,8 @@ struct drm_i915_gem_object {
>   	};
>   
>   	/**
> -	 * Whether the object is currently in the GGTT mmap.
> +	 * Whether the object is currently in the GGTT or any other supported
> +	 * fake offset mmap backed by lmem.
>   	 */
>   	unsigned int userfault_count;
>   	struct list_head userfault_link;
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
> index bc9c432edffe..bfb2074d65ae 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
> @@ -509,9 +509,17 @@ static int i915_ttm_shrink(struct drm_i915_gem_object *obj, unsigned int flags)
>   static void i915_ttm_delete_mem_notify(struct ttm_buffer_object *bo)
>   {
>   	struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
> +	intel_wakeref_t wakeref = 0;
>   
>   	if (likely(obj)) {
> +		if (i915_ttm_cpu_maps_iomem(bo->resource))
> +			wakeref = intel_runtime_pm_get(&to_i915(obj->base.dev)->runtime_pm);
> +
>   		__i915_gem_object_pages_fini(obj);
> +
> +		if (wakeref)
> +			intel_runtime_pm_put(&to_i915(obj->base.dev)->runtime_pm, wakeref);
> +
>   		i915_ttm_free_cached_io_rsgt(obj);
>   	}
>   }
> @@ -981,6 +989,7 @@ static vm_fault_t vm_fault_ttm(struct vm_fault *vmf)
>   	struct ttm_buffer_object *bo = area->vm_private_data;
>   	struct drm_device *dev = bo->base.dev;
>   	struct drm_i915_gem_object *obj;
> +	intel_wakeref_t wakeref = 0;
>   	vm_fault_t ret;
>   	int idx;
>   
> @@ -990,16 +999,22 @@ static vm_fault_t vm_fault_ttm(struct vm_fault *vmf)
>   
>   	/* Sanity check that we allow writing into this object */
>   	if (unlikely(i915_gem_object_is_readonly(obj) &&
> -		     area->vm_flags & VM_WRITE))
> -		return VM_FAULT_SIGBUS;
> +		     area->vm_flags & VM_WRITE)) {
> +		ret = VM_FAULT_SIGBUS;
> +		goto out_rpm;
> +	}
>   
>   	ret = ttm_bo_vm_reserve(bo, vmf);
>   	if (ret)
> -		return ret;
> +		goto out_rpm;
> +
> +	if (i915_ttm_cpu_maps_iomem(bo->resource))
> +		wakeref = intel_runtime_pm_get(&to_i915(obj->base.dev)->runtime_pm);

Could maybe move this down a bit. No need to grab the wakeref if the 
object is marked as DONTNEED.

>   
>   	if (obj->mm.madv != I915_MADV_WILLNEED) {
>   		dma_resv_unlock(bo->base.resv);
> -		return VM_FAULT_SIGBUS;
> +		ret = VM_FAULT_SIGBUS;
> +		goto out_rpm;
>   	}
>   
>   	if (!i915_ttm_resource_mappable(bo->resource)) {
> @@ -1023,7 +1038,8 @@ static vm_fault_t vm_fault_ttm(struct vm_fault *vmf)
>   		if (err) {
>   			drm_dbg(dev, "Unable to make resource CPU accessible\n");
>   			dma_resv_unlock(bo->base.resv);
> -			return VM_FAULT_SIGBUS;
> +			ret = VM_FAULT_SIGBUS;
> +			goto out_rpm;
>   		}
>   	}
>   
> @@ -1034,12 +1050,30 @@ static vm_fault_t vm_fault_ttm(struct vm_fault *vmf)
>   	} else {
>   		ret = ttm_bo_vm_dummy_page(vmf, vmf->vma->vm_page_prot);
>   	}
> +
>   	if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
> -		return ret;
> +		goto out_rpm;
> +
> +	/* ttm_bo_vm_reserve() already has dma_resv_lock */
> +	if (ret == VM_FAULT_NOPAGE && wakeref && !obj->userfault_count) {
> +		obj->userfault_count = 1;
> +		mutex_lock(&to_gt(to_i915(obj->base.dev))->lmem_userfault_lock);
> +		list_add(&obj->userfault_link, &to_gt(to_i915(obj->base.dev))->lmem_userfault_list);
> +		mutex_unlock(&to_gt(to_i915(obj->base.dev))->lmem_userfault_lock);
> +	}
> +
> +	if (wakeref && CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND)
> +		intel_wakeref_auto(&to_gt(to_i915(obj->base.dev))->userfault_wakeref,
> +				   msecs_to_jiffies_timeout(CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND));
>   
>   	i915_ttm_adjust_lru(obj);
>   
>   	dma_resv_unlock(bo->base.resv);
> +
> +out_rpm:
> +	if (wakeref)
> +		intel_runtime_pm_put(&to_i915(obj->base.dev)->runtime_pm, wakeref);
> +
>   	return ret;
>   }
>   
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
> index 1ce344cfa827..ee9ee815f505 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt.c
> +++ b/drivers/gpu/drm/i915/gt/intel_gt.c
> @@ -39,6 +39,8 @@ static void __intel_gt_init_early(struct intel_gt *gt)
>   {
>   	spin_lock_init(&gt->irq_lock);
>   
> +	INIT_LIST_HEAD(&gt->lmem_userfault_list);
> +	mutex_init(&gt->lmem_userfault_lock);
>   	INIT_LIST_HEAD(&gt->closed_vma);
>   	spin_lock_init(&gt->closed_lock);
>   
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h
> index e6a662f9d7c0..a2d87e742161 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
> @@ -132,6 +132,9 @@ struct intel_gt {
>   	struct intel_wakeref wakeref;
>   	atomic_t user_wakeref;
>   
> +	struct mutex lmem_userfault_lock; /* Protects access to usefault list */
> +	struct list_head lmem_userfault_list;

Probably needs a big comment explaining how this all works, since it 
seems quite tricky.

"If we are outside of the runtime suspend path, access to 
@lmem_userfault_list requires always first grabbing the runtime pm, to 
ensure we can't race against runtime suspend removing items. Once we 
have that we also need to grab @lmem_userfault_lock, at which point we 
have exclusive access. The runtime suspend path is special since it 
doesn't really hold any locks, but instead has exclusive access by virtue 
of all other accesses requiring holding the runtime pm."

Also according to that we are then missing holding lmem_userfault_lock 
in i915_gem_object_release_mmap_offset(), for the object destruction 
case. We are only holding the runtime pm, which only saves us from 
runtime suspend, and not some other concurrent user, like a different 
object destroy or fault touching the list.

> +
>   	struct list_head closed_vma;
>   	spinlock_t closed_lock; /* guards the list of closed_vma */
>   
> diff --git a/drivers/gpu/drm/i915/i915_driver.c b/drivers/gpu/drm/i915/i915_driver.c
> index 1332c70370a6..81699aa505e2 100644
> --- a/drivers/gpu/drm/i915/i915_driver.c
> +++ b/drivers/gpu/drm/i915/i915_driver.c
> @@ -1591,7 +1591,6 @@ static int intel_runtime_suspend(struct device *kdev)
>   		return -ENODEV;
>   
>   	drm_dbg(&dev_priv->drm, "Suspending device\n");
> -
>   	disable_rpm_wakeref_asserts(rpm);
>   
>   	/*
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 70f082f7911a..01f23c0e7fec 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -842,6 +842,11 @@ void i915_gem_runtime_suspend(struct drm_i915_private *i915)
>   				 &to_gt(i915)->ggtt->userfault_list, userfault_link)
>   		__i915_gem_object_release_mmap_gtt(obj);
>   
> +	list_for_each_entry_safe(obj, on,
> +				 &to_gt(i915)->lmem_userfault_list, userfault_link) {
> +		__i915_gem_object_release_mmap_offset(obj);

drm_vma_node_unmap(&bo->base.vma_node, bdev->dev_mapping);

I don't think mm.offsets is tracking the ttm node...

> +	}
> +
>   	/*
>   	 * The fence will be lost when the device powers down. If any were
>   	 * in use by hardware (i.e. they are pinned), we should not be powering
kernel test robot Sept. 9, 2022, 4:08 p.m. UTC | #2
Hi Anshuman,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on drm-tip/drm-tip]

url:    https://github.com/intel-lab-lkp/linux/commits/Anshuman-Gupta/DGFX-mmap-with-rpm/20220909-192609
base:   git://anongit.freedesktop.org/drm/drm-tip drm-tip
config: i386-randconfig-a004 (https://download.01.org/0day-ci/archive/20220910/202209100005.i5hNgIqU-lkp@intel.com/config)
compiler: clang version 14.0.6 (https://github.com/llvm/llvm-project f28c006a5895fc0e329fe15fead81e37457cb1d1)
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/intel-lab-lkp/linux/commit/b3f193a1659a69de9e9025c9b02a039d0a58390d
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review Anshuman-Gupta/DGFX-mmap-with-rpm/20220909-192609
        git checkout b3f193a1659a69de9e9025c9b02a039d0a58390d
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=i386 SHELL=/bin/bash drivers/gpu/drm/i915/

If you fix the issue, kindly add following tag where applicable
Reported-by: kernel test robot <lkp@intel.com>

All warnings (new ones prefixed by >>):

>> drivers/gpu/drm/i915/gem/i915_gem_ttm.c:1065:14: warning: use of logical '&&' with constant operand [-Wconstant-logical-operand]
           if (wakeref && CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND)
                       ^  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   drivers/gpu/drm/i915/gem/i915_gem_ttm.c:1065:14: note: use '&' for a bitwise operation
           if (wakeref && CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND)
                       ^~
                       &
   drivers/gpu/drm/i915/gem/i915_gem_ttm.c:1065:14: note: remove constant to silence this warning
           if (wakeref && CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND)
                      ~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   1 warning generated.


vim +1065 drivers/gpu/drm/i915/gem/i915_gem_ttm.c

   985	
   986	static vm_fault_t vm_fault_ttm(struct vm_fault *vmf)
   987	{
   988		struct vm_area_struct *area = vmf->vma;
   989		struct ttm_buffer_object *bo = area->vm_private_data;
   990		struct drm_device *dev = bo->base.dev;
   991		struct drm_i915_gem_object *obj;
   992		intel_wakeref_t wakeref = 0;
   993		vm_fault_t ret;
   994		int idx;
   995	
   996		obj = i915_ttm_to_gem(bo);
   997		if (!obj)
   998			return VM_FAULT_SIGBUS;
   999	
  1000		/* Sanity check that we allow writing into this object */
  1001		if (unlikely(i915_gem_object_is_readonly(obj) &&
  1002			     area->vm_flags & VM_WRITE)) {
  1003			ret = VM_FAULT_SIGBUS;
  1004			goto out_rpm;
  1005		}
  1006	
  1007		ret = ttm_bo_vm_reserve(bo, vmf);
  1008		if (ret)
  1009			goto out_rpm;
  1010	
  1011		if (i915_ttm_cpu_maps_iomem(bo->resource))
  1012			wakeref = intel_runtime_pm_get(&to_i915(obj->base.dev)->runtime_pm);
  1013	
  1014		if (obj->mm.madv != I915_MADV_WILLNEED) {
  1015			dma_resv_unlock(bo->base.resv);
  1016			ret = VM_FAULT_SIGBUS;
  1017			goto out_rpm;
  1018		}
  1019	
  1020		if (!i915_ttm_resource_mappable(bo->resource)) {
  1021			int err = -ENODEV;
  1022			int i;
  1023	
  1024			for (i = 0; i < obj->mm.n_placements; i++) {
  1025				struct intel_memory_region *mr = obj->mm.placements[i];
  1026				unsigned int flags;
  1027	
  1028				if (!mr->io_size && mr->type != INTEL_MEMORY_SYSTEM)
  1029					continue;
  1030	
  1031				flags = obj->flags;
  1032				flags &= ~I915_BO_ALLOC_GPU_ONLY;
  1033				err = __i915_ttm_migrate(obj, mr, flags);
  1034				if (!err)
  1035					break;
  1036			}
  1037	
  1038			if (err) {
  1039				drm_dbg(dev, "Unable to make resource CPU accessible\n");
  1040				dma_resv_unlock(bo->base.resv);
  1041				ret = VM_FAULT_SIGBUS;
  1042				goto out_rpm;
  1043			}
  1044		}
  1045	
  1046		if (drm_dev_enter(dev, &idx)) {
  1047			ret = ttm_bo_vm_fault_reserved(vmf, vmf->vma->vm_page_prot,
  1048						       TTM_BO_VM_NUM_PREFAULT);
  1049			drm_dev_exit(idx);
  1050		} else {
  1051			ret = ttm_bo_vm_dummy_page(vmf, vmf->vma->vm_page_prot);
  1052		}
  1053	
  1054		if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
  1055			goto out_rpm;
  1056	
  1057		/* ttm_bo_vm_reserve() already has dma_resv_lock */
  1058		if (ret == VM_FAULT_NOPAGE && wakeref && !obj->userfault_count) {
  1059			obj->userfault_count = 1;
  1060			mutex_lock(&to_gt(to_i915(obj->base.dev))->lmem_userfault_lock);
  1061			list_add(&obj->userfault_link, &to_gt(to_i915(obj->base.dev))->lmem_userfault_list);
  1062			mutex_unlock(&to_gt(to_i915(obj->base.dev))->lmem_userfault_lock);
  1063		}
  1064	
> 1065		if (wakeref && CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND)
  1066			intel_wakeref_auto(&to_gt(to_i915(obj->base.dev))->userfault_wakeref,
  1067					   msecs_to_jiffies_timeout(CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND));
  1068	
  1069		i915_ttm_adjust_lru(obj);
  1070	
  1071		dma_resv_unlock(bo->base.resv);
  1072	
  1073	out_rpm:
  1074		if (wakeref)
  1075			intel_runtime_pm_put(&to_i915(obj->base.dev)->runtime_pm, wakeref);
  1076	
  1077		return ret;
  1078	}
  1079
kernel test robot Sept. 9, 2022, 4:22 p.m. UTC | #3
Hi Anshuman,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on drm-tip/drm-tip]

url:    https://github.com/intel-lab-lkp/linux/commits/Anshuman-Gupta/DGFX-mmap-with-rpm/20220909-192609
base:   git://anongit.freedesktop.org/drm/drm-tip drm-tip
config: i386-randconfig-a013 (https://download.01.org/0day-ci/archive/20220910/202209100051.4Wp6eLZf-lkp@intel.com/config)
compiler: clang version 14.0.6 (https://github.com/llvm/llvm-project f28c006a5895fc0e329fe15fead81e37457cb1d1)
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/intel-lab-lkp/linux/commit/b3f193a1659a69de9e9025c9b02a039d0a58390d
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review Anshuman-Gupta/DGFX-mmap-with-rpm/20220909-192609
        git checkout b3f193a1659a69de9e9025c9b02a039d0a58390d
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=i386 SHELL=/bin/bash drivers/gpu/drm/i915/

If you fix the issue, kindly add following tag where applicable
Reported-by: kernel test robot <lkp@intel.com>

All warnings (new ones prefixed by >>):

>> drivers/gpu/drm/i915/gem/i915_gem_ttm.c:1065:14: warning: use of logical '&&' with constant operand [-Wconstant-logical-operand]
           if (wakeref && CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND)
                       ^  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   drivers/gpu/drm/i915/gem/i915_gem_ttm.c:1065:14: note: use '&' for a bitwise operation
           if (wakeref && CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND)
                       ^~
                       &
   drivers/gpu/drm/i915/gem/i915_gem_ttm.c:1065:14: note: remove constant to silence this warning
           if (wakeref && CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND)
                      ~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   1 warning generated.


vim +1065 drivers/gpu/drm/i915/gem/i915_gem_ttm.c

   985	
   986	static vm_fault_t vm_fault_ttm(struct vm_fault *vmf)
   987	{
   988		struct vm_area_struct *area = vmf->vma;
   989		struct ttm_buffer_object *bo = area->vm_private_data;
   990		struct drm_device *dev = bo->base.dev;
   991		struct drm_i915_gem_object *obj;
   992		intel_wakeref_t wakeref = 0;
   993		vm_fault_t ret;
   994		int idx;
   995	
   996		obj = i915_ttm_to_gem(bo);
   997		if (!obj)
   998			return VM_FAULT_SIGBUS;
   999	
  1000		/* Sanity check that we allow writing into this object */
  1001		if (unlikely(i915_gem_object_is_readonly(obj) &&
  1002			     area->vm_flags & VM_WRITE)) {
  1003			ret = VM_FAULT_SIGBUS;
  1004			goto out_rpm;
  1005		}
  1006	
  1007		ret = ttm_bo_vm_reserve(bo, vmf);
  1008		if (ret)
  1009			goto out_rpm;
  1010	
  1011		if (i915_ttm_cpu_maps_iomem(bo->resource))
  1012			wakeref = intel_runtime_pm_get(&to_i915(obj->base.dev)->runtime_pm);
  1013	
  1014		if (obj->mm.madv != I915_MADV_WILLNEED) {
  1015			dma_resv_unlock(bo->base.resv);
  1016			ret = VM_FAULT_SIGBUS;
  1017			goto out_rpm;
  1018		}
  1019	
  1020		if (!i915_ttm_resource_mappable(bo->resource)) {
  1021			int err = -ENODEV;
  1022			int i;
  1023	
  1024			for (i = 0; i < obj->mm.n_placements; i++) {
  1025				struct intel_memory_region *mr = obj->mm.placements[i];
  1026				unsigned int flags;
  1027	
  1028				if (!mr->io_size && mr->type != INTEL_MEMORY_SYSTEM)
  1029					continue;
  1030	
  1031				flags = obj->flags;
  1032				flags &= ~I915_BO_ALLOC_GPU_ONLY;
  1033				err = __i915_ttm_migrate(obj, mr, flags);
  1034				if (!err)
  1035					break;
  1036			}
  1037	
  1038			if (err) {
  1039				drm_dbg(dev, "Unable to make resource CPU accessible\n");
  1040				dma_resv_unlock(bo->base.resv);
  1041				ret = VM_FAULT_SIGBUS;
  1042				goto out_rpm;
  1043			}
  1044		}
  1045	
  1046		if (drm_dev_enter(dev, &idx)) {
  1047			ret = ttm_bo_vm_fault_reserved(vmf, vmf->vma->vm_page_prot,
  1048						       TTM_BO_VM_NUM_PREFAULT);
  1049			drm_dev_exit(idx);
  1050		} else {
  1051			ret = ttm_bo_vm_dummy_page(vmf, vmf->vma->vm_page_prot);
  1052		}
  1053	
  1054		if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
  1055			goto out_rpm;
  1056	
  1057		/* ttm_bo_vm_reserve() already has dma_resv_lock */
  1058		if (ret == VM_FAULT_NOPAGE && wakeref && !obj->userfault_count) {
  1059			obj->userfault_count = 1;
  1060			mutex_lock(&to_gt(to_i915(obj->base.dev))->lmem_userfault_lock);
  1061			list_add(&obj->userfault_link, &to_gt(to_i915(obj->base.dev))->lmem_userfault_list);
  1062			mutex_unlock(&to_gt(to_i915(obj->base.dev))->lmem_userfault_lock);
  1063		}
  1064	
> 1065		if (wakeref && CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND)
  1066			intel_wakeref_auto(&to_gt(to_i915(obj->base.dev))->userfault_wakeref,
  1067					   msecs_to_jiffies_timeout(CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND));
  1068	
  1069		i915_ttm_adjust_lru(obj);
  1070	
  1071		dma_resv_unlock(bo->base.resv);
  1072	
  1073	out_rpm:
  1074		if (wakeref)
  1075			intel_runtime_pm_put(&to_i915(obj->base.dev)->runtime_pm, wakeref);
  1076	
  1077		return ret;
  1078	}
  1079
Gupta, Anshuman Sept. 12, 2022, 6:05 a.m. UTC | #4
> -----Original Message-----
> From: Auld, Matthew <matthew.auld@intel.com>
> Sent: Friday, September 9, 2022 9:06 PM
> To: Gupta, Anshuman <anshuman.gupta@intel.com>; intel-
> gfx@lists.freedesktop.org
> Cc: joonas.lahtinen@linux.intel.com; Vivi, Rodrigo <rodrigo.vivi@intel.com>;
> Nilawar, Badal <badal.nilawar@intel.com>; chris@chris-wilson.co.uk
> Subject: Re: [PATCH v3 2/2] drm/i915/dgfx: Release mmap on rpm suspend
> 
> On 09/09/2022 12:24, Anshuman Gupta wrote:
> > Release all mmap mapping for all lmem objects which are associated
> > with userfault such that, while pcie function in D3hot, any access to
> > memory mappings will raise a userfault.
> >
> > Runtime resume the dgpu(when gem object lies in lmem).
> > This will transition the dgpu graphics function to D0 state if it was
> > in D3 in order to access the mmap memory mappings.
> >
> > v2:
> > - Squashes the patches. [Matt Auld]
> > - Add adequate locking for lmem_userfault_list addition. [Matt Auld]
> > - Reused obj->userfault_count to avoid double addition. [Matt Auld]
> > - Added i915_gem_object_lock to check
> >    i915_gem_object_is_lmem. [Matt Auld]
> >
> > v3:
> > - Use i915_ttm_cpu_maps_iomem. [Matt Auld]
> > - Fix 'ret == 0 to ret == VM_FAULT_NOPAGE'. [Matt Auld]
> > - Reuse obj->userfault_count as a bool 0 or 1. [Matt Auld]
> > - Delete the mmaped obj from lmem_userfault_list in obj
> >    destruction path. [Matt Auld]
> > - Get a wakeref for object destruction patch. [Matt Auld]
> > - Use intel_wakeref_auto to delay runtime PM. [Matt Auld]
> >
> > PCIe Specs 5.3.1.4.1
> >
> > Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/6331
> > Cc: Matthew Auld <matthew.auld@intel.com>
> > Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
> > Signed-off-by: Anshuman Gupta <anshuman.gupta@intel.com>
> > ---
> >   drivers/gpu/drm/i915/gem/i915_gem_mman.c      | 18 ++++++--
> >   drivers/gpu/drm/i915/gem/i915_gem_mman.h      |  1 +
> >   drivers/gpu/drm/i915/gem/i915_gem_object.c    |  2 +-
> >   .../gpu/drm/i915/gem/i915_gem_object_types.h  |  3 +-
> >   drivers/gpu/drm/i915/gem/i915_gem_ttm.c       | 46 ++++++++++++++++---
> >   drivers/gpu/drm/i915/gt/intel_gt.c            |  2 +
> >   drivers/gpu/drm/i915/gt/intel_gt_types.h      |  3 ++
> >   drivers/gpu/drm/i915/i915_driver.c            |  1 -
> >   drivers/gpu/drm/i915/i915_gem.c               |  5 ++
> >   9 files changed, 68 insertions(+), 13 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c
> > b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
> > index 2be222c03c82..55a4e9fba5ba 100644
> > --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c
> > +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
> > @@ -550,13 +550,10 @@ void i915_gem_object_release_mmap_gtt(struct
> drm_i915_gem_object *obj)
> >   	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
> >   }
> >
> > -void i915_gem_object_release_mmap_offset(struct drm_i915_gem_object
> > *obj)
> > +void __i915_gem_object_release_mmap_offset(struct drm_i915_gem_object
> > +*obj)
> >   {
> >   	struct i915_mmap_offset *mmo, *mn;
> >
> > -	if (obj->ops->unmap_virtual)
> > -		obj->ops->unmap_virtual(obj);
> > -
> >   	spin_lock(&obj->mmo.lock);
> >   	rbtree_postorder_for_each_entry_safe(mmo, mn,
> >   					     &obj->mmo.offsets, offset) { @@ -
> 573,6 +570,19 @@ void
> > i915_gem_object_release_mmap_offset(struct drm_i915_gem_object *obj)
> >   		spin_lock(&obj->mmo.lock);
> >   	}
> >   	spin_unlock(&obj->mmo.lock);
> > +
> > +	if (obj->userfault_count) {
> > +		list_del(&obj->userfault_link);
> > +		obj->userfault_count = 0;
> > +	}
> > +}
> > +
> > +void i915_gem_object_release_mmap_offset(struct drm_i915_gem_object
> > +*obj) {
> > +	if (obj->ops->unmap_virtual)
> > +		obj->ops->unmap_virtual(obj);
> > +
> > +	__i915_gem_object_release_mmap_offset(obj);
> >   }
> >
> >   static struct i915_mmap_offset *
> > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.h
> > b/drivers/gpu/drm/i915/gem/i915_gem_mman.h
> > index efee9e0d2508..271039fdf875 100644
> > --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.h
> > +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.h
> > @@ -27,6 +27,7 @@ int i915_gem_dumb_mmap_offset(struct drm_file
> *file_priv,
> >   void __i915_gem_object_release_mmap_gtt(struct drm_i915_gem_object
> *obj);
> >   void i915_gem_object_release_mmap_gtt(struct drm_i915_gem_object
> > *obj);
> >
> > +void __i915_gem_object_release_mmap_offset(struct drm_i915_gem_object
> > +*obj);
> >   void i915_gem_object_release_mmap_offset(struct drm_i915_gem_object
> > *obj);
> >
> >   #endif
> > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c
> > b/drivers/gpu/drm/i915/gem/i915_gem_object.c
> > index 389e9f157ca5..f6e60cc86b9e 100644
> > --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
> > +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
> > @@ -238,7 +238,7 @@ static void __i915_gem_object_free_mmaps(struct
> drm_i915_gem_object *obj)
> >   {
> >   	/* Skip serialisation and waking the device if known to be not
> > used. */
> >
> > -	if (obj->userfault_count)
> > +	if (obj->userfault_count && !IS_DGFX(to_i915(obj->base.dev)))
> >   		i915_gem_object_release_mmap_gtt(obj);
> >
> >   	if (!RB_EMPTY_ROOT(&obj->mmo.offsets)) { diff --git
> > a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> > b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> > index 9f6b14ec189a..40305e2bcd49 100644
> > --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> > +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> > @@ -298,7 +298,8 @@ struct drm_i915_gem_object {
> >   	};
> >
> >   	/**
> > -	 * Whether the object is currently in the GGTT mmap.
> > +	 * Whether the object is currently in the GGTT or any other supported
> > +	 * fake offset mmap backed by lmem.
> >   	 */
> >   	unsigned int userfault_count;
> >   	struct list_head userfault_link;
> > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
> > b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
> > index bc9c432edffe..bfb2074d65ae 100644
> > --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
> > +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
> > @@ -509,9 +509,17 @@ static int i915_ttm_shrink(struct
> drm_i915_gem_object *obj, unsigned int flags)
> >   static void i915_ttm_delete_mem_notify(struct ttm_buffer_object *bo)
> >   {
> >   	struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
> > +	intel_wakeref_t wakeref = 0;
> >
> >   	if (likely(obj)) {
> > +		if (i915_ttm_cpu_maps_iomem(bo->resource))
> > +			wakeref =
> > +intel_runtime_pm_get(&to_i915(obj->base.dev)->runtime_pm);
> > +
> >   		__i915_gem_object_pages_fini(obj);
> > +
> > +		if (wakeref)
> > +			intel_runtime_pm_put(&to_i915(obj->base.dev)-
> >runtime_pm,
> > +wakeref);
> > +
> >   		i915_ttm_free_cached_io_rsgt(obj);
> >   	}
> >   }
> > @@ -981,6 +989,7 @@ static vm_fault_t vm_fault_ttm(struct vm_fault *vmf)
> >   	struct ttm_buffer_object *bo = area->vm_private_data;
> >   	struct drm_device *dev = bo->base.dev;
> >   	struct drm_i915_gem_object *obj;
> > +	intel_wakeref_t wakeref = 0;
> >   	vm_fault_t ret;
> >   	int idx;
> >
> > @@ -990,16 +999,22 @@ static vm_fault_t vm_fault_ttm(struct vm_fault
> > *vmf)
> >
> >   	/* Sanity check that we allow writing into this object */
> >   	if (unlikely(i915_gem_object_is_readonly(obj) &&
> > -		     area->vm_flags & VM_WRITE))
> > -		return VM_FAULT_SIGBUS;
> > +		     area->vm_flags & VM_WRITE)) {
> > +		ret = VM_FAULT_SIGBUS;
> > +		goto out_rpm;
> > +	}
> >
> >   	ret = ttm_bo_vm_reserve(bo, vmf);
> >   	if (ret)
> > -		return ret;
> > +		goto out_rpm;
> > +
> > +	if (i915_ttm_cpu_maps_iomem(bo->resource))
> > +		wakeref =
> > +intel_runtime_pm_get(&to_i915(obj->base.dev)->runtime_pm);
> 
> Could maybe move this down a bit. No need to grab the wakeref if the object is
> marked as DONTNEED.
> 
> >
> >   	if (obj->mm.madv != I915_MADV_WILLNEED) {
> >   		dma_resv_unlock(bo->base.resv);
> > -		return VM_FAULT_SIGBUS;
> > +		ret = VM_FAULT_SIGBUS;
> > +		goto out_rpm;
> >   	}
> >
> >   	if (!i915_ttm_resource_mappable(bo->resource)) { @@ -1023,7 +1038,8
> > @@ static vm_fault_t vm_fault_ttm(struct vm_fault *vmf)
> >   		if (err) {
> >   			drm_dbg(dev, "Unable to make resource CPU
> accessible\n");
> >   			dma_resv_unlock(bo->base.resv);
> > -			return VM_FAULT_SIGBUS;
> > +			ret = VM_FAULT_SIGBUS;
> > +			goto out_rpm;
> >   		}
> >   	}
> >
> > @@ -1034,12 +1050,30 @@ static vm_fault_t vm_fault_ttm(struct vm_fault
> *vmf)
> >   	} else {
> >   		ret = ttm_bo_vm_dummy_page(vmf, vmf->vma-
> >vm_page_prot);
> >   	}
> > +
> >   	if (ret == VM_FAULT_RETRY && !(vmf->flags &
> FAULT_FLAG_RETRY_NOWAIT))
> > -		return ret;
> > +		goto out_rpm;
> > +
> > +	/* ttm_bo_vm_reserve() already has dma_resv_lock */
> > +	if (ret == VM_FAULT_NOPAGE && wakeref && !obj->userfault_count) {
> > +		obj->userfault_count = 1;
> > +		mutex_lock(&to_gt(to_i915(obj->base.dev))-
> >lmem_userfault_lock);
> > +		list_add(&obj->userfault_link, &to_gt(to_i915(obj->base.dev))-
> >lmem_userfault_list);
> > +		mutex_unlock(&to_gt(to_i915(obj->base.dev))-
> >lmem_userfault_lock);
> > +	}
> > +
> > +	if (wakeref && CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND)
> > +		intel_wakeref_auto(&to_gt(to_i915(obj->base.dev))-
> >userfault_wakeref,
> > +
> > +msecs_to_jiffies_timeout(CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND));
> >
> >   	i915_ttm_adjust_lru(obj);
> >
> >   	dma_resv_unlock(bo->base.resv);
> > +
> > +out_rpm:
> > +	if (wakeref)
> > +		intel_runtime_pm_put(&to_i915(obj->base.dev)->runtime_pm,
> wakeref);
> > +
> >   	return ret;
> >   }
> >
> > diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c
> > b/drivers/gpu/drm/i915/gt/intel_gt.c
> > index 1ce344cfa827..ee9ee815f505 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_gt.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_gt.c
> > @@ -39,6 +39,8 @@ static void __intel_gt_init_early(struct intel_gt *gt)
> >   {
> >   	spin_lock_init(&gt->irq_lock);
> >
> > +	INIT_LIST_HEAD(&gt->lmem_userfault_list);
> > +	mutex_init(&gt->lmem_userfault_lock);
> >   	INIT_LIST_HEAD(&gt->closed_vma);
> >   	spin_lock_init(&gt->closed_lock);
> >
> > diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h
> > b/drivers/gpu/drm/i915/gt/intel_gt_types.h
> > index e6a662f9d7c0..a2d87e742161 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
> > +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
> > @@ -132,6 +132,9 @@ struct intel_gt {
> >   	struct intel_wakeref wakeref;
> >   	atomic_t user_wakeref;
> >
> > +	struct mutex lmem_userfault_lock; /* Protects access to userfault list */
> > +	struct list_head lmem_userfault_list;
> 
> Probably needs a big comment explaining how this all works, since it seems quite
> tricky.
> 
> "If we are outside of the runtime suspend path, access to @lmem_userfault_list
> requires always first grabbing the runtime pm, to ensure we can't race against
> runtime suspend removing items. Once we have that we also need to grab
> @lmem_userfault_lock, at which point we have exclusive access. The runtime
> suspend path is special since it doesn't really hold any locks, but instead has
> exclusive access by virtue of all other accesses requiring holding the runtime pm."
> 
> Also according to that we are then missing holding lmem_userfault_lock in
> i915_gem_object_release_mmap_offset(), for the object destruction case. We
> are only holding the runtime pm, which only saves us from runtime suspend, and
> not some other concurrent user, like a different object destroy or fault touching
> the list.
Sure I will update this comment.
> 
> > +
> >   	struct list_head closed_vma;
> >   	spinlock_t closed_lock; /* guards the list of closed_vma */
> >
> > diff --git a/drivers/gpu/drm/i915/i915_driver.c
> b/drivers/gpu/drm/i915/i915_driver.c
> > index 1332c70370a6..81699aa505e2 100644
> > --- a/drivers/gpu/drm/i915/i915_driver.c
> > +++ b/drivers/gpu/drm/i915/i915_driver.c
> > @@ -1591,7 +1591,6 @@ static int intel_runtime_suspend(struct device
> *kdev)
> >   		return -ENODEV;
> >
> >   	drm_dbg(&dev_priv->drm, "Suspending device\n");
> > -
> >   	disable_rpm_wakeref_asserts(rpm);
> >
> >   	/*
> > diff --git a/drivers/gpu/drm/i915/i915_gem.c
> b/drivers/gpu/drm/i915/i915_gem.c
> > index 70f082f7911a..01f23c0e7fec 100644
> > --- a/drivers/gpu/drm/i915/i915_gem.c
> > +++ b/drivers/gpu/drm/i915/i915_gem.c
> > @@ -842,6 +842,11 @@ void i915_gem_runtime_suspend(struct
> drm_i915_private *i915)
> >   				 &to_gt(i915)->ggtt->userfault_list,
> userfault_link)
> >   		__i915_gem_object_release_mmap_gtt(obj);
> >
> > +	list_for_each_entry_safe(obj, on,
> > +				 &to_gt(i915)->lmem_userfault_list,
> userfault_link) {
> > +		__i915_gem_object_release_mmap_offset(obj);
> 
> drm_vma_node_unmap(&bo->base.vma_node, bdev->dev_mapping);
> 
> I don't think mm.offsets is tracking the ttm node...
Thanks for pointing it, I am surprised in this case how the existing implementation of
i915_gem_object_release_mmap_offset (object destruction path)
is releasing the mmap mapping with mmo.offset, do we need to change there as well ?

Thanks,
Anshuman Gupta.
> 
> > +	}
> > +
> >   	/*
> >   	 * The fence will be lost when the device powers down. If any were
> >   	 * in use by hardware (i.e. they are pinned), we should not be powering
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
index 2be222c03c82..55a4e9fba5ba 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
@@ -550,13 +550,10 @@  void i915_gem_object_release_mmap_gtt(struct drm_i915_gem_object *obj)
 	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
 }
 
-void i915_gem_object_release_mmap_offset(struct drm_i915_gem_object *obj)
+void __i915_gem_object_release_mmap_offset(struct drm_i915_gem_object *obj)
 {
 	struct i915_mmap_offset *mmo, *mn;
 
-	if (obj->ops->unmap_virtual)
-		obj->ops->unmap_virtual(obj);
-
 	spin_lock(&obj->mmo.lock);
 	rbtree_postorder_for_each_entry_safe(mmo, mn,
 					     &obj->mmo.offsets, offset) {
@@ -573,6 +570,19 @@  void i915_gem_object_release_mmap_offset(struct drm_i915_gem_object *obj)
 		spin_lock(&obj->mmo.lock);
 	}
 	spin_unlock(&obj->mmo.lock);
+
+	if (obj->userfault_count) {
+		list_del(&obj->userfault_link);
+		obj->userfault_count = 0;
+	}
+}
+
+void i915_gem_object_release_mmap_offset(struct drm_i915_gem_object *obj)
+{
+	if (obj->ops->unmap_virtual)
+		obj->ops->unmap_virtual(obj);
+
+	__i915_gem_object_release_mmap_offset(obj);
 }
 
 static struct i915_mmap_offset *
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.h b/drivers/gpu/drm/i915/gem/i915_gem_mman.h
index efee9e0d2508..271039fdf875 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_mman.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.h
@@ -27,6 +27,7 @@  int i915_gem_dumb_mmap_offset(struct drm_file *file_priv,
 void __i915_gem_object_release_mmap_gtt(struct drm_i915_gem_object *obj);
 void i915_gem_object_release_mmap_gtt(struct drm_i915_gem_object *obj);
 
+void __i915_gem_object_release_mmap_offset(struct drm_i915_gem_object *obj);
 void i915_gem_object_release_mmap_offset(struct drm_i915_gem_object *obj);
 
 #endif
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c
index 389e9f157ca5..f6e60cc86b9e 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -238,7 +238,7 @@  static void __i915_gem_object_free_mmaps(struct drm_i915_gem_object *obj)
 {
 	/* Skip serialisation and waking the device if known to be not used. */
 
-	if (obj->userfault_count)
+	if (obj->userfault_count && !IS_DGFX(to_i915(obj->base.dev)))
 		i915_gem_object_release_mmap_gtt(obj);
 
 	if (!RB_EMPTY_ROOT(&obj->mmo.offsets)) {
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
index 9f6b14ec189a..40305e2bcd49 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -298,7 +298,8 @@  struct drm_i915_gem_object {
 	};
 
 	/**
-	 * Whether the object is currently in the GGTT mmap.
+	 * Whether the object is currently in the GGTT or any other supported
+	 * fake offset mmap backed by lmem.
 	 */
 	unsigned int userfault_count;
 	struct list_head userfault_link;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
index bc9c432edffe..bfb2074d65ae 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
@@ -509,9 +509,17 @@  static int i915_ttm_shrink(struct drm_i915_gem_object *obj, unsigned int flags)
 static void i915_ttm_delete_mem_notify(struct ttm_buffer_object *bo)
 {
 	struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
+	intel_wakeref_t wakeref = 0;
 
 	if (likely(obj)) {
+		if (i915_ttm_cpu_maps_iomem(bo->resource))
+			wakeref = intel_runtime_pm_get(&to_i915(obj->base.dev)->runtime_pm);
+
 		__i915_gem_object_pages_fini(obj);
+
+		if (wakeref)
+			intel_runtime_pm_put(&to_i915(obj->base.dev)->runtime_pm, wakeref);
+
 		i915_ttm_free_cached_io_rsgt(obj);
 	}
 }
@@ -981,6 +989,7 @@  static vm_fault_t vm_fault_ttm(struct vm_fault *vmf)
 	struct ttm_buffer_object *bo = area->vm_private_data;
 	struct drm_device *dev = bo->base.dev;
 	struct drm_i915_gem_object *obj;
+	intel_wakeref_t wakeref = 0;
 	vm_fault_t ret;
 	int idx;
 
@@ -990,16 +999,22 @@  static vm_fault_t vm_fault_ttm(struct vm_fault *vmf)
 
 	/* Sanity check that we allow writing into this object */
 	if (unlikely(i915_gem_object_is_readonly(obj) &&
-		     area->vm_flags & VM_WRITE))
-		return VM_FAULT_SIGBUS;
+		     area->vm_flags & VM_WRITE)) {
+		ret = VM_FAULT_SIGBUS;
+		goto out_rpm;
+	}
 
 	ret = ttm_bo_vm_reserve(bo, vmf);
 	if (ret)
-		return ret;
+		goto out_rpm;
+
+	if (i915_ttm_cpu_maps_iomem(bo->resource))
+		wakeref = intel_runtime_pm_get(&to_i915(obj->base.dev)->runtime_pm);
 
 	if (obj->mm.madv != I915_MADV_WILLNEED) {
 		dma_resv_unlock(bo->base.resv);
-		return VM_FAULT_SIGBUS;
+		ret = VM_FAULT_SIGBUS;
+		goto out_rpm;
 	}
 
 	if (!i915_ttm_resource_mappable(bo->resource)) {
@@ -1023,7 +1038,8 @@  static vm_fault_t vm_fault_ttm(struct vm_fault *vmf)
 		if (err) {
 			drm_dbg(dev, "Unable to make resource CPU accessible\n");
 			dma_resv_unlock(bo->base.resv);
-			return VM_FAULT_SIGBUS;
+			ret = VM_FAULT_SIGBUS;
+			goto out_rpm;
 		}
 	}
 
@@ -1034,12 +1050,30 @@  static vm_fault_t vm_fault_ttm(struct vm_fault *vmf)
 	} else {
 		ret = ttm_bo_vm_dummy_page(vmf, vmf->vma->vm_page_prot);
 	}
+
 	if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
-		return ret;
+		goto out_rpm;
+
+	/* ttm_bo_vm_reserve() already has dma_resv_lock */
+	if (ret == VM_FAULT_NOPAGE && wakeref && !obj->userfault_count) {
+		obj->userfault_count = 1;
+		mutex_lock(&to_gt(to_i915(obj->base.dev))->lmem_userfault_lock);
+		list_add(&obj->userfault_link, &to_gt(to_i915(obj->base.dev))->lmem_userfault_list);
+		mutex_unlock(&to_gt(to_i915(obj->base.dev))->lmem_userfault_lock);
+	}
+
+	if (wakeref && CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND)
+		intel_wakeref_auto(&to_gt(to_i915(obj->base.dev))->userfault_wakeref,
+				   msecs_to_jiffies_timeout(CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND));
 
 	i915_ttm_adjust_lru(obj);
 
 	dma_resv_unlock(bo->base.resv);
+
+out_rpm:
+	if (wakeref)
+		intel_runtime_pm_put(&to_i915(obj->base.dev)->runtime_pm, wakeref);
+
 	return ret;
 }
 
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
index 1ce344cfa827..ee9ee815f505 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -39,6 +39,8 @@  static void __intel_gt_init_early(struct intel_gt *gt)
 {
 	spin_lock_init(&gt->irq_lock);
 
+	INIT_LIST_HEAD(&gt->lmem_userfault_list);
+	mutex_init(&gt->lmem_userfault_lock);
 	INIT_LIST_HEAD(&gt->closed_vma);
 	spin_lock_init(&gt->closed_lock);
 
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h
index e6a662f9d7c0..a2d87e742161 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
@@ -132,6 +132,9 @@  struct intel_gt {
 	struct intel_wakeref wakeref;
 	atomic_t user_wakeref;
 
+	struct mutex lmem_userfault_lock; /* Protects access to userfault list */
+	struct list_head lmem_userfault_list;
+
 	struct list_head closed_vma;
 	spinlock_t closed_lock; /* guards the list of closed_vma */
 
diff --git a/drivers/gpu/drm/i915/i915_driver.c b/drivers/gpu/drm/i915/i915_driver.c
index 1332c70370a6..81699aa505e2 100644
--- a/drivers/gpu/drm/i915/i915_driver.c
+++ b/drivers/gpu/drm/i915/i915_driver.c
@@ -1591,7 +1591,6 @@  static int intel_runtime_suspend(struct device *kdev)
 		return -ENODEV;
 
 	drm_dbg(&dev_priv->drm, "Suspending device\n");
-
 	disable_rpm_wakeref_asserts(rpm);
 
 	/*
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 70f082f7911a..01f23c0e7fec 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -842,6 +842,11 @@  void i915_gem_runtime_suspend(struct drm_i915_private *i915)
 				 &to_gt(i915)->ggtt->userfault_list, userfault_link)
 		__i915_gem_object_release_mmap_gtt(obj);
 
+	list_for_each_entry_safe(obj, on,
+				 &to_gt(i915)->lmem_userfault_list, userfault_link) {
+		__i915_gem_object_release_mmap_offset(obj);
+	}
+
 	/*
 	 * The fence will be lost when the device powers down. If any were
 	 * in use by hardware (i.e. they are pinned), we should not be powering