
[RFC,1/1] drm/i915/dgfx: Handling of pin_map against rpm

Message ID 20220915103311.5634-2-anshuman.gupta@intel.com (mailing list archive)
State New, archived
Series DGFX pin_map with rpm

Commit Message

Gupta, Anshuman Sept. 15, 2022, 10:33 a.m. UTC
If an i915 gem object lies in lmem, then i915_gem_object_pin_map()
needs to grab an rpm wakeref to make sure the gfx PCIe endpoint
function stays in the D0 state during any access to the mapping
returned by i915_gem_object_pin_map().
Subsequently, i915_gem_object_unpin_map() will put the wakeref as well.

Cc: Matthew Auld <matthew.auld@intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: Andi Shyti <andi.shyti@linux.intel.com>
Signed-off-by: Anshuman Gupta <anshuman.gupta@intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_object.c       |  2 ++
 drivers/gpu/drm/i915/gem/i915_gem_object.h       |  5 +++++
 drivers/gpu/drm/i915/gem/i915_gem_object_types.h | 14 ++++++++++++++
 drivers/gpu/drm/i915/gem/i915_gem_pages.c        |  8 ++++++++
 4 files changed, 29 insertions(+)
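
For illustration, a minimal caller-side sketch of what this change guarantees. The helper below is hypothetical (obj, src and len are placeholders) and assumes the caller already holds the object's dma_resv lock, since pin_map asserts it:

/*
 * Hypothetical helper, sketch only. With this patch, the first pin_map
 * on an lmem object takes an rpm wakeref, so the CPU access below cannot
 * race with the gfx endpoint leaving D0.
 */
static int write_to_lmem_obj(struct drm_i915_gem_object *obj,
			     const void *src, size_t len)
{
	void *vaddr;

	vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC);
	if (IS_ERR(vaddr))
		return PTR_ERR(vaddr);

	memcpy(vaddr, src, len);	/* endpoint stays in D0 for this access */

	i915_gem_object_unpin_map(obj);	/* last unpin_map drops the wakeref */
	return 0;
}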

Comments

Tvrtko Ursulin Sept. 15, 2022, 2:17 p.m. UTC | #1
On 15/09/2022 11:33, Anshuman Gupta wrote:
> If i915 gem obj lies in lmem, then i915_gem_object_pin_map
> need to grab a rpm wakeref to make sure gfx PCIe endpoint
> function stays in D0 state during any access to mapping
> returned by i915_gem_object_pin_map().
> Subsequently i915_gem_object_upin_map will put the wakref as well.
> 
> Cc: Matthew Auld <matthew.auld@intel.com>
> Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
> Cc: Andi Shyti <andi.shyti@linux.intel.com>
> Signed-off-by: Anshuman Gupta <anshuman.gupta@intel.com>
> ---
>   drivers/gpu/drm/i915/gem/i915_gem_object.c       |  2 ++
>   drivers/gpu/drm/i915/gem/i915_gem_object.h       |  5 +++++
>   drivers/gpu/drm/i915/gem/i915_gem_object_types.h | 14 ++++++++++++++
>   drivers/gpu/drm/i915/gem/i915_gem_pages.c        |  8 ++++++++
>   4 files changed, 29 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c
> index 85482a04d158..f291f990838d 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
> @@ -95,6 +95,7 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
>   	mutex_init(&obj->mm.get_page.lock);
>   	INIT_RADIX_TREE(&obj->mm.get_dma_page.radix, GFP_KERNEL | __GFP_NOWARN);
>   	mutex_init(&obj->mm.get_dma_page.lock);
> +	mutex_init(&obj->wakeref_lock);
>   }
>   
>   /**
> @@ -110,6 +111,7 @@ void __i915_gem_object_fini(struct drm_i915_gem_object *obj)
>   {
>   	mutex_destroy(&obj->mm.get_page.lock);
>   	mutex_destroy(&obj->mm.get_dma_page.lock);
> +	mutex_destroy(&obj->wakeref_lock);
>   	dma_resv_fini(&obj->base._resv);
>   }
>   
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h
> index 7317d4102955..b31ac6e4c272 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
> @@ -501,6 +501,11 @@ static inline void i915_gem_object_flush_map(struct drm_i915_gem_object *obj)
>    */
>   static inline void i915_gem_object_unpin_map(struct drm_i915_gem_object *obj)
>   {
> +	mutext_lock(obj->wakeref_lock);
> +	if (!--obj->wakeref_count)
> +		intel_runtime_pm_put(&to_i915(obj->base.dev)->runtime_pm, obj->wakeref);
> +	mutext_unlock(obj->wakeref_lock);
> +
>   	i915_gem_object_unpin_pages(obj);
>   }
>   
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> index 9f6b14ec189a..34aff95a1984 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> @@ -657,6 +657,20 @@ struct drm_i915_gem_object {
>   
>   		void *gvt_info;
>   	};
> +
> +	/**
> +	 * wakeref to protect the i915 lmem iomem mappings.
> +	 * We don't pin_map an object partially that makes easy
> +	 * to track the wakeref cookie, if wakeref is already held
> +	 * then we don't need to grab it again for other pin_map.
> +	 * first pin_map will grab the wakeref and last unpin_map
> +	 * will put the wakeref.
> +	 */
> +	intel_wakeref_t wakeref;
> +	unsigned int wakeref_count;
> +
> +	/** protects the wakeref_count wakeref cookie against multiple pin_map and unpin_map */
> +	struct mutex wakeref_lock;

On one side it feels wasteful to have counters per object. But then I 
also notice pin_map is only allowed with the obj dma_resv lock held - 
meaning that lock is already taken. So you possibly don't need a new 
mutex; someone more up to date please confirm.

Option B - trading space efficiency for one more atomic - would be to 
track it at the level of i915 and maybe use atomic_t? Would we have to 
worry about overflow more in this case? Hm, some protection will be 
needed regardless of the option, just in case.
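
For reference, a rough sketch of what that i915-level tracking could look like. Everything here (the lmem_map_* fields and helpers) is hypothetical, not existing driver state; a mutex is used rather than a bare atomic_t because taking the wakeref may resume the device and hence sleep, which is one form the "some protection" above could take:

/* Hypothetical device-level refcounting, sketch only. */
static void i915_lmem_maps_get(struct drm_i915_private *i915)
{
	mutex_lock(&i915->lmem_map_lock);
	if (!i915->lmem_map_count++)	/* first lmem mapping anywhere */
		i915->lmem_map_wakeref =
			intel_runtime_pm_get(&i915->runtime_pm);
	mutex_unlock(&i915->lmem_map_lock);
}

static void i915_lmem_maps_put(struct drm_i915_private *i915)
{
	mutex_lock(&i915->lmem_map_lock);
	if (!--i915->lmem_map_count)	/* last lmem mapping gone */
		intel_runtime_pm_put(&i915->runtime_pm,
				     i915->lmem_map_wakeref);
	mutex_unlock(&i915->lmem_map_lock);
}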

Regards,

Tvrtko

>   };
>   
>   static inline struct drm_i915_gem_object *
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
> index 4df50b049cea..b638b5413280 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
> @@ -370,6 +370,14 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
>   
>   	assert_object_held(obj);
>   
> +	if (i915_gem_object_is_lmem(obj)) {
> +		mutex_lock(&obj->wakeref_lock);
> +		if (!obj->wakeref_count++)
> +			obj->wakeref =
> +				intel_runtime_pm_get(&to_i915(obj->base.dev)->runtime_pm);
> +		mutex_unlock(&obj->wakeref_lock);
> +	}
> +
>   	pinned = !(type & I915_MAP_OVERRIDE);
>   	type &= ~I915_MAP_OVERRIDE;
>
Tvrtko Ursulin Sept. 15, 2022, 2:32 p.m. UTC | #2
On 15/09/2022 11:33, Anshuman Gupta wrote:
> If i915 gem obj lies in lmem, then i915_gem_object_pin_map
> need to grab a rpm wakeref to make sure gfx PCIe endpoint
> function stays in D0 state during any access to mapping
> returned by i915_gem_object_pin_map().
> Subsequently i915_gem_object_upin_map will put the wakref as well.
> 
> Cc: Matthew Auld <matthew.auld@intel.com>
> Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
> Cc: Andi Shyti <andi.shyti@linux.intel.com>
> Signed-off-by: Anshuman Gupta <anshuman.gupta@intel.com>
> ---
>   drivers/gpu/drm/i915/gem/i915_gem_object.c       |  2 ++
>   drivers/gpu/drm/i915/gem/i915_gem_object.h       |  5 +++++
>   drivers/gpu/drm/i915/gem/i915_gem_object_types.h | 14 ++++++++++++++
>   drivers/gpu/drm/i915/gem/i915_gem_pages.c        |  8 ++++++++
>   4 files changed, 29 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c
> index 85482a04d158..f291f990838d 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
> @@ -95,6 +95,7 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
>   	mutex_init(&obj->mm.get_page.lock);
>   	INIT_RADIX_TREE(&obj->mm.get_dma_page.radix, GFP_KERNEL | __GFP_NOWARN);
>   	mutex_init(&obj->mm.get_dma_page.lock);
> +	mutex_init(&obj->wakeref_lock);
>   }
>   
>   /**
> @@ -110,6 +111,7 @@ void __i915_gem_object_fini(struct drm_i915_gem_object *obj)
>   {
>   	mutex_destroy(&obj->mm.get_page.lock);
>   	mutex_destroy(&obj->mm.get_dma_page.lock);
> +	mutex_destroy(&obj->wakeref_lock);
>   	dma_resv_fini(&obj->base._resv);
>   }
>   
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h
> index 7317d4102955..b31ac6e4c272 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
> @@ -501,6 +501,11 @@ static inline void i915_gem_object_flush_map(struct drm_i915_gem_object *obj)
>    */
>   static inline void i915_gem_object_unpin_map(struct drm_i915_gem_object *obj)
>   {
> +	mutext_lock(obj->wakeref_lock);
> +	if (!--obj->wakeref_count)

Bug here - incremented only for i915_gem_object_is_lmem, decremented for 
all.

a)
Use i915_gem_object_is_lmem here? Is the answer it gives stable across 
object lifetime considering placements and migration?

b)
Store the wakeref in object and release if present?
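
A minimal sketch of option (b), keeping the field names this patch adds; note it sidesteps the lmem check in unpin_map but, on its own, would not handle nested pin_map calls on the same object:

/* Sketch only: in unpin_map, release the rpm wakeref only if pin_map
 * actually stored one (i.e. the object was in lmem at pin time). */
if (obj->wakeref) {
	intel_runtime_pm_put(&to_i915(obj->base.dev)->runtime_pm,
			     obj->wakeref);
	obj->wakeref = 0;
}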

Regards,

Tvrtko

> +		intel_runtime_pm_put(&to_i915(obj->base.dev)->runtime_pm, obj->wakeref);
> +	mutext_unlock(obj->wakeref_lock);
> +
>   	i915_gem_object_unpin_pages(obj);
>   }
>   
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> index 9f6b14ec189a..34aff95a1984 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> @@ -657,6 +657,20 @@ struct drm_i915_gem_object {
>   
>   		void *gvt_info;
>   	};
> +
> +	/**
> +	 * wakeref to protect the i915 lmem iomem mappings.
> +	 * We don't pin_map an object partially that makes easy
> +	 * to track the wakeref cookie, if wakeref is already held
> +	 * then we don't need to grab it again for other pin_map.
> +	 * first pin_map will grab the wakeref and last unpin_map
> +	 * will put the wakeref.
> +	 */
> +	intel_wakeref_t wakeref;
> +	unsigned int wakeref_count;
> +
> +	/** protects the wakeref_count wakeref cookie against multiple pin_map and unpin_map */
> +	struct mutex wakeref_lock;
>   };
>   
>   static inline struct drm_i915_gem_object *
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
> index 4df50b049cea..b638b5413280 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
> @@ -370,6 +370,14 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
>   
>   	assert_object_held(obj);
>   
> +	if (i915_gem_object_is_lmem(obj)) {
> +		mutex_lock(&obj->wakeref_lock);
> +		if (!obj->wakeref_count++)
> +			obj->wakeref =
> +				intel_runtime_pm_get(&to_i915(obj->base.dev)->runtime_pm);
> +		mutex_unlock(&obj->wakeref_lock);
> +	}
> +
>   	pinned = !(type & I915_MAP_OVERRIDE);
>   	type &= ~I915_MAP_OVERRIDE;
>
Gupta, Anshuman Sept. 15, 2022, 4:41 p.m. UTC | #3
> -----Original Message-----
> From: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
> Sent: Thursday, September 15, 2022 8:03 PM
> To: Gupta, Anshuman <anshuman.gupta@intel.com>; intel-
> gfx@lists.freedesktop.org
> Cc: Auld, Matthew <matthew.auld@intel.com>; Vivi, Rodrigo
> <rodrigo.vivi@intel.com>
> Subject: Re: [Intel-gfx] [RFC 1/1] drm/i915/dgfx: Handling of pin_map against
> rpm
> 
> 
> On 15/09/2022 11:33, Anshuman Gupta wrote:
> > If i915 gem obj lies in lmem, then i915_gem_object_pin_map need to
> > grab a rpm wakeref to make sure gfx PCIe endpoint function stays in D0
> > state during any access to mapping returned by
> > i915_gem_object_pin_map().
> > Subsequently i915_gem_object_upin_map will put the wakref as well.
> >
> > Cc: Matthew Auld <matthew.auld@intel.com>
> > Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
> > Cc: Andi Shyti <andi.shyti@linux.intel.com>
> > Signed-off-by: Anshuman Gupta <anshuman.gupta@intel.com>
> > ---
> >   drivers/gpu/drm/i915/gem/i915_gem_object.c       |  2 ++
> >   drivers/gpu/drm/i915/gem/i915_gem_object.h       |  5 +++++
> >   drivers/gpu/drm/i915/gem/i915_gem_object_types.h | 14 ++++++++++++++
> >   drivers/gpu/drm/i915/gem/i915_gem_pages.c        |  8 ++++++++
> >   4 files changed, 29 insertions(+)
> >
> > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c
> > b/drivers/gpu/drm/i915/gem/i915_gem_object.c
> > index 85482a04d158..f291f990838d 100644
> > --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
> > +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
> > @@ -95,6 +95,7 @@ void i915_gem_object_init(struct drm_i915_gem_object
> *obj,
> >   	mutex_init(&obj->mm.get_page.lock);
> >   	INIT_RADIX_TREE(&obj->mm.get_dma_page.radix, GFP_KERNEL |
> __GFP_NOWARN);
> >   	mutex_init(&obj->mm.get_dma_page.lock);
> > +	mutex_init(&obj->wakeref_lock);
> >   }
> >
> >   /**
> > @@ -110,6 +111,7 @@ void __i915_gem_object_fini(struct
> drm_i915_gem_object *obj)
> >   {
> >   	mutex_destroy(&obj->mm.get_page.lock);
> >   	mutex_destroy(&obj->mm.get_dma_page.lock);
> > +	mutex_destroy(&obj->wakeref_lock);
> >   	dma_resv_fini(&obj->base._resv);
> >   }
> >
> > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h
> > b/drivers/gpu/drm/i915/gem/i915_gem_object.h
> > index 7317d4102955..b31ac6e4c272 100644
> > --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
> > +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
> > @@ -501,6 +501,11 @@ static inline void i915_gem_object_flush_map(struct
> drm_i915_gem_object *obj)
> >    */
> >   static inline void i915_gem_object_unpin_map(struct drm_i915_gem_object
> *obj)
> >   {
> > +	mutext_lock(obj->wakeref_lock);
> > +	if (!--obj->wakeref_count)
> 
> Bug here - incremented only for i915_gem_object_is_lmem, decremented for
> all.
Thanks for pointing it out; how about a condition like the one below?

if (obj->wakeref_count) {
	/* wakeref_count is non-zero only if the object was in lmem */
	obj->wakeref_count--;
	if (!obj->wakeref_count)
		intel_runtime_pm_put(&to_i915(obj->base.dev)->runtime_pm, obj->wakeref);
}
> 
> a)
> Use i915_gem_object_is_lmem here? Is the answer it gives stable across object
> lifetime considering placements and migration?
AFAIU, after pin_map() the caller may drop the object lock, so eviction can take place.
But as we have already taken the wakeref for the object, we still need to release it.
> 
> b)
> Store the wakeref in object and release if present?
If there are two pin_map() calls for the same object, then the first unpin_map() will release the wakeref and the other pin_map() is no longer protected.
That is my understanding; can that happen?
Thanks,
Anshuman Gupta.
> 
> Regards,
> 
> Tvrtko
> 
> > +		intel_runtime_pm_put(&to_i915(obj->base.dev)->runtime_pm,
> obj->wakeref);
> > +	mutext_unlock(obj->wakeref_lock);
> > +
> >   	i915_gem_object_unpin_pages(obj);
> >   }
> >
> > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> > b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> > index 9f6b14ec189a..34aff95a1984 100644
> > --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> > +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> > @@ -657,6 +657,20 @@ struct drm_i915_gem_object {
> >
> >   		void *gvt_info;
> >   	};
> > +
> > +	/**
> > +	 * wakeref to protect the i915 lmem iomem mappings.
> > +	 * We don't pin_map an object partially that makes easy
> > +	 * to track the wakeref cookie, if wakeref is already held
> > +	 * then we don't need to grab it again for other pin_map.
> > +	 * first pin_map will grab the wakeref and last unpin_map
> > +	 * will put the wakeref.
> > +	 */
> > +	intel_wakeref_t wakeref;
> > +	unsigned int wakeref_count;
> > +
> > +	/** protects the wakeref_count wakeref cookie against multiple
> pin_map and unpin_map */
> > +	struct mutex wakeref_lock;
> >   };
> >
> >   static inline struct drm_i915_gem_object * diff --git
> > a/drivers/gpu/drm/i915/gem/i915_gem_pages.c
> > b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
> > index 4df50b049cea..b638b5413280 100644
> > --- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c
> > +++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
> > @@ -370,6 +370,14 @@ void *i915_gem_object_pin_map(struct
> > drm_i915_gem_object *obj,
> >
> >   	assert_object_held(obj);
> >
> > +	if (i915_gem_object_is_lmem(obj)) {
> > +		mutex_lock(&obj->wakeref_lock);
> > +		if (!obj->wakeref_count++)
> > +			obj->wakeref =
> > +				intel_runtime_pm_get(&to_i915(obj-
> >base.dev)->runtime_pm);
> > +		mutex_unlock(&obj->wakeref_lock);
> > +	}
> > +
> >   	pinned = !(type & I915_MAP_OVERRIDE);
> >   	type &= ~I915_MAP_OVERRIDE;
> >
Gupta, Anshuman Sept. 15, 2022, 4:49 p.m. UTC | #4
> -----Original Message-----
> From: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
> Sent: Thursday, September 15, 2022 7:48 PM
> To: Gupta, Anshuman <anshuman.gupta@intel.com>; intel-
> gfx@lists.freedesktop.org
> Cc: Auld, Matthew <matthew.auld@intel.com>; Vivi, Rodrigo
> <rodrigo.vivi@intel.com>
> Subject: Re: [Intel-gfx] [RFC 1/1] drm/i915/dgfx: Handling of pin_map against
> rpm
> 
> 
> On 15/09/2022 11:33, Anshuman Gupta wrote:
> > If i915 gem obj lies in lmem, then i915_gem_object_pin_map need to
> > grab a rpm wakeref to make sure gfx PCIe endpoint function stays in D0
> > state during any access to mapping returned by
> > i915_gem_object_pin_map().
> > Subsequently i915_gem_object_upin_map will put the wakref as well.
> >
> > Cc: Matthew Auld <matthew.auld@intel.com>
> > Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
> > Cc: Andi Shyti <andi.shyti@linux.intel.com>
> > Signed-off-by: Anshuman Gupta <anshuman.gupta@intel.com>
> > ---
> >   drivers/gpu/drm/i915/gem/i915_gem_object.c       |  2 ++
> >   drivers/gpu/drm/i915/gem/i915_gem_object.h       |  5 +++++
> >   drivers/gpu/drm/i915/gem/i915_gem_object_types.h | 14 ++++++++++++++
> >   drivers/gpu/drm/i915/gem/i915_gem_pages.c        |  8 ++++++++
> >   4 files changed, 29 insertions(+)
> >
> > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c
> > b/drivers/gpu/drm/i915/gem/i915_gem_object.c
> > index 85482a04d158..f291f990838d 100644
> > --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
> > +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
> > @@ -95,6 +95,7 @@ void i915_gem_object_init(struct drm_i915_gem_object
> *obj,
> >   	mutex_init(&obj->mm.get_page.lock);
> >   	INIT_RADIX_TREE(&obj->mm.get_dma_page.radix, GFP_KERNEL |
> __GFP_NOWARN);
> >   	mutex_init(&obj->mm.get_dma_page.lock);
> > +	mutex_init(&obj->wakeref_lock);
> >   }
> >
> >   /**
> > @@ -110,6 +111,7 @@ void __i915_gem_object_fini(struct
> drm_i915_gem_object *obj)
> >   {
> >   	mutex_destroy(&obj->mm.get_page.lock);
> >   	mutex_destroy(&obj->mm.get_dma_page.lock);
> > +	mutex_destroy(&obj->wakeref_lock);
> >   	dma_resv_fini(&obj->base._resv);
> >   }
> >
> > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h
> > b/drivers/gpu/drm/i915/gem/i915_gem_object.h
> > index 7317d4102955..b31ac6e4c272 100644
> > --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
> > +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
> > @@ -501,6 +501,11 @@ static inline void i915_gem_object_flush_map(struct
> drm_i915_gem_object *obj)
> >    */
> >   static inline void i915_gem_object_unpin_map(struct drm_i915_gem_object
> *obj)
> >   {
> > +	mutext_lock(obj->wakeref_lock);
> > +	if (!--obj->wakeref_count)
> > +		intel_runtime_pm_put(&to_i915(obj->base.dev)->runtime_pm,
> obj->wakeref);
> > +	mutext_unlock(obj->wakeref_lock);
> > +
> >   	i915_gem_object_unpin_pages(obj);
> >   }
> >
> > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> > b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> > index 9f6b14ec189a..34aff95a1984 100644
> > --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> > +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> > @@ -657,6 +657,20 @@ struct drm_i915_gem_object {
> >
> >   		void *gvt_info;
> >   	};
> > +
> > +	/**
> > +	 * wakeref to protect the i915 lmem iomem mappings.
> > +	 * We don't pin_map an object partially that makes easy
> > +	 * to track the wakeref cookie, if wakeref is already held
> > +	 * then we don't need to grab it again for other pin_map.
> > +	 * first pin_map will grab the wakeref and last unpin_map
> > +	 * will put the wakeref.
> > +	 */
> > +	intel_wakeref_t wakeref;
> > +	unsigned int wakeref_count;
> > +
> > +	/** protects the wakeref_count wakeref cookie against multiple
> pin_map and unpin_map */
> > +	struct mutex wakeref_lock;
> 
> On one side it feels wasteful to have counters per object. But then I also notice
> pin_map is only allowed under the obj dma_resv locked - meaning that lock is
> already held. So you possibly don't need a new mutex, someone more up to date
> please confirm.
True, but a caller like i915_gem_object_pin_map_unlocked() will release the gem_object_lock after
pin_map, so we need some protection in unpin_map().
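
For context, roughly what the _unlocked variant does (paraphrased from memory, not a verbatim copy of the driver): the dma_resv lock is held only across the pin itself, so it is already dropped by the time the caller dereferences the mapping or calls unpin_map():

void *i915_gem_object_pin_map_unlocked(struct drm_i915_gem_object *obj,
				       enum i915_map_type type)
{
	void *ret;

	i915_gem_object_lock(obj, NULL);
	ret = i915_gem_object_pin_map(obj, type);
	i915_gem_object_unlock(obj);

	return ret;
}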
> 
> Option B - trading space efficieny for one more atomic - would be to track it on
> the level of i915 and maybe use atomic_t? Would we have to worry about
> overflow more in this case? Hm some protection regardless of the option will be
> needed just in case.
I did not understand your comment; is it about using an atomic_t variable for wakeref_count?
If that is the feedback, I can do that if the community agrees on this RFC proposal.
Thanks,
Anshuman Gupta.
> 
> Regards,
> 
> Tvrtko
> 
> >   };
> >
> >   static inline struct drm_i915_gem_object * diff --git
> > a/drivers/gpu/drm/i915/gem/i915_gem_pages.c
> > b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
> > index 4df50b049cea..b638b5413280 100644
> > --- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c
> > +++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
> > @@ -370,6 +370,14 @@ void *i915_gem_object_pin_map(struct
> > drm_i915_gem_object *obj,
> >
> >   	assert_object_held(obj);
> >
> > +	if (i915_gem_object_is_lmem(obj)) {
> > +		mutex_lock(&obj->wakeref_lock);
> > +		if (!obj->wakeref_count++)
> > +			obj->wakeref =
> > +				intel_runtime_pm_get(&to_i915(obj-
> >base.dev)->runtime_pm);
> > +		mutex_unlock(&obj->wakeref_lock);
> > +	}
> > +
> >   	pinned = !(type & I915_MAP_OVERRIDE);
> >   	type &= ~I915_MAP_OVERRIDE;
> >
Tvrtko Ursulin Sept. 15, 2022, 5:07 p.m. UTC | #5
On 15/09/2022 11:33, Anshuman Gupta wrote:
> If i915 gem obj lies in lmem, then i915_gem_object_pin_map
> need to grab a rpm wakeref to make sure gfx PCIe endpoint
> function stays in D0 state during any access to mapping
> returned by i915_gem_object_pin_map().
> Subsequently i915_gem_object_upin_map will put the wakref as well.

Another thing to check is perma-pinned contexts. Follow the flow from 
intel_engine_create_pinned_context to 
intel_engine_destroy_pinned_context. If you find out that kernel (&co) 
contexts are pinned for the duration of i915 load/bind and that they use 
lmem objects, that would mean the wakeref is held for as long as i915 is 
loaded, defeating the point and making the solution effectively equal to 
just disabling RPM.

Regards,

Tvrtko

> Cc: Matthew Auld <matthew.auld@intel.com>
> Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
> Cc: Andi Shyti <andi.shyti@linux.intel.com>
> Signed-off-by: Anshuman Gupta <anshuman.gupta@intel.com>
> ---
>   drivers/gpu/drm/i915/gem/i915_gem_object.c       |  2 ++
>   drivers/gpu/drm/i915/gem/i915_gem_object.h       |  5 +++++
>   drivers/gpu/drm/i915/gem/i915_gem_object_types.h | 14 ++++++++++++++
>   drivers/gpu/drm/i915/gem/i915_gem_pages.c        |  8 ++++++++
>   4 files changed, 29 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c
> index 85482a04d158..f291f990838d 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
> @@ -95,6 +95,7 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
>   	mutex_init(&obj->mm.get_page.lock);
>   	INIT_RADIX_TREE(&obj->mm.get_dma_page.radix, GFP_KERNEL | __GFP_NOWARN);
>   	mutex_init(&obj->mm.get_dma_page.lock);
> +	mutex_init(&obj->wakeref_lock);
>   }
>   
>   /**
> @@ -110,6 +111,7 @@ void __i915_gem_object_fini(struct drm_i915_gem_object *obj)
>   {
>   	mutex_destroy(&obj->mm.get_page.lock);
>   	mutex_destroy(&obj->mm.get_dma_page.lock);
> +	mutex_destroy(&obj->wakeref_lock);
>   	dma_resv_fini(&obj->base._resv);
>   }
>   
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h
> index 7317d4102955..b31ac6e4c272 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
> @@ -501,6 +501,11 @@ static inline void i915_gem_object_flush_map(struct drm_i915_gem_object *obj)
>    */
>   static inline void i915_gem_object_unpin_map(struct drm_i915_gem_object *obj)
>   {
> +	mutext_lock(obj->wakeref_lock);
> +	if (!--obj->wakeref_count)
> +		intel_runtime_pm_put(&to_i915(obj->base.dev)->runtime_pm, obj->wakeref);
> +	mutext_unlock(obj->wakeref_lock);
> +
>   	i915_gem_object_unpin_pages(obj);
>   }
>   
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> index 9f6b14ec189a..34aff95a1984 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> @@ -657,6 +657,20 @@ struct drm_i915_gem_object {
>   
>   		void *gvt_info;
>   	};
> +
> +	/**
> +	 * wakeref to protect the i915 lmem iomem mappings.
> +	 * We don't pin_map an object partially that makes easy
> +	 * to track the wakeref cookie, if wakeref is already held
> +	 * then we don't need to grab it again for other pin_map.
> +	 * first pin_map will grab the wakeref and last unpin_map
> +	 * will put the wakeref.
> +	 */
> +	intel_wakeref_t wakeref;
> +	unsigned int wakeref_count;
> +
> +	/** protects the wakeref_count wakeref cookie against multiple pin_map and unpin_map */
> +	struct mutex wakeref_lock;
>   };
>   
>   static inline struct drm_i915_gem_object *
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
> index 4df50b049cea..b638b5413280 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
> @@ -370,6 +370,14 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
>   
>   	assert_object_held(obj);
>   
> +	if (i915_gem_object_is_lmem(obj)) {
> +		mutex_lock(&obj->wakeref_lock);
> +		if (!obj->wakeref_count++)
> +			obj->wakeref =
> +				intel_runtime_pm_get(&to_i915(obj->base.dev)->runtime_pm);
> +		mutex_unlock(&obj->wakeref_lock);
> +	}
> +
>   	pinned = !(type & I915_MAP_OVERRIDE);
>   	type &= ~I915_MAP_OVERRIDE;
>
Gupta, Anshuman Sept. 16, 2022, 10:30 a.m. UTC | #6
> -----Original Message-----
> From: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
> Sent: Thursday, September 15, 2022 10:37 PM
> To: Gupta, Anshuman <anshuman.gupta@intel.com>; intel-
> gfx@lists.freedesktop.org
> Cc: Auld, Matthew <matthew.auld@intel.com>; Vivi, Rodrigo
> <rodrigo.vivi@intel.com>
> Subject: Re: [Intel-gfx] [RFC 1/1] drm/i915/dgfx: Handling of pin_map against
> rpm
> 
> 
> On 15/09/2022 11:33, Anshuman Gupta wrote:
> > If i915 gem obj lies in lmem, then i915_gem_object_pin_map need to
> > grab a rpm wakeref to make sure gfx PCIe endpoint function stays in D0
> > state during any access to mapping returned by
> > i915_gem_object_pin_map().
> > Subsequently i915_gem_object_upin_map will put the wakref as well.
> 
> Another thing to check are perma pinned contexts. Follow the flow from
> intel_engine_create_pinned_context to intel_engine_destroy_pinned_context.
> If you find out that kernel (&co) contexts are pinned for the duration of i915
> load/bind and that they use lmem objects, that would mean wakeref is held for
> the duration of i915 loaded state. Defeating the point and making the solution
> effectively equal to just disabling RPM.
That's correct; intel_ring_pin() can pin_map an lmem object:

	if (i915_vma_is_map_and_fenceable(vma)) {
		addr = (void __force *)i915_vma_pin_iomap(vma);
	} else {
		int type = i915_coherent_map_type(vma->vm->i915, vma->obj, false);

		addr = i915_gem_object_pin_map(vma->obj, type);
	}

If that is the case, this RFC proposal will not work, and then every caller of i915_gem_object_pin_map() would need to grab the wakeref before
accessing the address returned by pin_map. Any inputs from your side on any other approach?
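
For illustration, one rough sketch of such caller-side handling. The caller and its names are hypothetical; with_intel_runtime_pm() is the existing helper that takes the wakeref for the duration of the statement and releases it afterwards:

/* Hypothetical caller, sketch only: the wakeref is taken by whoever
 * dereferences the lmem mapping, not by pin_map itself. */
static void copy_into_lmem(struct drm_i915_gem_object *obj,
			   const void *src, size_t len)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	intel_wakeref_t wakeref;
	void *vaddr;

	vaddr = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC);
	if (IS_ERR(vaddr))
		return;

	/* the device is guaranteed to stay in D0 only inside this block */
	with_intel_runtime_pm(&i915->runtime_pm, wakeref)
		memcpy(vaddr, src, len);

	i915_gem_object_unpin_map(obj);
}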

Thanks,
Anshuman Gupta.

> 
> Regards,
> 
> Tvrtko
> 
> > Cc: Matthew Auld <matthew.auld@intel.com>
> > Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
> > Cc: Andi Shyti <andi.shyti@linux.intel.com>
> > Signed-off-by: Anshuman Gupta <anshuman.gupta@intel.com>
> > ---
> >   drivers/gpu/drm/i915/gem/i915_gem_object.c       |  2 ++
> >   drivers/gpu/drm/i915/gem/i915_gem_object.h       |  5 +++++
> >   drivers/gpu/drm/i915/gem/i915_gem_object_types.h | 14 ++++++++++++++
> >   drivers/gpu/drm/i915/gem/i915_gem_pages.c        |  8 ++++++++
> >   4 files changed, 29 insertions(+)
> >
> > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c
> > b/drivers/gpu/drm/i915/gem/i915_gem_object.c
> > index 85482a04d158..f291f990838d 100644
> > --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
> > +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
> > @@ -95,6 +95,7 @@ void i915_gem_object_init(struct drm_i915_gem_object
> *obj,
> >   	mutex_init(&obj->mm.get_page.lock);
> >   	INIT_RADIX_TREE(&obj->mm.get_dma_page.radix, GFP_KERNEL |
> __GFP_NOWARN);
> >   	mutex_init(&obj->mm.get_dma_page.lock);
> > +	mutex_init(&obj->wakeref_lock);
> >   }
> >
> >   /**
> > @@ -110,6 +111,7 @@ void __i915_gem_object_fini(struct
> drm_i915_gem_object *obj)
> >   {
> >   	mutex_destroy(&obj->mm.get_page.lock);
> >   	mutex_destroy(&obj->mm.get_dma_page.lock);
> > +	mutex_destroy(&obj->wakeref_lock);
> >   	dma_resv_fini(&obj->base._resv);
> >   }
> >
> > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h
> > b/drivers/gpu/drm/i915/gem/i915_gem_object.h
> > index 7317d4102955..b31ac6e4c272 100644
> > --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
> > +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
> > @@ -501,6 +501,11 @@ static inline void i915_gem_object_flush_map(struct
> drm_i915_gem_object *obj)
> >    */
> >   static inline void i915_gem_object_unpin_map(struct drm_i915_gem_object
> *obj)
> >   {
> > +	mutext_lock(obj->wakeref_lock);
> > +	if (!--obj->wakeref_count)
> > +		intel_runtime_pm_put(&to_i915(obj->base.dev)->runtime_pm,
> obj->wakeref);
> > +	mutext_unlock(obj->wakeref_lock);
> > +
> >   	i915_gem_object_unpin_pages(obj);
> >   }
> >
> > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> > b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> > index 9f6b14ec189a..34aff95a1984 100644
> > --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> > +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> > @@ -657,6 +657,20 @@ struct drm_i915_gem_object {
> >
> >   		void *gvt_info;
> >   	};
> > +
> > +	/**
> > +	 * wakeref to protect the i915 lmem iomem mappings.
> > +	 * We don't pin_map an object partially that makes easy
> > +	 * to track the wakeref cookie, if wakeref is already held
> > +	 * then we don't need to grab it again for other pin_map.
> > +	 * first pin_map will grab the wakeref and last unpin_map
> > +	 * will put the wakeref.
> > +	 */
> > +	intel_wakeref_t wakeref;
> > +	unsigned int wakeref_count;
> > +
> > +	/** protects the wakeref_count wakeref cookie against multiple
> pin_map and unpin_map */
> > +	struct mutex wakeref_lock;
> >   };
> >
> >   static inline struct drm_i915_gem_object * diff --git
> > a/drivers/gpu/drm/i915/gem/i915_gem_pages.c
> > b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
> > index 4df50b049cea..b638b5413280 100644
> > --- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c
> > +++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
> > @@ -370,6 +370,14 @@ void *i915_gem_object_pin_map(struct
> > drm_i915_gem_object *obj,
> >
> >   	assert_object_held(obj);
> >
> > +	if (i915_gem_object_is_lmem(obj)) {
> > +		mutex_lock(&obj->wakeref_lock);
> > +		if (!obj->wakeref_count++)
> > +			obj->wakeref =
> > +				intel_runtime_pm_get(&to_i915(obj-
> >base.dev)->runtime_pm);
> > +		mutex_unlock(&obj->wakeref_lock);
> > +	}
> > +
> >   	pinned = !(type & I915_MAP_OVERRIDE);
> >   	type &= ~I915_MAP_OVERRIDE;
> >
Tvrtko Ursulin Sept. 16, 2022, 10:48 a.m. UTC | #7
On 16/09/2022 11:30, Gupta, Anshuman wrote:
>> -----Original Message-----
>> From: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
>> Sent: Thursday, September 15, 2022 10:37 PM
>> To: Gupta, Anshuman <anshuman.gupta@intel.com>; intel-
>> gfx@lists.freedesktop.org
>> Cc: Auld, Matthew <matthew.auld@intel.com>; Vivi, Rodrigo
>> <rodrigo.vivi@intel.com>
>> Subject: Re: [Intel-gfx] [RFC 1/1] drm/i915/dgfx: Handling of pin_map against
>> rpm
>>
>>
>> On 15/09/2022 11:33, Anshuman Gupta wrote:
>>> If i915 gem obj lies in lmem, then i915_gem_object_pin_map need to
>>> grab a rpm wakeref to make sure gfx PCIe endpoint function stays in D0
>>> state during any access to mapping returned by
>>> i915_gem_object_pin_map().
>>> Subsequently i915_gem_object_upin_map will put the wakref as well.
>>
>> Another thing to check are perma pinned contexts. Follow the flow from
>> intel_engine_create_pinned_context to intel_engine_destroy_pinned_context.
>> If you find out that kernel (&co) contexts are pinned for the duration of i915
>> load/bind and that they use lmem objects, that would mean wakeref is held for
>> the duration of i915 loaded state. Defeating the point and making the solution
>> effectively equal to just disabling RPM.
> That’s correct  intel_ring_pin can pin_map the lmem object.
>        if (i915_vma_is_map_and_fenceable(vma)) {
>                  addr = (void __force *)i915_vma_pin_iomap(vma);
>          } else {
>                  int type = i915_coherent_map_type(vma->vm->i915, vma->obj, false);
> 
>                  addr = i915_gem_object_pin_map(vma->obj, type);
>          }
> 
> If that is the case this RFC proposal will not work and in that case 

Right, or LRC state for perma pinned contexts is probably even more 
clear cut.

> every caller of  i915_gem_object_pin_map need to grab the wakreref before
> accessing the retuned address by pin_map. Any inputs from you side for any other approach.

I didn't quite get what you meant here.

My point was that, if my thinking that perma-pinned contexts would hold 
the wakeref permanently is correct, that would prevent the approach from 
this patch from having a different effect than just disabling RPM. Would 
unpinning the perma-pinned contexts on GT park be feasible? It's not a 5 
minute question and unfortunately I don't have enough time to go deep 
into this problem space. Hopefully someone else can jump in.

Regards,

Tvrtko

Patch

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c
index 85482a04d158..f291f990838d 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -95,6 +95,7 @@  void i915_gem_object_init(struct drm_i915_gem_object *obj,
 	mutex_init(&obj->mm.get_page.lock);
 	INIT_RADIX_TREE(&obj->mm.get_dma_page.radix, GFP_KERNEL | __GFP_NOWARN);
 	mutex_init(&obj->mm.get_dma_page.lock);
+	mutex_init(&obj->wakeref_lock);
 }
 
 /**
@@ -110,6 +111,7 @@  void __i915_gem_object_fini(struct drm_i915_gem_object *obj)
 {
 	mutex_destroy(&obj->mm.get_page.lock);
 	mutex_destroy(&obj->mm.get_dma_page.lock);
+	mutex_destroy(&obj->wakeref_lock);
 	dma_resv_fini(&obj->base._resv);
 }
 
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h
index 7317d4102955..b31ac6e4c272 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -501,6 +501,11 @@  static inline void i915_gem_object_flush_map(struct drm_i915_gem_object *obj)
  */
 static inline void i915_gem_object_unpin_map(struct drm_i915_gem_object *obj)
 {
+	mutext_lock(obj->wakeref_lock);
+	if (!--obj->wakeref_count)
+		intel_runtime_pm_put(&to_i915(obj->base.dev)->runtime_pm, obj->wakeref);
+	mutext_unlock(obj->wakeref_lock);
+
 	i915_gem_object_unpin_pages(obj);
 }
 
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
index 9f6b14ec189a..34aff95a1984 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -657,6 +657,20 @@  struct drm_i915_gem_object {
 
 		void *gvt_info;
 	};
+
+	/**
+	 * wakeref to protect the i915 lmem iomem mappings.
+	 * We don't pin_map an object partially that makes easy
+	 * to track the wakeref cookie, if wakeref is already held
+	 * then we don't need to grab it again for other pin_map.
+	 * first pin_map will grab the wakeref and last unpin_map
+	 * will put the wakeref.
+	 */
+	intel_wakeref_t wakeref;
+	unsigned int wakeref_count;
+
+	/** protects the wakeref_count wakeref cookie against multiple pin_map and unpin_map */
+	struct mutex wakeref_lock;
 };
 
 static inline struct drm_i915_gem_object *
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
index 4df50b049cea..b638b5413280 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
@@ -370,6 +370,14 @@  void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
 
 	assert_object_held(obj);
 
+	if (i915_gem_object_is_lmem(obj)) {
+		mutex_lock(&obj->wakeref_lock);
+		if (!obj->wakeref_count++)
+			obj->wakeref =
+				intel_runtime_pm_get(&to_i915(obj->base.dev)->runtime_pm);
+		mutex_unlock(&obj->wakeref_lock);
+	}
+
 	pinned = !(type & I915_MAP_OVERRIDE);
 	type &= ~I915_MAP_OVERRIDE;