diff mbox

[v7,6/7] drm/i915: refactor duplicate object vmap functions (the final rework?)

Message ID 1456850039-25856-7-git-send-email-david.s.gordon@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Dave Gordon March 1, 2016, 4:33 p.m. UTC
This is essentially Chris Wilson's patch of a similar name, reworked on
top of Alex Dai's recent patch:
| drm/i915: Add i915_gem_object_vmap to map GEM object to virtual space

Chris' original commentary said:
| We now have two implementations for vmapping a whole object, one for
| dma-buf and one for the ringbuffer. If we couple the vmapping into
| the obj->pages lifetime, then we can reuse an obj->vmapping for both
| and at the same time couple it into the shrinker.
|
| v2: Mark the failable kmalloc() as __GFP_NOWARN (vsyrjala)
| v3: Call unpin_vmap from the right dmabuf unmapper

v4: reimplements the same functionality, but now as wrappers round the
    recently-introduced i915_gem_object_vmap_range() from Alex's patch
    mentioned above.

v5: separated from two minor but unrelated changes [Tvrtko Ursulin];
    this is the third and most substantial portion.

    Decided not to hold onto vmappings after the pin count goes to zero.
    This may reduce the benefit of Chris' scheme a bit, but does avoid
    any increased risk of exhausting kernel vmap space on 32-bit kernels
    [Tvrtko Ursulin]. Potentially, the vunmap() could be deferred until
    the put_pages() stage if a suitable notifier were written, but we're
    not doing that here. Nonetheless, the simplification of both dmabuf
    and ringbuffer code makes it worthwhile in its own right.

v6: change BUG_ON() to WARN_ON(). [Tvrtko Ursulin]

Signed-off-by: Dave Gordon <david.s.gordon@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Alex Dai <yu.dai@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h         | 22 ++++++++++++++-----
 drivers/gpu/drm/i915/i915_gem.c         | 39 +++++++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_gem_dmabuf.c  | 36 ++++--------------------------
 drivers/gpu/drm/i915/intel_ringbuffer.c |  9 ++++----
 4 files changed, 65 insertions(+), 41 deletions(-)

Comments

Chris Wilson March 2, 2016, 12:08 p.m. UTC | #1
On Tue, Mar 01, 2016 at 04:33:58PM +0000, Dave Gordon wrote:
> This is essentially Chris Wilson's patch of a similar name, reworked on
> top of Alex Dai's recent patch:
> | drm/i915: Add i915_gem_object_vmap to map GEM object to virtual space
> 
> Chris' original commentary said:
> | We now have two implementations for vmapping a whole object, one for
> | dma-buf and one for the ringbuffer. If we couple the vmapping into
> | the obj->pages lifetime, then we can reuse an obj->vmapping for both
> | and at the same time couple it into the shrinker.
> |
> | v2: Mark the failable kmalloc() as __GFP_NOWARN (vsyrjala)
> | v3: Call unpin_vmap from the right dmabuf unmapper
> 
> v4: reimplements the same functionality, but now as wrappers round the
>     recently-introduced i915_gem_object_vmap_range() from Alex's patch
>     mentioned above.
> 
> v5: separated from two minor but unrelated changes [Tvrtko Ursulin];
>     this is the third and most substantial portion.
> 
>     Decided not to hold onto vmappings after the pin count goes to zero.
>     This may reduce the benefit of Chris' scheme a bit, but does avoid
>     any increased risk of exhausting kernel vmap space on 32-bit kernels
>     [Tvrtko Ursulin]. Potentially, the vunmap() could be deferred until
>     the put_pages() stage if a suitable notifier were written, but we're
>     not doing that here. Nonetheless, the simplification of both dmabuf
>     and ringbuffer code makes it worthwhile in its own right.
> 
> v6: change BUG_ON() to WARN_ON(). [Tvrtko Ursulin]
> 
> Signed-off-by: Dave Gordon <david.s.gordon@intel.com>
> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Alex Dai <yu.dai@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_drv.h         | 22 ++++++++++++++-----
>  drivers/gpu/drm/i915/i915_gem.c         | 39 +++++++++++++++++++++++++++++++++
>  drivers/gpu/drm/i915/i915_gem_dmabuf.c  | 36 ++++--------------------------
>  drivers/gpu/drm/i915/intel_ringbuffer.c |  9 ++++----
>  4 files changed, 65 insertions(+), 41 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index b3ae191..f1ad3b3 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2172,10 +2172,7 @@ struct drm_i915_gem_object {
>  		struct scatterlist *sg;
>  		int last;
>  	} get_page;
> -
> -	/* prime dma-buf support */
> -	void *dma_buf_vmapping;
> -	int vmapping_count;
> +	void *vmapping;
>  
>  	/** Breadcrumb of last rendering to the buffer.
>  	 * There can only be one writer, but we allow for multiple readers.
> @@ -2980,7 +2977,22 @@ static inline void i915_gem_object_pin_pages(struct drm_i915_gem_object *obj)
>  static inline void i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj)
>  {
>  	BUG_ON(obj->pages_pin_count == 0);
> -	obj->pages_pin_count--;
> +	if (--obj->pages_pin_count == 0 && obj->vmapping) {
> +		/*
> +		 * Releasing the vmapping here may yield less benefit than
> +		 * if we kept it until put_pages(), but on the other hand

Yields no benefit. Makes the patch pointless.
Plus there is also pressure to enable WC vmaps.
-Chris
Dave Gordon March 2, 2016, 3:40 p.m. UTC | #2
On 02/03/16 12:08, Chris Wilson wrote:
> On Tue, Mar 01, 2016 at 04:33:58PM +0000, Dave Gordon wrote:
>> This is essentially Chris Wilson's patch of a similar name, reworked on
>> top of Alex Dai's recent patch:
>> | drm/i915: Add i915_gem_object_vmap to map GEM object to virtual space
>>
>> Chris' original commentary said:
>> | We now have two implementations for vmapping a whole object, one for
>> | dma-buf and one for the ringbuffer. If we couple the vmapping into
>> | the obj->pages lifetime, then we can reuse an obj->vmapping for both
>> | and at the same time couple it into the shrinker.
>> |
>> | v2: Mark the failable kmalloc() as __GFP_NOWARN (vsyrjala)
>> | v3: Call unpin_vmap from the right dmabuf unmapper
>>
>> v4: reimplements the same functionality, but now as wrappers round the
>>      recently-introduced i915_gem_object_vmap_range() from Alex's patch
>>      mentioned above.
>>
>> v5: separated from two minor but unrelated changes [Tvrtko Ursulin];
>>      this is the third and most substantial portion.
>>
>>      Decided not to hold onto vmappings after the pin count goes to zero.
>>      This may reduce the benefit of Chris' scheme a bit, but does avoid
>>      any increased risk of exhausting kernel vmap space on 32-bit kernels
>>      [Tvrtko Ursulin]. Potentially, the vunmap() could be deferred until
>>      the put_pages() stage if a suitable notifier were written, but we're
>>      not doing that here. Nonetheless, the simplification of both dmabuf
>>      and ringbuffer code makes it worthwhile in its own right.
>>
>> v6: change BUG_ON() to WARN_ON(). [Tvrtko Ursulin]
>>
>> Signed-off-by: Dave Gordon <david.s.gordon@intel.com>
>> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>> Cc: Chris Wilson <chris@chris-wilson.co.uk>
>> Cc: Alex Dai <yu.dai@intel.com>
>> ---
>>   drivers/gpu/drm/i915/i915_drv.h         | 22 ++++++++++++++-----
>>   drivers/gpu/drm/i915/i915_gem.c         | 39 +++++++++++++++++++++++++++++++++
>>   drivers/gpu/drm/i915/i915_gem_dmabuf.c  | 36 ++++--------------------------
>>   drivers/gpu/drm/i915/intel_ringbuffer.c |  9 ++++----
>>   4 files changed, 65 insertions(+), 41 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
>> index b3ae191..f1ad3b3 100644
>> --- a/drivers/gpu/drm/i915/i915_drv.h
>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>> @@ -2172,10 +2172,7 @@ struct drm_i915_gem_object {
>>   		struct scatterlist *sg;
>>   		int last;
>>   	} get_page;
>> -
>> -	/* prime dma-buf support */
>> -	void *dma_buf_vmapping;
>> -	int vmapping_count;
>> +	void *vmapping;
>>
>>   	/** Breadcrumb of last rendering to the buffer.
>>   	 * There can only be one writer, but we allow for multiple readers.
>> @@ -2980,7 +2977,22 @@ static inline void i915_gem_object_pin_pages(struct drm_i915_gem_object *obj)
>>   static inline void i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj)
>>   {
>>   	BUG_ON(obj->pages_pin_count == 0);
>> -	obj->pages_pin_count--;
>> +	if (--obj->pages_pin_count == 0 && obj->vmapping) {
>> +		/*
>> +		 * Releasing the vmapping here may yield less benefit than
>> +		 * if we kept it until put_pages(), but on the other hand
>
> Yields no benefit. Makes the patch pointless.
> Plus there is also pressure to enable WC vmaps.
> -Chris

The patch is not pointless -- at the very least, it:
+ reduces the size of "struct drm_i915_gem_object" (OK, only by 4 bytes)
+ replaces special-function code for dmabufs with more generic code that 
can be reused for other objects (for now, ringbuffers; next GuC-shared 
objects -- see Alex's patch "drm/i915/guc: Simplify code by keeping vmap 
of guc_client object" which will eliminate a lot of short-term 
kmap_atomics with persistent kmaps).
+ provides a shorthand for the sequence of { get_pages(), pin_pages(), 
vmap() } so we don't have to open-code it (and deal with all the error 
paths) in several different places

Thus there is an engineering benefit even if this version doesn't 
provide any performance benefit. And if, as the next step, you want to 
extend the vmap lifetime, you just have to remove those few lines in 
i915_gem_object_unpin_pages() and incorporate the notifier that you 
prototyped earlier -- if it actually provides any performance boost.

.Dave.
Tvrtko Ursulin March 8, 2016, 9:43 a.m. UTC | #3
On 02/03/16 15:40, Dave Gordon wrote:
> On 02/03/16 12:08, Chris Wilson wrote:
>> On Tue, Mar 01, 2016 at 04:33:58PM +0000, Dave Gordon wrote:
>>> This is essentially Chris Wilson's patch of a similar name, reworked on
>>> top of Alex Dai's recent patch:
>>> | drm/i915: Add i915_gem_object_vmap to map GEM object to virtual space
>>>
>>> Chris' original commentary said:
>>> | We now have two implementations for vmapping a whole object, one for
>>> | dma-buf and one for the ringbuffer. If we couple the vmapping into
>>> | the obj->pages lifetime, then we can reuse an obj->vmapping for both
>>> | and at the same time couple it into the shrinker.
>>> |
>>> | v2: Mark the failable kmalloc() as __GFP_NOWARN (vsyrjala)
>>> | v3: Call unpin_vmap from the right dmabuf unmapper
>>>
>>> v4: reimplements the same functionality, but now as wrappers round the
>>>      recently-introduced i915_gem_object_vmap_range() from Alex's patch
>>>      mentioned above.
>>>
>>> v5: separated from two minor but unrelated changes [Tvrtko Ursulin];
>>>      this is the third and most substantial portion.
>>>
>>>      Decided not to hold onto vmappings after the pin count goes to
>>> zero.
>>>      This may reduce the benefit of Chris' scheme a bit, but does avoid
>>>      any increased risk of exhausting kernel vmap space on 32-bit
>>> kernels
>>>      [Tvrtko Ursulin]. Potentially, the vunmap() could be deferred until
>>>      the put_pages() stage if a suitable notifier were written, but
>>> we're
>>>      not doing that here. Nonetheless, the simplification of both dmabuf
>>>      and ringbuffer code makes it worthwhile in its own right.
>>>
>>> v6: change BUG_ON() to WARN_ON(). [Tvrtko Ursulin]
>>>
>>> Signed-off-by: Dave Gordon <david.s.gordon@intel.com>
>>> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>> Cc: Chris Wilson <chris@chris-wilson.co.uk>
>>> Cc: Alex Dai <yu.dai@intel.com>
>>> ---
>>>   drivers/gpu/drm/i915/i915_drv.h         | 22 ++++++++++++++-----
>>>   drivers/gpu/drm/i915/i915_gem.c         | 39
>>> +++++++++++++++++++++++++++++++++
>>>   drivers/gpu/drm/i915/i915_gem_dmabuf.c  | 36
>>> ++++--------------------------
>>>   drivers/gpu/drm/i915/intel_ringbuffer.c |  9 ++++----
>>>   4 files changed, 65 insertions(+), 41 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/i915_drv.h
>>> b/drivers/gpu/drm/i915/i915_drv.h
>>> index b3ae191..f1ad3b3 100644
>>> --- a/drivers/gpu/drm/i915/i915_drv.h
>>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>>> @@ -2172,10 +2172,7 @@ struct drm_i915_gem_object {
>>>           struct scatterlist *sg;
>>>           int last;
>>>       } get_page;
>>> -
>>> -    /* prime dma-buf support */
>>> -    void *dma_buf_vmapping;
>>> -    int vmapping_count;
>>> +    void *vmapping;
>>>
>>>       /** Breadcrumb of last rendering to the buffer.
>>>        * There can only be one writer, but we allow for multiple
>>> readers.
>>> @@ -2980,7 +2977,22 @@ static inline void
>>> i915_gem_object_pin_pages(struct drm_i915_gem_object *obj)
>>>   static inline void i915_gem_object_unpin_pages(struct
>>> drm_i915_gem_object *obj)
>>>   {
>>>       BUG_ON(obj->pages_pin_count == 0);
>>> -    obj->pages_pin_count--;
>>> +    if (--obj->pages_pin_count == 0 && obj->vmapping) {
>>> +        /*
>>> +         * Releasing the vmapping here may yield less benefit than
>>> +         * if we kept it until put_pages(), but on the other hand
>>
>> Yields no benefit. Makes the patch pointless.
>> Plus there is also pressure to enable WC vmaps.
>> -Chris
>
> The patch is not pointless -- at the very least, it:
> + reduces the size of "struct drm_i915_gem_object" (OK, only by 4 bytes)
> + replaces special-function code for dmabufs with more generic code that
> can be reused for other objects (for now, ringbuffers; next GuC-shared
> objects -- see Alex's patch "drm/i915/guc: Simplify code by keeping vmap
> of guc_client object" which will eliminate lot of short-term
> kmap_atomics with persistent kmaps).
> + provides a shorthand for the sequence of { get_pages(), pin_pages(),
> vmap() } so we don't have to open-code it (and deal with all the error
> paths) in several different places
>
> Thus there is an engineering benefit even if this version doesn't
> provide any performance benefit. And if, as the next step, you want to
> extend the vmap lifetime, you just have to remove those few lines in
> i915_gem_object_unpin_pages() and incorporate the notifier that you
> prototyped earlier -- if it actually provides any performance boost.

So Chris do you ack on this series on the basis of the above - that it 
consolidates the current code and following GuC patch will be another 
user of the pin_vmap API?

Regards,

Tvrtko
Dave Gordon March 22, 2016, 3:25 p.m. UTC | #4
On 08/03/16 09:43, Tvrtko Ursulin wrote:
>
> On 02/03/16 15:40, Dave Gordon wrote:
>> On 02/03/16 12:08, Chris Wilson wrote:
>>> On Tue, Mar 01, 2016 at 04:33:58PM +0000, Dave Gordon wrote:
>>>> This is essentially Chris Wilson's patch of a similar name, reworked on
>>>> top of Alex Dai's recent patch:
>>>> | drm/i915: Add i915_gem_object_vmap to map GEM object to virtual space
>>>>
>>>> Chris' original commentary said:
>>>> | We now have two implementations for vmapping a whole object, one for
>>>> | dma-buf and one for the ringbuffer. If we couple the vmapping into
>>>> | the obj->pages lifetime, then we can reuse an obj->vmapping for both
>>>> | and at the same time couple it into the shrinker.
>>>> |
>>>> | v2: Mark the failable kmalloc() as __GFP_NOWARN (vsyrjala)
>>>> | v3: Call unpin_vmap from the right dmabuf unmapper
>>>>
>>>> v4: reimplements the same functionality, but now as wrappers round the
>>>>      recently-introduced i915_gem_object_vmap_range() from Alex's patch
>>>>      mentioned above.
>>>>
>>>> v5: separated from two minor but unrelated changes [Tvrtko Ursulin];
>>>>      this is the third and most substantial portion.
>>>>
>>>>      Decided not to hold onto vmappings after the pin count goes to
>>>> zero.
>>>>      This may reduce the benefit of Chris' scheme a bit, but does avoid
>>>>      any increased risk of exhausting kernel vmap space on 32-bit
>>>> kernels
>>>>      [Tvrtko Ursulin]. Potentially, the vunmap() could be deferred
>>>> until
>>>>      the put_pages() stage if a suitable notifier were written, but
>>>> we're
>>>>      not doing that here. Nonetheless, the simplification of both
>>>> dmabuf
>>>>      and ringbuffer code makes it worthwhile in its own right.
>>>>
>>>> v6: change BUG_ON() to WARN_ON(). [Tvrtko Ursulin]
>>>>
>>>> Signed-off-by: Dave Gordon <david.s.gordon@intel.com>
>>>> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>>> Cc: Chris Wilson <chris@chris-wilson.co.uk>
>>>> Cc: Alex Dai <yu.dai@intel.com>
>>>> ---
>>>>   drivers/gpu/drm/i915/i915_drv.h         | 22 ++++++++++++++-----
>>>>   drivers/gpu/drm/i915/i915_gem.c         | 39
>>>> +++++++++++++++++++++++++++++++++
>>>>   drivers/gpu/drm/i915/i915_gem_dmabuf.c  | 36
>>>> ++++--------------------------
>>>>   drivers/gpu/drm/i915/intel_ringbuffer.c |  9 ++++----
>>>>   4 files changed, 65 insertions(+), 41 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/i915/i915_drv.h
>>>> b/drivers/gpu/drm/i915/i915_drv.h
>>>> index b3ae191..f1ad3b3 100644
>>>> --- a/drivers/gpu/drm/i915/i915_drv.h
>>>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>>>> @@ -2172,10 +2172,7 @@ struct drm_i915_gem_object {
>>>>           struct scatterlist *sg;
>>>>           int last;
>>>>       } get_page;
>>>> -
>>>> -    /* prime dma-buf support */
>>>> -    void *dma_buf_vmapping;
>>>> -    int vmapping_count;
>>>> +    void *vmapping;
>>>>
>>>>       /** Breadcrumb of last rendering to the buffer.
>>>>        * There can only be one writer, but we allow for multiple
>>>> readers.
>>>> @@ -2980,7 +2977,22 @@ static inline void
>>>> i915_gem_object_pin_pages(struct drm_i915_gem_object *obj)
>>>>   static inline void i915_gem_object_unpin_pages(struct
>>>> drm_i915_gem_object *obj)
>>>>   {
>>>>       BUG_ON(obj->pages_pin_count == 0);
>>>> -    obj->pages_pin_count--;
>>>> +    if (--obj->pages_pin_count == 0 && obj->vmapping) {
>>>> +        /*
>>>> +         * Releasing the vmapping here may yield less benefit than
>>>> +         * if we kept it until put_pages(), but on the other hand
>>>
>>> Yields no benefit. Makes the patch pointless.
>>> Plus there is also pressure to enable WC vmaps.
>>> -Chris
>>
>> The patch is not pointless -- at the very least, it:
>> + reduces the size of "struct drm_i915_gem_object" (OK, only by 4 bytes)
>> + replaces special-function code for dmabufs with more generic code that
>> can be reused for other objects (for now, ringbuffers; next GuC-shared
>> objects -- see Alex's patch "drm/i915/guc: Simplify code by keeping vmap
>> of guc_client object" which will eliminate lot of short-term
>> kmap_atomics with persistent kmaps).
>> + provides a shorthand for the sequence of { get_pages(), pin_pages(),
>> vmap() } so we don't have to open-code it (and deal with all the error
>> paths) in several different places
>>
>> Thus there is an engineering benefit even if this version doesn't
>> provide any performance benefit. And if, as the next step, you want to
>> extend the vmap lifetime, you just have to remove those few lines in
>> i915_gem_object_unpin_pages() and incorporate the notifier that you
>> prototyped earlier -- if it actually provides any performance boost.
>
> So Chris do you ack on this series on the basis of the above - that it
> consolidates the current code and following GuC patch will be another
> user of the pin_vmap API?
>
> Regards,
> Tvrtko

I see that Chris has posted a patch to add a vmap notifier, although it 
hasn't yet got its R-B. So I suggest we merge this patch series now, and 
then update it by moving the vunmap() into put_pages() when Chris has 
the notifier finalised. IIRC you wanted Daniel to merge the new DRM bits 
(patches 3 and 7, which already have their R-Bs) ?

Or we can merge 1-5+7, all of which already have R-Bs, and I can turn
6 into a GuC-private version, without the benefit of simplifying and 
unifying the corresponding object-mapping management in the DMAbuf and 
ringbuffer code.

Or I can repost just the bits that don't rely on drm_malloc_gfp() and 
exclude the final patch so that we can move ahead on the bits we 
actually want for improving the performance of the GuC interface and 
reducing the number of kmap_atomic calls elsewhere, and then the omitted 
bits can be added back once drm_malloc_gfp() has been merged upstream 
and the notifier is working.

.Dave.
Tvrtko Ursulin March 23, 2016, 12:23 p.m. UTC | #5
On 22/03/16 15:25, Dave Gordon wrote:
> On 08/03/16 09:43, Tvrtko Ursulin wrote:
>>
>> On 02/03/16 15:40, Dave Gordon wrote:
>>> On 02/03/16 12:08, Chris Wilson wrote:
>>>> On Tue, Mar 01, 2016 at 04:33:58PM +0000, Dave Gordon wrote:
>>>>> This is essentially Chris Wilson's patch of a similar name,
>>>>> reworked on
>>>>> top of Alex Dai's recent patch:
>>>>> | drm/i915: Add i915_gem_object_vmap to map GEM object to virtual
>>>>> space
>>>>>
>>>>> Chris' original commentary said:
>>>>> | We now have two implementations for vmapping a whole object, one for
>>>>> | dma-buf and one for the ringbuffer. If we couple the vmapping into
>>>>> | the obj->pages lifetime, then we can reuse an obj->vmapping for both
>>>>> | and at the same time couple it into the shrinker.
>>>>> |
>>>>> | v2: Mark the failable kmalloc() as __GFP_NOWARN (vsyrjala)
>>>>> | v3: Call unpin_vmap from the right dmabuf unmapper
>>>>>
>>>>> v4: reimplements the same functionality, but now as wrappers round the
>>>>>      recently-introduced i915_gem_object_vmap_range() from Alex's
>>>>> patch
>>>>>      mentioned above.
>>>>>
>>>>> v5: separated from two minor but unrelated changes [Tvrtko Ursulin];
>>>>>      this is the third and most substantial portion.
>>>>>
>>>>>      Decided not to hold onto vmappings after the pin count goes to
>>>>> zero.
>>>>>      This may reduce the benefit of Chris' scheme a bit, but does
>>>>> avoid
>>>>>      any increased risk of exhausting kernel vmap space on 32-bit
>>>>> kernels
>>>>>      [Tvrtko Ursulin]. Potentially, the vunmap() could be deferred
>>>>> until
>>>>>      the put_pages() stage if a suitable notifier were written, but
>>>>> we're
>>>>>      not doing that here. Nonetheless, the simplification of both
>>>>> dmabuf
>>>>>      and ringbuffer code makes it worthwhile in its own right.
>>>>>
>>>>> v6: change BUG_ON() to WARN_ON(). [Tvrtko Ursulin]
>>>>>
>>>>> Signed-off-by: Dave Gordon <david.s.gordon@intel.com>
>>>>> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>>>> Cc: Chris Wilson <chris@chris-wilson.co.uk>
>>>>> Cc: Alex Dai <yu.dai@intel.com>
>>>>> ---
>>>>>   drivers/gpu/drm/i915/i915_drv.h         | 22 ++++++++++++++-----
>>>>>   drivers/gpu/drm/i915/i915_gem.c         | 39
>>>>> +++++++++++++++++++++++++++++++++
>>>>>   drivers/gpu/drm/i915/i915_gem_dmabuf.c  | 36
>>>>> ++++--------------------------
>>>>>   drivers/gpu/drm/i915/intel_ringbuffer.c |  9 ++++----
>>>>>   4 files changed, 65 insertions(+), 41 deletions(-)
>>>>>
>>>>> diff --git a/drivers/gpu/drm/i915/i915_drv.h
>>>>> b/drivers/gpu/drm/i915/i915_drv.h
>>>>> index b3ae191..f1ad3b3 100644
>>>>> --- a/drivers/gpu/drm/i915/i915_drv.h
>>>>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>>>>> @@ -2172,10 +2172,7 @@ struct drm_i915_gem_object {
>>>>>           struct scatterlist *sg;
>>>>>           int last;
>>>>>       } get_page;
>>>>> -
>>>>> -    /* prime dma-buf support */
>>>>> -    void *dma_buf_vmapping;
>>>>> -    int vmapping_count;
>>>>> +    void *vmapping;
>>>>>
>>>>>       /** Breadcrumb of last rendering to the buffer.
>>>>>        * There can only be one writer, but we allow for multiple
>>>>> readers.
>>>>> @@ -2980,7 +2977,22 @@ static inline void
>>>>> i915_gem_object_pin_pages(struct drm_i915_gem_object *obj)
>>>>>   static inline void i915_gem_object_unpin_pages(struct
>>>>> drm_i915_gem_object *obj)
>>>>>   {
>>>>>       BUG_ON(obj->pages_pin_count == 0);
>>>>> -    obj->pages_pin_count--;
>>>>> +    if (--obj->pages_pin_count == 0 && obj->vmapping) {
>>>>> +        /*
>>>>> +         * Releasing the vmapping here may yield less benefit than
>>>>> +         * if we kept it until put_pages(), but on the other hand
>>>>
>>>> Yields no benefit. Makes the patch pointless.
>>>> Plus there is also pressure to enable WC vmaps.
>>>> -Chris
>>>
>>> The patch is not pointless -- at the very least, it:
>>> + reduces the size of "struct drm_i915_gem_object" (OK, only by 4 bytes)
>>> + replaces special-function code for dmabufs with more generic code that
>>> can be reused for other objects (for now, ringbuffers; next GuC-shared
>>> objects -- see Alex's patch "drm/i915/guc: Simplify code by keeping vmap
>>> of guc_client object" which will eliminate lot of short-term
>>> kmap_atomics with persistent kmaps).
>>> + provides a shorthand for the sequence of { get_pages(), pin_pages(),
>>> vmap() } so we don't have to open-code it (and deal with all the error
>>> paths) in several different places
>>>
>>> Thus there is an engineering benefit even if this version doesn't
>>> provide any performance benefit. And if, as the next step, you want to
>>> extend the vmap lifetime, you just have to remove those few lines in
>>> i915_gem_object_unpin_pages() and incorporate the notifier that you
>>> prototyped earlier -- if it actually provides any performance boost.
>>
>> So Chris do you ack on this series on the basis of the above - that it
>> consolidates the current code and following GuC patch will be another
>> user of the pin_vmap API?
>>
>> Regards,
>> Tvrtko
>
> I see that Chris has posted a patch to add a vmap notifier, although it
> hasn't yet got its R-B. So I suggest we merge this patch series now, and
> then update it by moving the vunmap() into put_pages() when Chris has
> the notifier finalised. IIRC you wanted Daniel to merge the new DRM bits
> (patches 3 and 7, which already have their R-Bs) ?
>
> Or we can merge 1-5+7, all of which already have R-Bs, and I can turn
> 6 into a GuC-private version, without the benefit of simplifying and
> unifying the corresponding object-mapping management in the DMAbuf and
> ringbuffer code.
>
> Or I can repost just the bits that don't rely on drm_malloc_gfp() and
> exclude the final patch so that we can move ahead on the bits we
> actually want for improving the performance of the GuC interface and
> reducing the number of kmap_atomic calls elsewhere, and then the omitted
> bits can be added back once drm_malloc_gfp() has been merged upstream
> and the notifier is working.

I've chatted with Chris and Daniel on IRC and here is the summary and 
way forward I think.

1. Drop 6/7, and probably 7/7 unless you can get etnaviv people to r-b/ack.

2. Add the patch which fixes the actual scheduling while atomic in GuC
to the end of the series with a Bugzilla & Testcase tag in that patch.

(This step should allow Chris to provide an Acked-by.)

3. Cc dri-devel on all patches of the series since some touch DRM core. 
(This is standard recommended practice).

4. Rebase & resend as new series.

5. Review the new patch in the series.

6. Explain CI results.

7. Merge. :)

Regards,

Tvrtko
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index b3ae191..f1ad3b3 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2172,10 +2172,7 @@  struct drm_i915_gem_object {
 		struct scatterlist *sg;
 		int last;
 	} get_page;
-
-	/* prime dma-buf support */
-	void *dma_buf_vmapping;
-	int vmapping_count;
+	void *vmapping;
 
 	/** Breadcrumb of last rendering to the buffer.
 	 * There can only be one writer, but we allow for multiple readers.
@@ -2980,7 +2977,22 @@  static inline void i915_gem_object_pin_pages(struct drm_i915_gem_object *obj)
 static inline void i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj)
 {
 	BUG_ON(obj->pages_pin_count == 0);
-	obj->pages_pin_count--;
+	if (--obj->pages_pin_count == 0 && obj->vmapping) {
+		/*
+		 * Releasing the vmapping here may yield less benefit than
+		 * if we kept it until put_pages(), but on the other hand
+		 * avoids issues of exhausting kernel vmappable address
+		 * space on 32-bit kernels.
+		 */
+		vunmap(obj->vmapping);
+		obj->vmapping = NULL;
+	}
+}
+
+void *__must_check i915_gem_object_pin_vmap(struct drm_i915_gem_object *obj);
+static inline void i915_gem_object_unpin_vmap(struct drm_i915_gem_object *obj)
+{
+	i915_gem_object_unpin_pages(obj);
 }
 
 void *__must_check i915_gem_object_vmap_range(struct drm_i915_gem_object *obj,
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 5b6774b..4bca643 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2235,6 +2235,12 @@  static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj)
 	ops->put_pages(obj);
 	obj->pages = NULL;
 
+	/* vmapping should have been dropped when pages_pin_count went to 0 */
+	if (WARN_ON(obj->vmapping)) {
+		vunmap(obj->vmapping);
+		obj->vmapping = NULL;
+	}
+
 	i915_gem_object_invalidate(obj);
 
 	return 0;
@@ -2464,6 +2470,39 @@  void *i915_gem_object_vmap_range(struct drm_i915_gem_object *obj,
 	return addr;
 }
 
+/**
+ * i915_gem_object_pin_vmap - pin a GEM object and map it into kernel space
+ * @obj: the GEM object to be mapped
+ *
+ * Combines the functions of get_pages(), pin_pages() and vmap_range() on
+ * the whole object.  The caller should release the mapping by calling
+ * i915_gem_object_unpin_vmap() when it is no longer required.
+ *
+ * Returns the address at which the object has been mapped, or an ERR_PTR
+ * on failure.
+ */
+void *i915_gem_object_pin_vmap(struct drm_i915_gem_object *obj)
+{
+	int ret;
+
+	ret = i915_gem_object_get_pages(obj);
+	if (ret)
+		return ERR_PTR(ret);
+
+	i915_gem_object_pin_pages(obj);
+
+	if (obj->vmapping == NULL) {
+		obj->vmapping = i915_gem_object_vmap_range(obj, 0, 0);
+
+		if (obj->vmapping == NULL) {
+			i915_gem_object_unpin_pages(obj);
+			return ERR_PTR(-ENOMEM);
+		}
+	}
+
+	return obj->vmapping;
+}
+
 void i915_vma_move_to_active(struct i915_vma *vma,
 			     struct drm_i915_gem_request *req)
 {
diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
index 3a5d01a..adc7b5e 100644
--- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
@@ -108,40 +108,17 @@  static void *i915_gem_dmabuf_vmap(struct dma_buf *dma_buf)
 {
 	struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf);
 	struct drm_device *dev = obj->base.dev;
+	void *addr;
 	int ret;
 
 	ret = i915_mutex_lock_interruptible(dev);
 	if (ret)
 		return ERR_PTR(ret);
 
-	if (obj->dma_buf_vmapping) {
-		obj->vmapping_count++;
-		goto out_unlock;
-	}
-
-	ret = i915_gem_object_get_pages(obj);
-	if (ret)
-		goto err;
-
-	i915_gem_object_pin_pages(obj);
-
-	ret = -ENOMEM;
-
-	obj->dma_buf_vmapping = i915_gem_object_vmap_range(obj, 0, 0);
-
-	if (!obj->dma_buf_vmapping)
-		goto err_unpin;
-
-	obj->vmapping_count = 1;
-out_unlock:
+	addr = i915_gem_object_pin_vmap(obj);
 	mutex_unlock(&dev->struct_mutex);
-	return obj->dma_buf_vmapping;
 
-err_unpin:
-	i915_gem_object_unpin_pages(obj);
-err:
-	mutex_unlock(&dev->struct_mutex);
-	return ERR_PTR(ret);
+	return addr;
 }
 
 static void i915_gem_dmabuf_vunmap(struct dma_buf *dma_buf, void *vaddr)
@@ -150,12 +127,7 @@  static void i915_gem_dmabuf_vunmap(struct dma_buf *dma_buf, void *vaddr)
 	struct drm_device *dev = obj->base.dev;
 
 	mutex_lock(&dev->struct_mutex);
-	if (--obj->vmapping_count == 0) {
-		vunmap(obj->dma_buf_vmapping);
-		obj->dma_buf_vmapping = NULL;
-
-		i915_gem_object_unpin_pages(obj);
-	}
+	i915_gem_object_unpin_vmap(obj);
 	mutex_unlock(&dev->struct_mutex);
 }
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 58a18e1..47f186e 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -2056,7 +2056,7 @@  static int init_phys_status_page(struct intel_engine_cs *ring)
 void intel_unpin_ringbuffer_obj(struct intel_ringbuffer *ringbuf)
 {
 	if (HAS_LLC(ringbuf->obj->base.dev) && !ringbuf->obj->stolen)
-		vunmap(ringbuf->virtual_start);
+		i915_gem_object_unpin_vmap(ringbuf->obj);
 	else
 		iounmap(ringbuf->virtual_start);
 	ringbuf->virtual_start = NULL;
@@ -2080,9 +2080,10 @@  int intel_pin_and_map_ringbuffer_obj(struct drm_device *dev,
 		if (ret)
 			goto unpin;
 
-		ringbuf->virtual_start = i915_gem_object_vmap_range(obj, 0, 0);
-		if (ringbuf->virtual_start == NULL) {
-			ret = -ENOMEM;
+		ringbuf->virtual_start = i915_gem_object_pin_vmap(obj);
+		if (IS_ERR(ringbuf->virtual_start)) {
+			ret = PTR_ERR(ringbuf->virtual_start);
+			ringbuf->virtual_start = NULL;
 			goto unpin;
 		}
 	} else {