diff mbox series

drm/i915: Flush the existing fence before GGTT read/write

Message ID 20190823150302.5719-1-chris@chris-wilson.co.uk (mailing list archive)
State New, archived
Headers show
Series drm/i915: Flush the existing fence before GGTT read/write | expand

Commit Message

Chris Wilson Aug. 23, 2019, 3:03 p.m. UTC
Our fence management is lazy, very lazy. If the user marks an object as
untiled, we do not immediately flush the fence but merely mark it as
dirty. Upon next use, we have to remember to check and remove the fence,
by which time we hope it is idle and we do not have to wait.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=111468
Fixes: 1f7fd484fff1 ("drm/i915: Replace i915_vma_put_fence()")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Matthew Auld <matthew.auld@intel.com>
---
 drivers/gpu/drm/i915/i915_gem.c | 30 ++++++++++++++++++++++++++++--
 1 file changed, 28 insertions(+), 2 deletions(-)

Comments

Matthew Auld Aug. 23, 2019, 3:26 p.m. UTC | #1
On 23/08/2019 16:03, Chris Wilson wrote:
> Our fence management is lazy, very lazy. If the user marks an object as
> untiled, we do not immediately flush the fence but merely mark it as
> dirty. On the use we have to remember to check and remove the fence, by
> which time we hope it is idle and we do not have to wait.
> 
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=111468
> Fixes: 1f7fd484fff1 ("drm/i915: Replace i915_vma_put_fence()")
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Matthew Auld <matthew.auld@intel.com>

Well that sucks, should we also try to be more solid on the reloc path?

> ---
>   drivers/gpu/drm/i915/i915_gem.c | 30 ++++++++++++++++++++++++++++--
>   1 file changed, 28 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index eb31b69a316a..41b28f6d8620 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -324,6 +324,26 @@ gtt_user_read(struct io_mapping *mapping,
>   	return unwritten;
>   }
>   
> +static int linear_ggtt_offset(struct i915_vma *vma, u64 *offset)
> +{
> +	int ret;
> +
> +	GEM_BUG_ON(!i915_vma_is_map_and_fenceable(vma));
> +
> +	/* Open-coded i915_vma_pin_iomap() */
> +
> +	if (vma->fence) {
> +		mutex_lock(&vma->vm->mutex);
> +		ret = i915_vma_revoke_fence(vma);
> +		mutex_unlock(&vma->vm->mutex);
> +		if (ret)
> +			return ret;
> +	}
> +
> +	*offset = i915_ggtt_offset(vma);
> +	return 0;
> +}
> +
>   static int
>   i915_gem_gtt_pread(struct drm_i915_gem_object *obj,
>   		   const struct drm_i915_gem_pread *args)
> @@ -350,7 +370,10 @@ i915_gem_gtt_pread(struct drm_i915_gem_object *obj,
>   					       PIN_NONBLOCK /* NOWARN */ |
>   					       PIN_NOEVICT);
>   	if (!IS_ERR(vma)) {
> -		node.start = i915_ggtt_offset(vma);
> +		ret = linear_ggtt_offset(vma, &node.start);
> +		if (ret)
> +			goto out_unpin;
> +
>   		node.allocated = false;

node.allocated = false should go before the jump.

>   	} else {
>   		ret = insert_mappable_node(ggtt, &node, PAGE_SIZE);
> @@ -560,7 +583,10 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
>   					       PIN_NONBLOCK /* NOWARN */ |
>   					       PIN_NOEVICT);
>   	if (!IS_ERR(vma)) {
> -		node.start = i915_ggtt_offset(vma);
> +		ret = linear_ggtt_offset(vma, &node.start);
> +		if (ret)
> +			goto out_unpin;
> +
>   		node.allocated = false;
>   	} else {
>   		ret = insert_mappable_node(ggtt, &node, PAGE_SIZE);
>
Matthew Auld Aug. 23, 2019, 3:28 p.m. UTC | #2
On 23/08/2019 16:26, Matthew Auld wrote:
> On 23/08/2019 16:03, Chris Wilson wrote:
>> Our fence management is lazy, very lazy. If the user marks an object as
>> untiled, we do not immediately flush the fence but merely mark it as
>> dirty. On the use we have to remember to check and remove the fence, by
>> which time we hope it is idle and we do not have to wait.
>>
>> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=111468
>> Fixes: 1f7fd484fff1 ("drm/i915: Replace i915_vma_put_fence()")
>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>> Cc: Matthew Auld <matthew.auld@intel.com>
> 
> Well that sucks, should we also try to be more solid on the reloc path?

Also not really a blocker so,
Reviewed-by: Matthew Auld <matthew.auld@intel.com>

> 
>> ---
>>   drivers/gpu/drm/i915/i915_gem.c | 30 ++++++++++++++++++++++++++++--
>>   1 file changed, 28 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_gem.c 
>> b/drivers/gpu/drm/i915/i915_gem.c
>> index eb31b69a316a..41b28f6d8620 100644
>> --- a/drivers/gpu/drm/i915/i915_gem.c
>> +++ b/drivers/gpu/drm/i915/i915_gem.c
>> @@ -324,6 +324,26 @@ gtt_user_read(struct io_mapping *mapping,
>>       return unwritten;
>>   }
>> +static int linear_ggtt_offset(struct i915_vma *vma, u64 *offset)
>> +{
>> +    int ret;
>> +
>> +    GEM_BUG_ON(!i915_vma_is_map_and_fenceable(vma));
>> +
>> +    /* Open-coded i915_vma_pin_iomap() */
>> +
>> +    if (vma->fence) {
>> +        mutex_lock(&vma->vm->mutex);
>> +        ret = i915_vma_revoke_fence(vma);
>> +        mutex_unlock(&vma->vm->mutex);
>> +        if (ret)
>> +            return ret;
>> +    }
>> +
>> +    *offset = i915_ggtt_offset(vma);
>> +    return 0;
>> +}
>> +
>>   static int
>>   i915_gem_gtt_pread(struct drm_i915_gem_object *obj,
>>              const struct drm_i915_gem_pread *args)
>> @@ -350,7 +370,10 @@ i915_gem_gtt_pread(struct drm_i915_gem_object *obj,
>>                              PIN_NONBLOCK /* NOWARN */ |
>>                              PIN_NOEVICT);
>>       if (!IS_ERR(vma)) {
>> -        node.start = i915_ggtt_offset(vma);
>> +        ret = linear_ggtt_offset(vma, &node.start);
>> +        if (ret)
>> +            goto out_unpin;
>> +
>>           node.allocated = false;
> 
> node.allocated = false should go before the jump.
> 
>>       } else {
>>           ret = insert_mappable_node(ggtt, &node, PAGE_SIZE);
>> @@ -560,7 +583,10 @@ i915_gem_gtt_pwrite_fast(struct 
>> drm_i915_gem_object *obj,
>>                              PIN_NONBLOCK /* NOWARN */ |
>>                              PIN_NOEVICT);
>>       if (!IS_ERR(vma)) {
>> -        node.start = i915_ggtt_offset(vma);
>> +        ret = linear_ggtt_offset(vma, &node.start);
>> +        if (ret)
>> +            goto out_unpin;
>> +
>>           node.allocated = false;
>>       } else {
>>           ret = insert_mappable_node(ggtt, &node, PAGE_SIZE);
>>
Chris Wilson Aug. 23, 2019, 3:31 p.m. UTC | #3
Quoting Matthew Auld (2019-08-23 16:26:16)
> On 23/08/2019 16:03, Chris Wilson wrote:
> > Our fence management is lazy, very lazy. If the user marks an object as
> > untiled, we do not immediately flush the fence but merely mark it as
> > dirty. On the use we have to remember to check and remove the fence, by
> > which time we hope it is idle and we do not have to wait.
> > 
> > Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=111468
> > Fixes: 1f7fd484fff1 ("drm/i915: Replace i915_vma_put_fence()")
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Matthew Auld <matthew.auld@intel.com>
> 
> Well that sucks, should we also try to be more solid on the reloc path?

I was thinking about pulling it into the object_ggtt_pin. That appears
to cover everyone.
-Chris
Chris Wilson Aug. 23, 2019, 3:44 p.m. UTC | #4
Quoting Chris Wilson (2019-08-23 16:31:19)
> Quoting Matthew Auld (2019-08-23 16:26:16)
> > On 23/08/2019 16:03, Chris Wilson wrote:
> > > Our fence management is lazy, very lazy. If the user marks an object as
> > > untiled, we do not immediately flush the fence but merely mark it as
> > > dirty. On the use we have to remember to check and remove the fence, by
> > > which time we hope it is idle and we do not have to wait.
> > > 
> > > Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=111468
> > > Fixes: 1f7fd484fff1 ("drm/i915: Replace i915_vma_put_fence()")
> > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > > Cc: Matthew Auld <matthew.auld@intel.com>
> > 
> > Well that sucks, should we also try to be more solid on the reloc path?
> 
> I was thinking about pulling it into the object_ggtt_pin. That appears
> to cover everyone.

An alternative would be not to be so lazy on changing tiling mode. We
are lazy because of the userspace caching, where it may inherit an
inappropriate tiling and so change it during object initialisation. That
used to introduce a stall, hence being lazy.

Although, the fence is on the vma. If we don't like the current fence,
we could just allocate a new vma. (If there isn't enough room for the
fresh vma, we would evict and hopefully stall on the LRU one.) Hmm.
-Chris
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index eb31b69a316a..41b28f6d8620 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -324,6 +324,26 @@  gtt_user_read(struct io_mapping *mapping,
 	return unwritten;
 }
 
+static int linear_ggtt_offset(struct i915_vma *vma, u64 *offset)
+{
+	int ret;
+
+	GEM_BUG_ON(!i915_vma_is_map_and_fenceable(vma));
+
+	/* Open-coded i915_vma_pin_iomap() */
+
+	if (vma->fence) {
+		mutex_lock(&vma->vm->mutex);
+		ret = i915_vma_revoke_fence(vma);
+		mutex_unlock(&vma->vm->mutex);
+		if (ret)
+			return ret;
+	}
+
+	*offset = i915_ggtt_offset(vma);
+	return 0;
+}
+
 static int
 i915_gem_gtt_pread(struct drm_i915_gem_object *obj,
 		   const struct drm_i915_gem_pread *args)
@@ -350,7 +370,10 @@  i915_gem_gtt_pread(struct drm_i915_gem_object *obj,
 					       PIN_NONBLOCK /* NOWARN */ |
 					       PIN_NOEVICT);
 	if (!IS_ERR(vma)) {
-		node.start = i915_ggtt_offset(vma);
+		ret = linear_ggtt_offset(vma, &node.start);
+		if (ret)
+			goto out_unpin;
+
 		node.allocated = false;
 	} else {
 		ret = insert_mappable_node(ggtt, &node, PAGE_SIZE);
@@ -560,7 +583,10 @@  i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
 					       PIN_NONBLOCK /* NOWARN */ |
 					       PIN_NOEVICT);
 	if (!IS_ERR(vma)) {
-		node.start = i915_ggtt_offset(vma);
+		ret = linear_ggtt_offset(vma, &node.start);
+		if (ret)
+			goto out_unpin;
+
 		node.allocated = false;
 	} else {
 		ret = insert_mappable_node(ggtt, &node, PAGE_SIZE);