diff mbox

[v2,3/5] drm/i915/guc: don't spinwait if the GuC's workqueue is full

Message ID 1461780195-17434-3-git-send-email-david.s.gordon@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Dave Gordon April 27, 2016, 6:03 p.m. UTC
Rather than wait to see whether more space becomes available in the GuC
submission workqueue, we can just return -EAGAIN and let the caller try
again in a little while. This gets rid of an uninterruptible sleep in
the polling code :)

We'll also add a counter to the GuC client statistics, to see how often
we find the WQ full.

Signed-off-by: Dave Gordon <david.s.gordon@intel.com>
---
 drivers/gpu/drm/i915/i915_debugfs.c        |  1 +
 drivers/gpu/drm/i915/i915_guc_submission.c | 16 +++++-----------
 drivers/gpu/drm/i915/intel_guc.h           |  8 ++++----
 3 files changed, 10 insertions(+), 15 deletions(-)

Comments

Tvrtko Ursulin April 29, 2016, 3:17 p.m. UTC | #1
On 27/04/16 19:03, Dave Gordon wrote:
> Rather than wait to see whether more space becomes available in the GuC
> submission workqueue, we can just return -EAGAIN and let the caller try
> again in a little while. This gets rid of an uninterruptable sleep in
> the polling code :)
>
> We'll also add a counter to the GuC client statistics, to see how often
> we find the WQ full.
>
> Signed-off-by: Dave Gordon <david.s.gordon@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_debugfs.c        |  1 +
>   drivers/gpu/drm/i915/i915_guc_submission.c | 16 +++++-----------
>   drivers/gpu/drm/i915/intel_guc.h           |  8 ++++----
>   3 files changed, 10 insertions(+), 15 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 8b8d6f0..1024947 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -2509,6 +2509,7 @@ static void i915_guc_client_info(struct seq_file *m,
>   	seq_printf(m, "\tWQ size %d, offset: 0x%x, tail %d\n",
>   		client->wq_size, client->wq_offset, client->wq_tail);
>
> +	seq_printf(m, "\tWork queue full: %u\n", client->no_wq_space);
>   	seq_printf(m, "\tFailed to queue: %u\n", client->q_fail);
>   	seq_printf(m, "\tFailed doorbell: %u\n", client->b_fail);
>   	seq_printf(m, "\tLast submission result: %d\n", client->retcode);
> diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
> index 66af5ce..6626eff 100644
> --- a/drivers/gpu/drm/i915/i915_guc_submission.c
> +++ b/drivers/gpu/drm/i915/i915_guc_submission.c
> @@ -453,27 +453,21 @@ static void guc_fini_ctx_desc(struct intel_guc *guc,
>
>   int i915_guc_wq_check_space(struct drm_i915_gem_request *request)
>   {
> -	const size_t size = sizeof(struct guc_wq_item);
> +	const size_t wqi_size = sizeof(struct guc_wq_item);
>   	struct i915_guc_client *gc = request->i915->guc.execbuf_client;
>   	struct guc_process_desc *desc;
> -	int ret = -ETIMEDOUT, timeout_counter = 200;
>
>   	if (!gc)
>   		return 0;

Not part of this patch but spotted it - can this really happen, and if 
so, does it warrant a WARN_ONCE, GEM_BUG_ON or something?

>
>   	desc = gc->client_base + gc->proc_desc_offset;
>
> -	while (timeout_counter-- > 0) {
> -		if (CIRC_SPACE(gc->wq_tail, desc->head, gc->wq_size) >= size) {
> -			ret = 0;
> -			break;
> -		}
> +	if (CIRC_SPACE(gc->wq_tail, desc->head, gc->wq_size) >= wqi_size)
> +		return 0;

Could stick a likely() here to lay out the code for the expected case, 
but that could just be my OCD. :)

>
> -		if (timeout_counter)
> -			usleep_range(1000, 2000);
> -	};
> +	gc->no_wq_space += 1;
>
> -	return ret;
> +	return -EAGAIN;

I suppose it is OK to make userspace pay the cost since this should be 
extremely rare, correct?

>   }
>
>   static int guc_add_workqueue_item(struct i915_guc_client *gc,
> diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h
> index b37c731..436f2d6 100644
> --- a/drivers/gpu/drm/i915/intel_guc.h
> +++ b/drivers/gpu/drm/i915/intel_guc.h
> @@ -73,10 +73,10 @@ struct i915_guc_client {
>
>   	/* GuC submission statistics & status */
>   	uint64_t submissions[GUC_MAX_ENGINES_NUM];
> -	uint32_t q_fail;
> -	uint32_t b_fail;
> -	int retcode;
> -	int spare;			/* pad to 32 DWords		*/
> +	uint32_t no_wq_space;		/* Space pre-check failed	*/
> +	uint32_t q_fail;		/* Failed to queue (MBZ)	*/
> +	uint32_t b_fail;		/* Doorbell failure (MBZ)	*/
> +	int retcode;			/* Result of last guc_submit()	*/
>   };
>
>   enum intel_guc_fw_status {
>

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
Tvrtko Ursulin April 29, 2016, 3:45 p.m. UTC | #2
One late comment:

On 27/04/16 19:03, Dave Gordon wrote:
> Rather than wait to see whether more space becomes available in the GuC
> submission workqueue, we can just return -EAGAIN and let the caller try
> again in a little while. This gets rid of an uninterruptable sleep in
> the polling code :)
>
> We'll also add a counter to the GuC client statistics, to see how often
> we find the WQ full.
>
> Signed-off-by: Dave Gordon <david.s.gordon@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_debugfs.c        |  1 +
>   drivers/gpu/drm/i915/i915_guc_submission.c | 16 +++++-----------
>   drivers/gpu/drm/i915/intel_guc.h           |  8 ++++----
>   3 files changed, 10 insertions(+), 15 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 8b8d6f0..1024947 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -2509,6 +2509,7 @@ static void i915_guc_client_info(struct seq_file *m,
>   	seq_printf(m, "\tWQ size %d, offset: 0x%x, tail %d\n",
>   		client->wq_size, client->wq_offset, client->wq_tail);
>
> +	seq_printf(m, "\tWork queue full: %u\n", client->no_wq_space);
>   	seq_printf(m, "\tFailed to queue: %u\n", client->q_fail);
>   	seq_printf(m, "\tFailed doorbell: %u\n", client->b_fail);
>   	seq_printf(m, "\tLast submission result: %d\n", client->retcode);
> diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
> index 66af5ce..6626eff 100644
> --- a/drivers/gpu/drm/i915/i915_guc_submission.c
> +++ b/drivers/gpu/drm/i915/i915_guc_submission.c
> @@ -453,27 +453,21 @@ static void guc_fini_ctx_desc(struct intel_guc *guc,
>
>   int i915_guc_wq_check_space(struct drm_i915_gem_request *request)
>   {
> -	const size_t size = sizeof(struct guc_wq_item);
> +	const size_t wqi_size = sizeof(struct guc_wq_item);
>   	struct i915_guc_client *gc = request->i915->guc.execbuf_client;
>   	struct guc_process_desc *desc;
> -	int ret = -ETIMEDOUT, timeout_counter = 200;
>
>   	if (!gc)
>   		return 0;
>
>   	desc = gc->client_base + gc->proc_desc_offset;
>
> -	while (timeout_counter-- > 0) {
> -		if (CIRC_SPACE(gc->wq_tail, desc->head, gc->wq_size) >= size) {
> -			ret = 0;
> -			break;
> -		}
> +	if (CIRC_SPACE(gc->wq_tail, desc->head, gc->wq_size) >= wqi_size)
> +		return 0;
>
> -		if (timeout_counter)
> -			usleep_range(1000, 2000);
> -	};
> +	gc->no_wq_space += 1;
>
> -	return ret;
> +	return -EAGAIN;
>   }
>
>   static int guc_add_workqueue_item(struct i915_guc_client *gc,
> diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h
> index b37c731..436f2d6 100644
> --- a/drivers/gpu/drm/i915/intel_guc.h
> +++ b/drivers/gpu/drm/i915/intel_guc.h
> @@ -73,10 +73,10 @@ struct i915_guc_client {
>
>   	/* GuC submission statistics & status */
>   	uint64_t submissions[GUC_MAX_ENGINES_NUM];
> -	uint32_t q_fail;
> -	uint32_t b_fail;
> -	int retcode;
> -	int spare;			/* pad to 32 DWords		*/
> +	uint32_t no_wq_space;		/* Space pre-check failed	*/
> +	uint32_t q_fail;		/* Failed to queue (MBZ)	*/
> +	uint32_t b_fail;		/* Doorbell failure (MBZ)	*/

Why MBZ? It is not used at all in this context, so this will just 
confuse people.

> +	int retcode;			/* Result of last guc_submit()	*/
>   };
>
>   enum intel_guc_fw_status {
>


Regards,

Tvrtko
Dave Gordon May 6, 2016, 3:17 p.m. UTC | #3
On 29/04/16 16:45, Tvrtko Ursulin wrote:
>
> One late comment:
>
> On 27/04/16 19:03, Dave Gordon wrote:
>> Rather than wait to see whether more space becomes available in the GuC
>> submission workqueue, we can just return -EAGAIN and let the caller try
>> again in a little while. This gets rid of an uninterruptable sleep in
>> the polling code :)
>>
>> We'll also add a counter to the GuC client statistics, to see how often
>> we find the WQ full.
>>
>> Signed-off-by: Dave Gordon <david.s.gordon@intel.com>
>> ---
>>   drivers/gpu/drm/i915/i915_debugfs.c        |  1 +
>>   drivers/gpu/drm/i915/i915_guc_submission.c | 16 +++++-----------
>>   drivers/gpu/drm/i915/intel_guc.h           |  8 ++++----
>>   3 files changed, 10 insertions(+), 15 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c
>> b/drivers/gpu/drm/i915/i915_debugfs.c
>> index 8b8d6f0..1024947 100644
>> --- a/drivers/gpu/drm/i915/i915_debugfs.c
>> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
>> @@ -2509,6 +2509,7 @@ static void i915_guc_client_info(struct seq_file
>> *m,
>>       seq_printf(m, "\tWQ size %d, offset: 0x%x, tail %d\n",
>>           client->wq_size, client->wq_offset, client->wq_tail);
>>
>> +    seq_printf(m, "\tWork queue full: %u\n", client->no_wq_space);
>>       seq_printf(m, "\tFailed to queue: %u\n", client->q_fail);
>>       seq_printf(m, "\tFailed doorbell: %u\n", client->b_fail);
>>       seq_printf(m, "\tLast submission result: %d\n", client->retcode);
>> diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c
>> b/drivers/gpu/drm/i915/i915_guc_submission.c
>> index 66af5ce..6626eff 100644
>> --- a/drivers/gpu/drm/i915/i915_guc_submission.c
>> +++ b/drivers/gpu/drm/i915/i915_guc_submission.c
>> @@ -453,27 +453,21 @@ static void guc_fini_ctx_desc(struct intel_guc
>> *guc,
>>
>>   int i915_guc_wq_check_space(struct drm_i915_gem_request *request)
>>   {
>> -    const size_t size = sizeof(struct guc_wq_item);
>> +    const size_t wqi_size = sizeof(struct guc_wq_item);
>>       struct i915_guc_client *gc = request->i915->guc.execbuf_client;
>>       struct guc_process_desc *desc;
>> -    int ret = -ETIMEDOUT, timeout_counter = 200;
>>
>>       if (!gc)
>>           return 0;
>>
>>       desc = gc->client_base + gc->proc_desc_offset;
>>
>> -    while (timeout_counter-- > 0) {
>> -        if (CIRC_SPACE(gc->wq_tail, desc->head, gc->wq_size) >= size) {
>> -            ret = 0;
>> -            break;
>> -        }
>> +    if (CIRC_SPACE(gc->wq_tail, desc->head, gc->wq_size) >= wqi_size)
>> +        return 0;
>>
>> -        if (timeout_counter)
>> -            usleep_range(1000, 2000);
>> -    };
>> +    gc->no_wq_space += 1;
>>
>> -    return ret;
>> +    return -EAGAIN;
>>   }
>>
>>   static int guc_add_workqueue_item(struct i915_guc_client *gc,
>> diff --git a/drivers/gpu/drm/i915/intel_guc.h
>> b/drivers/gpu/drm/i915/intel_guc.h
>> index b37c731..436f2d6 100644
>> --- a/drivers/gpu/drm/i915/intel_guc.h
>> +++ b/drivers/gpu/drm/i915/intel_guc.h
>> @@ -73,10 +73,10 @@ struct i915_guc_client {
>>
>>       /* GuC submission statistics & status */
>>       uint64_t submissions[GUC_MAX_ENGINES_NUM];
>> -    uint32_t q_fail;
>> -    uint32_t b_fail;
>> -    int retcode;
>> -    int spare;            /* pad to 32 DWords        */
>> +    uint32_t no_wq_space;        /* Space pre-check failed    */
>> +    uint32_t q_fail;        /* Failed to queue (MBZ)    */
>> +    uint32_t b_fail;        /* Doorbell failure (MBZ)    */
>
> Why MBZ? It is not all used in this context so this will just confuse
> people.

MBZ => Must Be Zero. As in, we can't really deal with the events that 
cause these counters to be incremented, so if they're nonzero, something 
is broken and the driver may or may not recover :(

If the call protocol is changed, the MBZ variables may go away entirely.

.Dave.

>> +    int retcode;            /* Result of last guc_submit()    */
>>   };
>>
>>   enum intel_guc_fw_status {
>
>
> Regards,
>
> Tvrtko
Dave Gordon May 6, 2016, 5:12 p.m. UTC | #4
On 29/04/16 16:17, Tvrtko Ursulin wrote:
>
> On 27/04/16 19:03, Dave Gordon wrote:
>> Rather than wait to see whether more space becomes available in the GuC
>> submission workqueue, we can just return -EAGAIN and let the caller try
>> again in a little while. This gets rid of an uninterruptable sleep in
>> the polling code :)
>>
>> We'll also add a counter to the GuC client statistics, to see how often
>> we find the WQ full.
>>
>> Signed-off-by: Dave Gordon <david.s.gordon@intel.com>
>> ---
>>   drivers/gpu/drm/i915/i915_debugfs.c        |  1 +
>>   drivers/gpu/drm/i915/i915_guc_submission.c | 16 +++++-----------
>>   drivers/gpu/drm/i915/intel_guc.h           |  8 ++++----
>>   3 files changed, 10 insertions(+), 15 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c
>> b/drivers/gpu/drm/i915/i915_debugfs.c
>> index 8b8d6f0..1024947 100644
>> --- a/drivers/gpu/drm/i915/i915_debugfs.c
>> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
>> @@ -2509,6 +2509,7 @@ static void i915_guc_client_info(struct seq_file
>> *m,
>>       seq_printf(m, "\tWQ size %d, offset: 0x%x, tail %d\n",
>>           client->wq_size, client->wq_offset, client->wq_tail);
>>
>> +    seq_printf(m, "\tWork queue full: %u\n", client->no_wq_space);
>>       seq_printf(m, "\tFailed to queue: %u\n", client->q_fail);
>>       seq_printf(m, "\tFailed doorbell: %u\n", client->b_fail);
>>       seq_printf(m, "\tLast submission result: %d\n", client->retcode);
>> diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c
>> b/drivers/gpu/drm/i915/i915_guc_submission.c
>> index 66af5ce..6626eff 100644
>> --- a/drivers/gpu/drm/i915/i915_guc_submission.c
>> +++ b/drivers/gpu/drm/i915/i915_guc_submission.c
>> @@ -453,27 +453,21 @@ static void guc_fini_ctx_desc(struct intel_guc
>> *guc,
>>
>>   int i915_guc_wq_check_space(struct drm_i915_gem_request *request)
>>   {
>> -    const size_t size = sizeof(struct guc_wq_item);
>> +    const size_t wqi_size = sizeof(struct guc_wq_item);
>>       struct i915_guc_client *gc = request->i915->guc.execbuf_client;
>>       struct guc_process_desc *desc;
>> -    int ret = -ETIMEDOUT, timeout_counter = 200;
>>
>>       if (!gc)
>>           return 0;
>
> Not part of this patch but spotted it - can this really happen, and if
> so, does it warrant a WARN_ONCE, GEM_BUG_ON or something?

It certainly shouldn't, as this function shouldn't be called unless 
enable_guc_submission is nonzero, and it's set to zero if client 
allocation fails. So it could be a GEM_BUG_ON(), and it should be part 
of the previous patch, which changed this from a parameter to getting it 
out of the dev_private.

>>       desc = gc->client_base + gc->proc_desc_offset;
>>
>> -    while (timeout_counter-- > 0) {
>> -        if (CIRC_SPACE(gc->wq_tail, desc->head, gc->wq_size) >= size) {
>> -            ret = 0;
>> -            break;
>> -        }
>> +    if (CIRC_SPACE(gc->wq_tail, desc->head, gc->wq_size) >= wqi_size)
>> +        return 0;
>
> Could stick a likely here to lay out the code for the expected case but
> it could be just OCD. :)

OK, though it makes the line too long. :-/
I'll just calculate the freespace, then compare separately.

>> -        if (timeout_counter)
>> -            usleep_range(1000, 2000);
>> -    };
>> +    gc->no_wq_space += 1;
>>
>> -    return ret;
>> +    return -EAGAIN;
>
> I suppose it is OK to make userspace pay the cost since this should be
> extremely rare, correct?

It *should* be rare. It could mean the GuC has hung, or the GPU has hung 
so the GuC can't dequeue anything. Whatever it means, someone needs to 
back off. Then maybe TDR will cut in -- or maybe stuff will complete 
after all.

If the errno gets back to userland, it can just retry, or wait for some 
other request to complete first. It doesn't have to be userland, though; 
the scheduler will also back off and try again later in this situation.

Either way, anything is better than sleeping (uninterruptibly) in the 
driver!

>>   }
>>
>>   static int guc_add_workqueue_item(struct i915_guc_client *gc,
>> diff --git a/drivers/gpu/drm/i915/intel_guc.h
>> b/drivers/gpu/drm/i915/intel_guc.h
>> index b37c731..436f2d6 100644
>> --- a/drivers/gpu/drm/i915/intel_guc.h
>> +++ b/drivers/gpu/drm/i915/intel_guc.h
>> @@ -73,10 +73,10 @@ struct i915_guc_client {
>>
>>       /* GuC submission statistics & status */
>>       uint64_t submissions[GUC_MAX_ENGINES_NUM];
>> -    uint32_t q_fail;
>> -    uint32_t b_fail;
>> -    int retcode;
>> -    int spare;            /* pad to 32 DWords        */
>> +    uint32_t no_wq_space;        /* Space pre-check failed    */
>> +    uint32_t q_fail;        /* Failed to queue (MBZ)    */
>> +    uint32_t b_fail;        /* Doorbell failure (MBZ)    */
>> +    int retcode;            /* Result of last guc_submit()    */
>>   };
>>
>>   enum intel_guc_fw_status {
>>
>
> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>
> Regards,
> Tvrtko

Thanks,
.Dave.
Tvrtko Ursulin May 10, 2016, 2:44 p.m. UTC | #5
On 06/05/16 16:17, Dave Gordon wrote:
> On 29/04/16 16:45, Tvrtko Ursulin wrote:
>>
>> One late comment:
>>
>> On 27/04/16 19:03, Dave Gordon wrote:
>>> Rather than wait to see whether more space becomes available in the GuC
>>> submission workqueue, we can just return -EAGAIN and let the caller try
>>> again in a little while. This gets rid of an uninterruptable sleep in
>>> the polling code :)
>>>
>>> We'll also add a counter to the GuC client statistics, to see how often
>>> we find the WQ full.
>>>
>>> Signed-off-by: Dave Gordon <david.s.gordon@intel.com>
>>> ---
>>>   drivers/gpu/drm/i915/i915_debugfs.c        |  1 +
>>>   drivers/gpu/drm/i915/i915_guc_submission.c | 16 +++++-----------
>>>   drivers/gpu/drm/i915/intel_guc.h           |  8 ++++----
>>>   3 files changed, 10 insertions(+), 15 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c
>>> b/drivers/gpu/drm/i915/i915_debugfs.c
>>> index 8b8d6f0..1024947 100644
>>> --- a/drivers/gpu/drm/i915/i915_debugfs.c
>>> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
>>> @@ -2509,6 +2509,7 @@ static void i915_guc_client_info(struct seq_file
>>> *m,
>>>       seq_printf(m, "\tWQ size %d, offset: 0x%x, tail %d\n",
>>>           client->wq_size, client->wq_offset, client->wq_tail);
>>>
>>> +    seq_printf(m, "\tWork queue full: %u\n", client->no_wq_space);
>>>       seq_printf(m, "\tFailed to queue: %u\n", client->q_fail);
>>>       seq_printf(m, "\tFailed doorbell: %u\n", client->b_fail);
>>>       seq_printf(m, "\tLast submission result: %d\n", client->retcode);
>>> diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c
>>> b/drivers/gpu/drm/i915/i915_guc_submission.c
>>> index 66af5ce..6626eff 100644
>>> --- a/drivers/gpu/drm/i915/i915_guc_submission.c
>>> +++ b/drivers/gpu/drm/i915/i915_guc_submission.c
>>> @@ -453,27 +453,21 @@ static void guc_fini_ctx_desc(struct intel_guc
>>> *guc,
>>>
>>>   int i915_guc_wq_check_space(struct drm_i915_gem_request *request)
>>>   {
>>> -    const size_t size = sizeof(struct guc_wq_item);
>>> +    const size_t wqi_size = sizeof(struct guc_wq_item);
>>>       struct i915_guc_client *gc = request->i915->guc.execbuf_client;
>>>       struct guc_process_desc *desc;
>>> -    int ret = -ETIMEDOUT, timeout_counter = 200;
>>>
>>>       if (!gc)
>>>           return 0;
>>>
>>>       desc = gc->client_base + gc->proc_desc_offset;
>>>
>>> -    while (timeout_counter-- > 0) {
>>> -        if (CIRC_SPACE(gc->wq_tail, desc->head, gc->wq_size) >= size) {
>>> -            ret = 0;
>>> -            break;
>>> -        }
>>> +    if (CIRC_SPACE(gc->wq_tail, desc->head, gc->wq_size) >= wqi_size)
>>> +        return 0;
>>>
>>> -        if (timeout_counter)
>>> -            usleep_range(1000, 2000);
>>> -    };
>>> +    gc->no_wq_space += 1;
>>>
>>> -    return ret;
>>> +    return -EAGAIN;
>>>   }
>>>
>>>   static int guc_add_workqueue_item(struct i915_guc_client *gc,
>>> diff --git a/drivers/gpu/drm/i915/intel_guc.h
>>> b/drivers/gpu/drm/i915/intel_guc.h
>>> index b37c731..436f2d6 100644
>>> --- a/drivers/gpu/drm/i915/intel_guc.h
>>> +++ b/drivers/gpu/drm/i915/intel_guc.h
>>> @@ -73,10 +73,10 @@ struct i915_guc_client {
>>>
>>>       /* GuC submission statistics & status */
>>>       uint64_t submissions[GUC_MAX_ENGINES_NUM];
>>> -    uint32_t q_fail;
>>> -    uint32_t b_fail;
>>> -    int retcode;
>>> -    int spare;            /* pad to 32 DWords        */
>>> +    uint32_t no_wq_space;        /* Space pre-check failed    */
>>> +    uint32_t q_fail;        /* Failed to queue (MBZ)    */
>>> +    uint32_t b_fail;        /* Doorbell failure (MBZ)    */
>>
>> Why MBZ? It is not all used in this context so this will just confuse
>> people.
>
> MBZ => Must Be Zero. As in, we can't really deal with the events that
> cause these counters to be incremented, so if they're nonzero, something
> is broken and the driver may or may not recover :(
>
> If the call protocol is changed, the MBZ variables may go away entirely.

My objection is that when someone sees MBZ they'll wrongly think this 
structure is shared with the hardware. Since it is just a software 
counter, MBZ is a confusing marker to use.

Instead these fields should probably just be unsigned ints with comments 
saying something to the effect of what you wrote above.

Regards,

Tvrtko
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 8b8d6f0..1024947 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -2509,6 +2509,7 @@  static void i915_guc_client_info(struct seq_file *m,
 	seq_printf(m, "\tWQ size %d, offset: 0x%x, tail %d\n",
 		client->wq_size, client->wq_offset, client->wq_tail);
 
+	seq_printf(m, "\tWork queue full: %u\n", client->no_wq_space);
 	seq_printf(m, "\tFailed to queue: %u\n", client->q_fail);
 	seq_printf(m, "\tFailed doorbell: %u\n", client->b_fail);
 	seq_printf(m, "\tLast submission result: %d\n", client->retcode);
diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
index 66af5ce..6626eff 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -453,27 +453,21 @@  static void guc_fini_ctx_desc(struct intel_guc *guc,
 
 int i915_guc_wq_check_space(struct drm_i915_gem_request *request)
 {
-	const size_t size = sizeof(struct guc_wq_item);
+	const size_t wqi_size = sizeof(struct guc_wq_item);
 	struct i915_guc_client *gc = request->i915->guc.execbuf_client;
 	struct guc_process_desc *desc;
-	int ret = -ETIMEDOUT, timeout_counter = 200;
 
 	if (!gc)
 		return 0;
 
 	desc = gc->client_base + gc->proc_desc_offset;
 
-	while (timeout_counter-- > 0) {
-		if (CIRC_SPACE(gc->wq_tail, desc->head, gc->wq_size) >= size) {
-			ret = 0;
-			break;
-		}
+	if (CIRC_SPACE(gc->wq_tail, desc->head, gc->wq_size) >= wqi_size)
+		return 0;
 
-		if (timeout_counter)
-			usleep_range(1000, 2000);
-	};
+	gc->no_wq_space += 1;
 
-	return ret;
+	return -EAGAIN;
 }
 
 static int guc_add_workqueue_item(struct i915_guc_client *gc,
diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h
index b37c731..436f2d6 100644
--- a/drivers/gpu/drm/i915/intel_guc.h
+++ b/drivers/gpu/drm/i915/intel_guc.h
@@ -73,10 +73,10 @@  struct i915_guc_client {
 
 	/* GuC submission statistics & status */
 	uint64_t submissions[GUC_MAX_ENGINES_NUM];
-	uint32_t q_fail;
-	uint32_t b_fail;
-	int retcode;
-	int spare;			/* pad to 32 DWords		*/
+	uint32_t no_wq_space;		/* Space pre-check failed	*/
+	uint32_t q_fail;		/* Failed to queue (MBZ)	*/
+	uint32_t b_fail;		/* Doorbell failure (MBZ)	*/
+	int retcode;			/* Result of last guc_submit()	*/
 };
 
 enum intel_guc_fw_status {