diff mbox series

[6/7] drm/i915/guc: Optimize CTB writes and reads

Message ID 20210701171550.49353-7-matthew.brost@intel.com (mailing list archive)
State New, archived
Headers show
Series CT changes required for GuC submission | expand

Commit Message

Matthew Brost July 1, 2021, 5:15 p.m. UTC
CTB writes are now in the path of command submission and should be
optimized for performance. Rather than reading CTB descriptor values
(e.g. head, tail) which could result in accesses across the PCIe bus,
store shadow local copies and only read/write the descriptor values when
absolutely necessary. Also store the current space in the each channel
locally.

v2:
 (Michel)
  - Add additional sanity checks for head / tail pointers
  - Use GUC_CTB_HDR_LEN rather than magic 1

Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
---
 drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c | 88 +++++++++++++++--------
 drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h |  6 ++
 2 files changed, 65 insertions(+), 29 deletions(-)

Comments

John Harrison July 6, 2021, 7 p.m. UTC | #1
On 7/1/2021 10:15, Matthew Brost wrote:
> CTB writes are now in the path of command submission and should be
> optimized for performance. Rather than reading CTB descriptor values
> (e.g. head, tail) which could result in accesses across the PCIe bus,
> store shadow local copies and only read/write the descriptor values when
> absolutely necessary. Also store the current space in the each channel
> locally.
>
> v2:
>   (Michel)
>    - Add additional sanity checks for head / tail pointers
>    - Use GUC_CTB_HDR_LEN rather than magic 1
>
> Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
> Signed-off-by: Matthew Brost <matthew.brost@intel.com>
> ---
>   drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c | 88 +++++++++++++++--------
>   drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h |  6 ++
>   2 files changed, 65 insertions(+), 29 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
> index a9cb7b608520..5b8b4ff609e2 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
> @@ -130,6 +130,10 @@ static void guc_ct_buffer_desc_init(struct guc_ct_buffer_desc *desc)
>   static void guc_ct_buffer_reset(struct intel_guc_ct_buffer *ctb)
>   {
>   	ctb->broken = false;
> +	ctb->tail = 0;
> +	ctb->head = 0;
> +	ctb->space = CIRC_SPACE(ctb->tail, ctb->head, ctb->size);
> +
>   	guc_ct_buffer_desc_init(ctb->desc);
>   }
>   
> @@ -383,10 +387,8 @@ static int ct_write(struct intel_guc_ct *ct,
>   {
>   	struct intel_guc_ct_buffer *ctb = &ct->ctbs.send;
>   	struct guc_ct_buffer_desc *desc = ctb->desc;
> -	u32 head = desc->head;
> -	u32 tail = desc->tail;
> +	u32 tail = ctb->tail;
>   	u32 size = ctb->size;
> -	u32 used;
>   	u32 header;
>   	u32 hxg;
>   	u32 *cmds = ctb->cmds;
> @@ -395,25 +397,22 @@ static int ct_write(struct intel_guc_ct *ct,
>   	if (unlikely(desc->status))
>   		goto corrupted;
>   
> -	if (unlikely((tail | head) >= size)) {
> +	GEM_BUG_ON(tail > size);
> +
> +#ifdef CONFIG_DRM_I915_DEBUG_GUC
> +	if (unlikely(tail != READ_ONCE(desc->tail))) {
> +		CT_ERROR(ct, "Tail was modified %u != %u\n",
> +			 desc->tail, ctb->tail);
> +		desc->status |= GUC_CTB_STATUS_MISMATCH;
> +		goto corrupted;
> +	}
> +	if (unlikely((desc->tail | desc->head) >= size)) {
>   		CT_ERROR(ct, "Invalid offsets head=%u tail=%u (size=%u)\n",
> -			 head, tail, size);
> +			 desc->head, desc->tail, size);
>   		desc->status |= GUC_CTB_STATUS_OVERFLOW;
>   		goto corrupted;
>   	}
> -
> -	/*
> -	 * tail == head condition indicates empty. GuC FW does not support
> -	 * using up the entire buffer to get tail == head meaning full.
> -	 */
> -	if (tail < head)
> -		used = (size - head) + tail;
> -	else
> -		used = tail - head;
> -
> -	/* make sure there is a space including extra dw for the fence */
> -	if (unlikely(used + len + GUC_CTB_HDR_LEN >= size))
> -		return -ENOSPC;
> +#endif
>   
>   	/*
>   	 * dw0: CT header (including fence)
> @@ -454,7 +453,9 @@ static int ct_write(struct intel_guc_ct *ct,
>   	write_barrier(ct);
>   
>   	/* now update descriptor */
> +	ctb->tail = tail;
>   	WRITE_ONCE(desc->tail, tail);
> +	ctb->space -= len + GUC_CTB_HDR_LEN;
>   
>   	return 0;
>   
> @@ -470,7 +471,7 @@ static int ct_write(struct intel_guc_ct *ct,
>    * @req:	pointer to pending request
>    * @status:	placeholder for status
>    *
> - * For each sent request, Guc shall send bac CT response message.
> + * For each sent request, GuC shall send back CT response message.
>    * Our message handler will update status of tracked request once
>    * response message with given fence is received. Wait here and
>    * check for valid response status value.
> @@ -526,24 +527,35 @@ static inline bool ct_deadlocked(struct intel_guc_ct *ct)
>   	return ret;
>   }
>   
> -static inline bool h2g_has_room(struct intel_guc_ct_buffer *ctb, u32 len_dw)
> +static inline bool h2g_has_room(struct intel_guc_ct *ct, u32 len_dw)
>   {
> -	struct guc_ct_buffer_desc *desc = ctb->desc;
> -	u32 head = READ_ONCE(desc->head);
> +	struct intel_guc_ct_buffer *ctb = &ct->ctbs.send;
> +	u32 head;
>   	u32 space;
>   
> -	space = CIRC_SPACE(desc->tail, head, ctb->size);
> +	if (ctb->space >= len_dw)
> +		return true;
> +
> +	head = READ_ONCE(ctb->desc->head);
> +	if (unlikely(head > ctb->size)) {
> +		CT_ERROR(ct, "Corrupted descriptor head=%u tail=%u size=%u\n",
> +			 ctb->desc->head, ctb->desc->tail, ctb->size);
> +		ctb->desc->status |= GUC_CTB_STATUS_OVERFLOW;
> +		ctb->broken = true;
> +		return false;
> +	}
> +
> +	space = CIRC_SPACE(ctb->tail, head, ctb->size);
> +	ctb->space = space;
>   
>   	return space >= len_dw;
>   }
>   
>   static int has_room_nb(struct intel_guc_ct *ct, u32 len_dw)
>   {
> -	struct intel_guc_ct_buffer *ctb = &ct->ctbs.send;
> -
>   	lockdep_assert_held(&ct->ctbs.send.lock);
>   
> -	if (unlikely(!h2g_has_room(ctb, len_dw))) {
> +	if (unlikely(!h2g_has_room(ct, len_dw))) {
>   		if (ct->stall_time == KTIME_MAX)
>   			ct->stall_time = ktime_get();
>   
> @@ -613,7 +625,7 @@ static int ct_send(struct intel_guc_ct *ct,
>   	 */
>   retry:
>   	spin_lock_irqsave(&ctb->lock, flags);
> -	if (unlikely(!h2g_has_room(ctb, len + GUC_CTB_HDR_LEN))) {
> +	if (unlikely(!h2g_has_room(ct, len + GUC_CTB_HDR_LEN))) {
>   		if (ct->stall_time == KTIME_MAX)
>   			ct->stall_time = ktime_get();
>   		spin_unlock_irqrestore(&ctb->lock, flags);
> @@ -733,7 +745,7 @@ static int ct_read(struct intel_guc_ct *ct, struct ct_incoming_msg **msg)
>   {
>   	struct intel_guc_ct_buffer *ctb = &ct->ctbs.recv;
>   	struct guc_ct_buffer_desc *desc = ctb->desc;
> -	u32 head = desc->head;
> +	u32 head = ctb->head;
>   	u32 tail = desc->tail;
>   	u32 size = ctb->size;
>   	u32 *cmds = ctb->cmds;
> @@ -748,12 +760,29 @@ static int ct_read(struct intel_guc_ct *ct, struct ct_incoming_msg **msg)
>   	if (unlikely(desc->status))
>   		goto corrupted;
>   
> -	if (unlikely((tail | head) >= size)) {
> +	GEM_BUG_ON(head > size);
Is the BUG_ON necessary given that both options below do the same check 
but as a corrupted buffer test (with subsequent recovery by GT reset?) 
rather than killing the driver.

> +
> +#ifdef CONFIG_DRM_I915_DEBUG_GUC
> +	if (unlikely(head != READ_ONCE(desc->head))) {
> +		CT_ERROR(ct, "Head was modified %u != %u\n",
> +			 desc->head, ctb->head);
> +		desc->status |= GUC_CTB_STATUS_MISMATCH;
> +		goto corrupted;
> +	}
> +	if (unlikely((desc->tail | desc->head) >= size)) {
> +		CT_ERROR(ct, "Invalid offsets head=%u tail=%u (size=%u)\n",
> +			 head, tail, size);
> +		desc->status |= GUC_CTB_STATUS_OVERFLOW;
> +		goto corrupted;
> +	}
> +#else
> +	if (unlikely((tail | ctb->head) >= size)) {
Could just be 'head' rather than 'ctb->head'.

John.

>   		CT_ERROR(ct, "Invalid offsets head=%u tail=%u (size=%u)\n",
>   			 head, tail, size);
>   		desc->status |= GUC_CTB_STATUS_OVERFLOW;
>   		goto corrupted;
>   	}
> +#endif
>   
>   	/* tail == head condition indicates empty */
>   	available = tail - head;
> @@ -803,6 +832,7 @@ static int ct_read(struct intel_guc_ct *ct, struct ct_incoming_msg **msg)
>   	}
>   	CT_DEBUG(ct, "received %*ph\n", 4 * len, (*msg)->msg);
>   
> +	ctb->head = head;
>   	/* now update descriptor */
>   	WRITE_ONCE(desc->head, head);
>   
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h
> index bee03794c1eb..edd1bba0445d 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h
> @@ -33,6 +33,9 @@ struct intel_guc;
>    * @desc: pointer to the buffer descriptor
>    * @cmds: pointer to the commands buffer
>    * @size: size of the commands buffer in dwords
> + * @head: local shadow copy of head in dwords
> + * @tail: local shadow copy of tail in dwords
> + * @space: local shadow copy of space in dwords
>    * @broken: flag to indicate if descriptor data is broken
>    */
>   struct intel_guc_ct_buffer {
> @@ -40,6 +43,9 @@ struct intel_guc_ct_buffer {
>   	struct guc_ct_buffer_desc *desc;
>   	u32 *cmds;
>   	u32 size;
> +	u32 tail;
> +	u32 head;
> +	u32 space;
>   	bool broken;
>   };
>
Michal Wajdeczko July 6, 2021, 7:12 p.m. UTC | #2
On 06.07.2021 21:00, John Harrison wrote:
> On 7/1/2021 10:15, Matthew Brost wrote:
>> CTB writes are now in the path of command submission and should be
>> optimized for performance. Rather than reading CTB descriptor values
>> (e.g. head, tail) which could result in accesses across the PCIe bus,
>> store shadow local copies and only read/write the descriptor values when
>> absolutely necessary. Also store the current space in the each channel
>> locally.
>>
>> v2:
>>   (Michel)
>>    - Add additional sanity checks for head / tail pointers
>>    - Use GUC_CTB_HDR_LEN rather than magic 1
>>
>> Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
>> Signed-off-by: Matthew Brost <matthew.brost@intel.com>
>> ---
>>   drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c | 88 +++++++++++++++--------
>>   drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h |  6 ++
>>   2 files changed, 65 insertions(+), 29 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
>> b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
>> index a9cb7b608520..5b8b4ff609e2 100644
>> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
>> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
>> @@ -130,6 +130,10 @@ static void guc_ct_buffer_desc_init(struct
>> guc_ct_buffer_desc *desc)
>>   static void guc_ct_buffer_reset(struct intel_guc_ct_buffer *ctb)
>>   {
>>       ctb->broken = false;
>> +    ctb->tail = 0;
>> +    ctb->head = 0;
>> +    ctb->space = CIRC_SPACE(ctb->tail, ctb->head, ctb->size);
>> +
>>       guc_ct_buffer_desc_init(ctb->desc);
>>   }
>>   @@ -383,10 +387,8 @@ static int ct_write(struct intel_guc_ct *ct,
>>   {
>>       struct intel_guc_ct_buffer *ctb = &ct->ctbs.send;
>>       struct guc_ct_buffer_desc *desc = ctb->desc;
>> -    u32 head = desc->head;
>> -    u32 tail = desc->tail;
>> +    u32 tail = ctb->tail;
>>       u32 size = ctb->size;
>> -    u32 used;
>>       u32 header;
>>       u32 hxg;
>>       u32 *cmds = ctb->cmds;
>> @@ -395,25 +397,22 @@ static int ct_write(struct intel_guc_ct *ct,
>>       if (unlikely(desc->status))
>>           goto corrupted;
>>   -    if (unlikely((tail | head) >= size)) {
>> +    GEM_BUG_ON(tail > size);
>> +
>> +#ifdef CONFIG_DRM_I915_DEBUG_GUC
>> +    if (unlikely(tail != READ_ONCE(desc->tail))) {
>> +        CT_ERROR(ct, "Tail was modified %u != %u\n",
>> +             desc->tail, ctb->tail);
>> +        desc->status |= GUC_CTB_STATUS_MISMATCH;
>> +        goto corrupted;
>> +    }
>> +    if (unlikely((desc->tail | desc->head) >= size)) {
>>           CT_ERROR(ct, "Invalid offsets head=%u tail=%u (size=%u)\n",
>> -             head, tail, size);
>> +             desc->head, desc->tail, size);
>>           desc->status |= GUC_CTB_STATUS_OVERFLOW;
>>           goto corrupted;
>>       }
>> -
>> -    /*
>> -     * tail == head condition indicates empty. GuC FW does not support
>> -     * using up the entire buffer to get tail == head meaning full.
>> -     */
>> -    if (tail < head)
>> -        used = (size - head) + tail;
>> -    else
>> -        used = tail - head;
>> -
>> -    /* make sure there is a space including extra dw for the fence */
>> -    if (unlikely(used + len + GUC_CTB_HDR_LEN >= size))
>> -        return -ENOSPC;
>> +#endif
>>         /*
>>        * dw0: CT header (including fence)
>> @@ -454,7 +453,9 @@ static int ct_write(struct intel_guc_ct *ct,
>>       write_barrier(ct);
>>         /* now update descriptor */
>> +    ctb->tail = tail;
>>       WRITE_ONCE(desc->tail, tail);
>> +    ctb->space -= len + GUC_CTB_HDR_LEN;
>>         return 0;
>>   @@ -470,7 +471,7 @@ static int ct_write(struct intel_guc_ct *ct,
>>    * @req:    pointer to pending request
>>    * @status:    placeholder for status
>>    *
>> - * For each sent request, Guc shall send bac CT response message.
>> + * For each sent request, GuC shall send back CT response message.
>>    * Our message handler will update status of tracked request once
>>    * response message with given fence is received. Wait here and
>>    * check for valid response status value.
>> @@ -526,24 +527,35 @@ static inline bool ct_deadlocked(struct
>> intel_guc_ct *ct)
>>       return ret;
>>   }
>>   -static inline bool h2g_has_room(struct intel_guc_ct_buffer *ctb,
>> u32 len_dw)
>> +static inline bool h2g_has_room(struct intel_guc_ct *ct, u32 len_dw)
>>   {
>> -    struct guc_ct_buffer_desc *desc = ctb->desc;
>> -    u32 head = READ_ONCE(desc->head);
>> +    struct intel_guc_ct_buffer *ctb = &ct->ctbs.send;
>> +    u32 head;
>>       u32 space;
>>   -    space = CIRC_SPACE(desc->tail, head, ctb->size);
>> +    if (ctb->space >= len_dw)
>> +        return true;
>> +
>> +    head = READ_ONCE(ctb->desc->head);
>> +    if (unlikely(head > ctb->size)) {
>> +        CT_ERROR(ct, "Corrupted descriptor head=%u tail=%u size=%u\n",
>> +             ctb->desc->head, ctb->desc->tail, ctb->size);
>> +        ctb->desc->status |= GUC_CTB_STATUS_OVERFLOW;
>> +        ctb->broken = true;
>> +        return false;
>> +    }
>> +
>> +    space = CIRC_SPACE(ctb->tail, head, ctb->size);
>> +    ctb->space = space;
>>         return space >= len_dw;
>>   }
>>     static int has_room_nb(struct intel_guc_ct *ct, u32 len_dw)
>>   {
>> -    struct intel_guc_ct_buffer *ctb = &ct->ctbs.send;
>> -
>>       lockdep_assert_held(&ct->ctbs.send.lock);
>>   -    if (unlikely(!h2g_has_room(ctb, len_dw))) {
>> +    if (unlikely(!h2g_has_room(ct, len_dw))) {
>>           if (ct->stall_time == KTIME_MAX)
>>               ct->stall_time = ktime_get();
>>   @@ -613,7 +625,7 @@ static int ct_send(struct intel_guc_ct *ct,
>>        */
>>   retry:
>>       spin_lock_irqsave(&ctb->lock, flags);
>> -    if (unlikely(!h2g_has_room(ctb, len + GUC_CTB_HDR_LEN))) {
>> +    if (unlikely(!h2g_has_room(ct, len + GUC_CTB_HDR_LEN))) {
>>           if (ct->stall_time == KTIME_MAX)
>>               ct->stall_time = ktime_get();
>>           spin_unlock_irqrestore(&ctb->lock, flags);
>> @@ -733,7 +745,7 @@ static int ct_read(struct intel_guc_ct *ct, struct
>> ct_incoming_msg **msg)
>>   {
>>       struct intel_guc_ct_buffer *ctb = &ct->ctbs.recv;
>>       struct guc_ct_buffer_desc *desc = ctb->desc;
>> -    u32 head = desc->head;
>> +    u32 head = ctb->head;
>>       u32 tail = desc->tail;
>>       u32 size = ctb->size;
>>       u32 *cmds = ctb->cmds;
>> @@ -748,12 +760,29 @@ static int ct_read(struct intel_guc_ct *ct,
>> struct ct_incoming_msg **msg)
>>       if (unlikely(desc->status))
>>           goto corrupted;
>>   -    if (unlikely((tail | head) >= size)) {
>> +    GEM_BUG_ON(head > size);
> Is the BUG_ON necessary given that both options below do the same check
> but as a corrupted buffer test (with subsequent recovery by GT reset?)
> rather than killing the driver.

"head" and "size" are now fully owned by the driver.
BUGON here is to make sure driver is coded correctly.

> 
>> +
>> +#ifdef CONFIG_DRM_I915_DEBUG_GUC
>> +    if (unlikely(head != READ_ONCE(desc->head))) {
>> +        CT_ERROR(ct, "Head was modified %u != %u\n",
>> +             desc->head, ctb->head);
>> +        desc->status |= GUC_CTB_STATUS_MISMATCH;
>> +        goto corrupted;
>> +    }
>> +    if (unlikely((desc->tail | desc->head) >= size)) {
>> +        CT_ERROR(ct, "Invalid offsets head=%u tail=%u (size=%u)\n",
>> +             head, tail, size);
>> +        desc->status |= GUC_CTB_STATUS_OVERFLOW;
>> +        goto corrupted;
>> +    }
>> +#else
>> +    if (unlikely((tail | ctb->head) >= size)) {
> Could just be 'head' rather than 'ctb->head'.

or drop "ctb->head" completely since this is driver owned field and
above you already have BUGON to test it

Michal

> 
> John.
> 
>>           CT_ERROR(ct, "Invalid offsets head=%u tail=%u (size=%u)\n",
>>                head, tail, size);
>>           desc->status |= GUC_CTB_STATUS_OVERFLOW;
>>           goto corrupted;
>>       }
>> +#endif
>>         /* tail == head condition indicates empty */
>>       available = tail - head;
>> @@ -803,6 +832,7 @@ static int ct_read(struct intel_guc_ct *ct, struct
>> ct_incoming_msg **msg)
>>       }
>>       CT_DEBUG(ct, "received %*ph\n", 4 * len, (*msg)->msg);
>>   +    ctb->head = head;
>>       /* now update descriptor */
>>       WRITE_ONCE(desc->head, head);
>>   diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h
>> b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h
>> index bee03794c1eb..edd1bba0445d 100644
>> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h
>> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h
>> @@ -33,6 +33,9 @@ struct intel_guc;
>>    * @desc: pointer to the buffer descriptor
>>    * @cmds: pointer to the commands buffer
>>    * @size: size of the commands buffer in dwords
>> + * @head: local shadow copy of head in dwords
>> + * @tail: local shadow copy of tail in dwords
>> + * @space: local shadow copy of space in dwords
>>    * @broken: flag to indicate if descriptor data is broken
>>    */
>>   struct intel_guc_ct_buffer {
>> @@ -40,6 +43,9 @@ struct intel_guc_ct_buffer {
>>       struct guc_ct_buffer_desc *desc;
>>       u32 *cmds;
>>       u32 size;
>> +    u32 tail;
>> +    u32 head;
>> +    u32 space;
>>       bool broken;
>>   };
>>   
>
John Harrison July 6, 2021, 7:19 p.m. UTC | #3
On 7/6/2021 12:12, Michal Wajdeczko wrote:
> On 06.07.2021 21:00, John Harrison wrote:
>> On 7/1/2021 10:15, Matthew Brost wrote:
>>> CTB writes are now in the path of command submission and should be
>>> optimized for performance. Rather than reading CTB descriptor values
>>> (e.g. head, tail) which could result in accesses across the PCIe bus,
>>> store shadow local copies and only read/write the descriptor values when
>>> absolutely necessary. Also store the current space in the each channel
>>> locally.
>>>
>>> v2:
>>>    (Michel)
>>>     - Add additional sanity checks for head / tail pointers
>>>     - Use GUC_CTB_HDR_LEN rather than magic 1
>>>
>>> Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
>>> Signed-off-by: Matthew Brost <matthew.brost@intel.com>
>>> ---
>>>    drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c | 88 +++++++++++++++--------
>>>    drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h |  6 ++
>>>    2 files changed, 65 insertions(+), 29 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
>>> b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
>>> index a9cb7b608520..5b8b4ff609e2 100644
>>> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
>>> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
>>> @@ -130,6 +130,10 @@ static void guc_ct_buffer_desc_init(struct
>>> guc_ct_buffer_desc *desc)
>>>    static void guc_ct_buffer_reset(struct intel_guc_ct_buffer *ctb)
>>>    {
>>>        ctb->broken = false;
>>> +    ctb->tail = 0;
>>> +    ctb->head = 0;
>>> +    ctb->space = CIRC_SPACE(ctb->tail, ctb->head, ctb->size);
>>> +
>>>        guc_ct_buffer_desc_init(ctb->desc);
>>>    }
>>>    @@ -383,10 +387,8 @@ static int ct_write(struct intel_guc_ct *ct,
>>>    {
>>>        struct intel_guc_ct_buffer *ctb = &ct->ctbs.send;
>>>        struct guc_ct_buffer_desc *desc = ctb->desc;
>>> -    u32 head = desc->head;
>>> -    u32 tail = desc->tail;
>>> +    u32 tail = ctb->tail;
>>>        u32 size = ctb->size;
>>> -    u32 used;
>>>        u32 header;
>>>        u32 hxg;
>>>        u32 *cmds = ctb->cmds;
>>> @@ -395,25 +397,22 @@ static int ct_write(struct intel_guc_ct *ct,
>>>        if (unlikely(desc->status))
>>>            goto corrupted;
>>>    -    if (unlikely((tail | head) >= size)) {
>>> +    GEM_BUG_ON(tail > size);
>>> +
>>> +#ifdef CONFIG_DRM_I915_DEBUG_GUC
>>> +    if (unlikely(tail != READ_ONCE(desc->tail))) {
>>> +        CT_ERROR(ct, "Tail was modified %u != %u\n",
>>> +             desc->tail, ctb->tail);
>>> +        desc->status |= GUC_CTB_STATUS_MISMATCH;
>>> +        goto corrupted;
>>> +    }
>>> +    if (unlikely((desc->tail | desc->head) >= size)) {
>>>            CT_ERROR(ct, "Invalid offsets head=%u tail=%u (size=%u)\n",
>>> -             head, tail, size);
>>> +             desc->head, desc->tail, size);
>>>            desc->status |= GUC_CTB_STATUS_OVERFLOW;
>>>            goto corrupted;
>>>        }
>>> -
>>> -    /*
>>> -     * tail == head condition indicates empty. GuC FW does not support
>>> -     * using up the entire buffer to get tail == head meaning full.
>>> -     */
>>> -    if (tail < head)
>>> -        used = (size - head) + tail;
>>> -    else
>>> -        used = tail - head;
>>> -
>>> -    /* make sure there is a space including extra dw for the fence */
>>> -    if (unlikely(used + len + GUC_CTB_HDR_LEN >= size))
>>> -        return -ENOSPC;
>>> +#endif
>>>          /*
>>>         * dw0: CT header (including fence)
>>> @@ -454,7 +453,9 @@ static int ct_write(struct intel_guc_ct *ct,
>>>        write_barrier(ct);
>>>          /* now update descriptor */
>>> +    ctb->tail = tail;
>>>        WRITE_ONCE(desc->tail, tail);
>>> +    ctb->space -= len + GUC_CTB_HDR_LEN;
>>>          return 0;
>>>    @@ -470,7 +471,7 @@ static int ct_write(struct intel_guc_ct *ct,
>>>     * @req:    pointer to pending request
>>>     * @status:    placeholder for status
>>>     *
>>> - * For each sent request, Guc shall send bac CT response message.
>>> + * For each sent request, GuC shall send back CT response message.
>>>     * Our message handler will update status of tracked request once
>>>     * response message with given fence is received. Wait here and
>>>     * check for valid response status value.
>>> @@ -526,24 +527,35 @@ static inline bool ct_deadlocked(struct
>>> intel_guc_ct *ct)
>>>        return ret;
>>>    }
>>>    -static inline bool h2g_has_room(struct intel_guc_ct_buffer *ctb,
>>> u32 len_dw)
>>> +static inline bool h2g_has_room(struct intel_guc_ct *ct, u32 len_dw)
>>>    {
>>> -    struct guc_ct_buffer_desc *desc = ctb->desc;
>>> -    u32 head = READ_ONCE(desc->head);
>>> +    struct intel_guc_ct_buffer *ctb = &ct->ctbs.send;
>>> +    u32 head;
>>>        u32 space;
>>>    -    space = CIRC_SPACE(desc->tail, head, ctb->size);
>>> +    if (ctb->space >= len_dw)
>>> +        return true;
>>> +
>>> +    head = READ_ONCE(ctb->desc->head);
>>> +    if (unlikely(head > ctb->size)) {
>>> +        CT_ERROR(ct, "Corrupted descriptor head=%u tail=%u size=%u\n",
>>> +             ctb->desc->head, ctb->desc->tail, ctb->size);
>>> +        ctb->desc->status |= GUC_CTB_STATUS_OVERFLOW;
>>> +        ctb->broken = true;
>>> +        return false;
>>> +    }
>>> +
>>> +    space = CIRC_SPACE(ctb->tail, head, ctb->size);
>>> +    ctb->space = space;
>>>          return space >= len_dw;
>>>    }
>>>      static int has_room_nb(struct intel_guc_ct *ct, u32 len_dw)
>>>    {
>>> -    struct intel_guc_ct_buffer *ctb = &ct->ctbs.send;
>>> -
>>>        lockdep_assert_held(&ct->ctbs.send.lock);
>>>    -    if (unlikely(!h2g_has_room(ctb, len_dw))) {
>>> +    if (unlikely(!h2g_has_room(ct, len_dw))) {
>>>            if (ct->stall_time == KTIME_MAX)
>>>                ct->stall_time = ktime_get();
>>>    @@ -613,7 +625,7 @@ static int ct_send(struct intel_guc_ct *ct,
>>>         */
>>>    retry:
>>>        spin_lock_irqsave(&ctb->lock, flags);
>>> -    if (unlikely(!h2g_has_room(ctb, len + GUC_CTB_HDR_LEN))) {
>>> +    if (unlikely(!h2g_has_room(ct, len + GUC_CTB_HDR_LEN))) {
>>>            if (ct->stall_time == KTIME_MAX)
>>>                ct->stall_time = ktime_get();
>>>            spin_unlock_irqrestore(&ctb->lock, flags);
>>> @@ -733,7 +745,7 @@ static int ct_read(struct intel_guc_ct *ct, struct
>>> ct_incoming_msg **msg)
>>>    {
>>>        struct intel_guc_ct_buffer *ctb = &ct->ctbs.recv;
>>>        struct guc_ct_buffer_desc *desc = ctb->desc;
>>> -    u32 head = desc->head;
>>> +    u32 head = ctb->head;
>>>        u32 tail = desc->tail;
>>>        u32 size = ctb->size;
>>>        u32 *cmds = ctb->cmds;
>>> @@ -748,12 +760,29 @@ static int ct_read(struct intel_guc_ct *ct,
>>> struct ct_incoming_msg **msg)
>>>        if (unlikely(desc->status))
>>>            goto corrupted;
>>>    -    if (unlikely((tail | head) >= size)) {
>>> +    GEM_BUG_ON(head > size);
>> Is the BUG_ON necessary given that both options below do the same check
>> but as a corrupted buffer test (with subsequent recovery by GT reset?)
>> rather than killing the driver.
> "head" and "size" are now fully owned by the driver.
> BUGON here is to make sure driver is coded correctly.
The point is that both sides of the #if below also validate head. So 
first there is a BUG_ON, then there is the same test but without blowing 
the driver apart. One or the other is not required. My vote would be to 
keep the recoverable test rather than the BUG_ON.

John.

>
>>> +
>>> +#ifdef CONFIG_DRM_I915_DEBUG_GUC
>>> +    if (unlikely(head != READ_ONCE(desc->head))) {
>>> +        CT_ERROR(ct, "Head was modified %u != %u\n",
>>> +             desc->head, ctb->head);
>>> +        desc->status |= GUC_CTB_STATUS_MISMATCH;
>>> +        goto corrupted;
>>> +    }
>>> +    if (unlikely((desc->tail | desc->head) >= size)) {
>>> +        CT_ERROR(ct, "Invalid offsets head=%u tail=%u (size=%u)\n",
>>> +             head, tail, size);
>>> +        desc->status |= GUC_CTB_STATUS_OVERFLOW;
>>> +        goto corrupted;
>>> +    }
>>> +#else
>>> +    if (unlikely((tail | ctb->head) >= size)) {
>> Could just be 'head' rather than 'ctb->head'.
> or drop "ctb->head" completely since this is driver owned field and
> above you already have BUGON to test it
>
> Michal
>
>> John.
>>
>>>            CT_ERROR(ct, "Invalid offsets head=%u tail=%u (size=%u)\n",
>>>                 head, tail, size);
>>>            desc->status |= GUC_CTB_STATUS_OVERFLOW;
>>>            goto corrupted;
>>>        }
>>> +#endif
>>>          /* tail == head condition indicates empty */
>>>        available = tail - head;
>>> @@ -803,6 +832,7 @@ static int ct_read(struct intel_guc_ct *ct, struct
>>> ct_incoming_msg **msg)
>>>        }
>>>        CT_DEBUG(ct, "received %*ph\n", 4 * len, (*msg)->msg);
>>>    +    ctb->head = head;
>>>        /* now update descriptor */
>>>        WRITE_ONCE(desc->head, head);
>>>    diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h
>>> b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h
>>> index bee03794c1eb..edd1bba0445d 100644
>>> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h
>>> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h
>>> @@ -33,6 +33,9 @@ struct intel_guc;
>>>     * @desc: pointer to the buffer descriptor
>>>     * @cmds: pointer to the commands buffer
>>>     * @size: size of the commands buffer in dwords
>>> + * @head: local shadow copy of head in dwords
>>> + * @tail: local shadow copy of tail in dwords
>>> + * @space: local shadow copy of space in dwords
>>>     * @broken: flag to indicate if descriptor data is broken
>>>     */
>>>    struct intel_guc_ct_buffer {
>>> @@ -40,6 +43,9 @@ struct intel_guc_ct_buffer {
>>>        struct guc_ct_buffer_desc *desc;
>>>        u32 *cmds;
>>>        u32 size;
>>> +    u32 tail;
>>> +    u32 head;
>>> +    u32 space;
>>>        bool broken;
>>>    };
>>>
Michal Wajdeczko July 6, 2021, 7:33 p.m. UTC | #4
On 06.07.2021 21:19, John Harrison wrote:
> On 7/6/2021 12:12, Michal Wajdeczko wrote:
>> On 06.07.2021 21:00, John Harrison wrote:
>>> On 7/1/2021 10:15, Matthew Brost wrote:
>>>> CTB writes are now in the path of command submission and should be
>>>> optimized for performance. Rather than reading CTB descriptor values
>>>> (e.g. head, tail) which could result in accesses across the PCIe bus,
>>>> store shadow local copies and only read/write the descriptor values
>>>> when
>>>> absolutely necessary. Also store the current space in the each channel
>>>> locally.
>>>>
>>>> v2:
>>>>    (Michel)
>>>>     - Add additional sanity checks for head / tail pointers
>>>>     - Use GUC_CTB_HDR_LEN rather than magic 1
>>>>
>>>> Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
>>>> Signed-off-by: Matthew Brost <matthew.brost@intel.com>
>>>> ---
>>>>    drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c | 88
>>>> +++++++++++++++--------
>>>>    drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h |  6 ++
>>>>    2 files changed, 65 insertions(+), 29 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
>>>> b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
>>>> index a9cb7b608520..5b8b4ff609e2 100644
>>>> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
>>>> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
>>>> @@ -130,6 +130,10 @@ static void guc_ct_buffer_desc_init(struct
>>>> guc_ct_buffer_desc *desc)
>>>>    static void guc_ct_buffer_reset(struct intel_guc_ct_buffer *ctb)
>>>>    {
>>>>        ctb->broken = false;
>>>> +    ctb->tail = 0;
>>>> +    ctb->head = 0;
>>>> +    ctb->space = CIRC_SPACE(ctb->tail, ctb->head, ctb->size);
>>>> +
>>>>        guc_ct_buffer_desc_init(ctb->desc);
>>>>    }
>>>>    @@ -383,10 +387,8 @@ static int ct_write(struct intel_guc_ct *ct,
>>>>    {
>>>>        struct intel_guc_ct_buffer *ctb = &ct->ctbs.send;
>>>>        struct guc_ct_buffer_desc *desc = ctb->desc;
>>>> -    u32 head = desc->head;
>>>> -    u32 tail = desc->tail;
>>>> +    u32 tail = ctb->tail;
>>>>        u32 size = ctb->size;
>>>> -    u32 used;
>>>>        u32 header;
>>>>        u32 hxg;
>>>>        u32 *cmds = ctb->cmds;
>>>> @@ -395,25 +397,22 @@ static int ct_write(struct intel_guc_ct *ct,
>>>>        if (unlikely(desc->status))
>>>>            goto corrupted;
>>>>    -    if (unlikely((tail | head) >= size)) {
>>>> +    GEM_BUG_ON(tail > size);
>>>> +
>>>> +#ifdef CONFIG_DRM_I915_DEBUG_GUC
>>>> +    if (unlikely(tail != READ_ONCE(desc->tail))) {
>>>> +        CT_ERROR(ct, "Tail was modified %u != %u\n",
>>>> +             desc->tail, ctb->tail);
>>>> +        desc->status |= GUC_CTB_STATUS_MISMATCH;
>>>> +        goto corrupted;
>>>> +    }
>>>> +    if (unlikely((desc->tail | desc->head) >= size)) {
>>>>            CT_ERROR(ct, "Invalid offsets head=%u tail=%u (size=%u)\n",
>>>> -             head, tail, size);
>>>> +             desc->head, desc->tail, size);
>>>>            desc->status |= GUC_CTB_STATUS_OVERFLOW;
>>>>            goto corrupted;
>>>>        }
>>>> -
>>>> -    /*
>>>> -     * tail == head condition indicates empty. GuC FW does not support
>>>> -     * using up the entire buffer to get tail == head meaning full.
>>>> -     */
>>>> -    if (tail < head)
>>>> -        used = (size - head) + tail;
>>>> -    else
>>>> -        used = tail - head;
>>>> -
>>>> -    /* make sure there is a space including extra dw for the fence */
>>>> -    if (unlikely(used + len + GUC_CTB_HDR_LEN >= size))
>>>> -        return -ENOSPC;
>>>> +#endif
>>>>          /*
>>>>         * dw0: CT header (including fence)
>>>> @@ -454,7 +453,9 @@ static int ct_write(struct intel_guc_ct *ct,
>>>>        write_barrier(ct);
>>>>          /* now update descriptor */
>>>> +    ctb->tail = tail;
>>>>        WRITE_ONCE(desc->tail, tail);
>>>> +    ctb->space -= len + GUC_CTB_HDR_LEN;
>>>>          return 0;
>>>>    @@ -470,7 +471,7 @@ static int ct_write(struct intel_guc_ct *ct,
>>>>     * @req:    pointer to pending request
>>>>     * @status:    placeholder for status
>>>>     *
>>>> - * For each sent request, Guc shall send bac CT response message.
>>>> + * For each sent request, GuC shall send back CT response message.
>>>>     * Our message handler will update status of tracked request once
>>>>     * response message with given fence is received. Wait here and
>>>>     * check for valid response status value.
>>>> @@ -526,24 +527,35 @@ static inline bool ct_deadlocked(struct
>>>> intel_guc_ct *ct)
>>>>        return ret;
>>>>    }
>>>>    -static inline bool h2g_has_room(struct intel_guc_ct_buffer *ctb,
>>>> u32 len_dw)
>>>> +static inline bool h2g_has_room(struct intel_guc_ct *ct, u32 len_dw)
>>>>    {
>>>> -    struct guc_ct_buffer_desc *desc = ctb->desc;
>>>> -    u32 head = READ_ONCE(desc->head);
>>>> +    struct intel_guc_ct_buffer *ctb = &ct->ctbs.send;
>>>> +    u32 head;
>>>>        u32 space;
>>>>    -    space = CIRC_SPACE(desc->tail, head, ctb->size);
>>>> +    if (ctb->space >= len_dw)
>>>> +        return true;
>>>> +
>>>> +    head = READ_ONCE(ctb->desc->head);
>>>> +    if (unlikely(head > ctb->size)) {
>>>> +        CT_ERROR(ct, "Corrupted descriptor head=%u tail=%u size=%u\n",
>>>> +             ctb->desc->head, ctb->desc->tail, ctb->size);
>>>> +        ctb->desc->status |= GUC_CTB_STATUS_OVERFLOW;
>>>> +        ctb->broken = true;
>>>> +        return false;
>>>> +    }
>>>> +
>>>> +    space = CIRC_SPACE(ctb->tail, head, ctb->size);
>>>> +    ctb->space = space;
>>>>          return space >= len_dw;
>>>>    }
>>>>      static int has_room_nb(struct intel_guc_ct *ct, u32 len_dw)
>>>>    {
>>>> -    struct intel_guc_ct_buffer *ctb = &ct->ctbs.send;
>>>> -
>>>>        lockdep_assert_held(&ct->ctbs.send.lock);
>>>>    -    if (unlikely(!h2g_has_room(ctb, len_dw))) {
>>>> +    if (unlikely(!h2g_has_room(ct, len_dw))) {
>>>>            if (ct->stall_time == KTIME_MAX)
>>>>                ct->stall_time = ktime_get();
>>>>    @@ -613,7 +625,7 @@ static int ct_send(struct intel_guc_ct *ct,
>>>>         */
>>>>    retry:
>>>>        spin_lock_irqsave(&ctb->lock, flags);
>>>> -    if (unlikely(!h2g_has_room(ctb, len + GUC_CTB_HDR_LEN))) {
>>>> +    if (unlikely(!h2g_has_room(ct, len + GUC_CTB_HDR_LEN))) {
>>>>            if (ct->stall_time == KTIME_MAX)
>>>>                ct->stall_time = ktime_get();
>>>>            spin_unlock_irqrestore(&ctb->lock, flags);
>>>> @@ -733,7 +745,7 @@ static int ct_read(struct intel_guc_ct *ct, struct
>>>> ct_incoming_msg **msg)
>>>>    {
>>>>        struct intel_guc_ct_buffer *ctb = &ct->ctbs.recv;
>>>>        struct guc_ct_buffer_desc *desc = ctb->desc;
>>>> -    u32 head = desc->head;
>>>> +    u32 head = ctb->head;
>>>>        u32 tail = desc->tail;
>>>>        u32 size = ctb->size;
>>>>        u32 *cmds = ctb->cmds;
>>>> @@ -748,12 +760,29 @@ static int ct_read(struct intel_guc_ct *ct,
>>>> struct ct_incoming_msg **msg)
>>>>        if (unlikely(desc->status))
>>>>            goto corrupted;
>>>>    -    if (unlikely((tail | head) >= size)) {
>>>> +    GEM_BUG_ON(head > size);
>>> Is the BUG_ON necessary given that both options below do the same check
>>> but as a corrupted buffer test (with subsequent recovery by GT reset?)
>>> rather than killing the driver.
>> "head" and "size" are now fully owned by the driver.
>> BUGON here is to make sure driver is coded correctly.
> The point is that both sides of the #if below also validate head. So

but not the same "head"

under DEBUG we are validating the one from descriptor (together with
tail) - and that should be recoverable as if this fails it was clearly
not our fault.

but under non-DEBUG we were attempting to validate again the local one,
pretending that this is recoverable, but it is not, as this is our fault
(elsewhere in i915 we don't attempt to recover from obvious coding errors).

> first there is a BUG_ON, then there is the same test but without blowing
> the driver apart. One or the other is not required. My vote would be to
> keep the recoverable test rather than the BUG_ON.

IMHO we should keep GEMBUGON and drop redundant check in non-DEBUG.

But let Matt decide.

Michal

> 
> John.
> 
>>
>>>> +
>>>> +#ifdef CONFIG_DRM_I915_DEBUG_GUC
>>>> +    if (unlikely(head != READ_ONCE(desc->head))) {
>>>> +        CT_ERROR(ct, "Head was modified %u != %u\n",
>>>> +             desc->head, ctb->head);
>>>> +        desc->status |= GUC_CTB_STATUS_MISMATCH;
>>>> +        goto corrupted;
>>>> +    }
>>>> +    if (unlikely((desc->tail | desc->head) >= size)) {
>>>> +        CT_ERROR(ct, "Invalid offsets head=%u tail=%u (size=%u)\n",
>>>> +             head, tail, size);
>>>> +        desc->status |= GUC_CTB_STATUS_OVERFLOW;
>>>> +        goto corrupted;
>>>> +    }
>>>> +#else
>>>> +    if (unlikely((tail | ctb->head) >= size)) {
>>> Could just be 'head' rather than 'ctb->head'.
>> or drop "ctb->head" completely since this is driver owned field and
>> above you already have BUGON to test it
>>
>> Michal
>>
>>> John.
>>>
>>>>            CT_ERROR(ct, "Invalid offsets head=%u tail=%u (size=%u)\n",
>>>>                 head, tail, size);
>>>>            desc->status |= GUC_CTB_STATUS_OVERFLOW;
>>>>            goto corrupted;
>>>>        }
>>>> +#endif
>>>>          /* tail == head condition indicates empty */
>>>>        available = tail - head;
>>>> @@ -803,6 +832,7 @@ static int ct_read(struct intel_guc_ct *ct, struct
>>>> ct_incoming_msg **msg)
>>>>        }
>>>>        CT_DEBUG(ct, "received %*ph\n", 4 * len, (*msg)->msg);
>>>>    +    ctb->head = head;
>>>>        /* now update descriptor */
>>>>        WRITE_ONCE(desc->head, head);
>>>>    diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h
>>>> b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h
>>>> index bee03794c1eb..edd1bba0445d 100644
>>>> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h
>>>> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h
>>>> @@ -33,6 +33,9 @@ struct intel_guc;
>>>>     * @desc: pointer to the buffer descriptor
>>>>     * @cmds: pointer to the commands buffer
>>>>     * @size: size of the commands buffer in dwords
>>>> + * @head: local shadow copy of head in dwords
>>>> + * @tail: local shadow copy of tail in dwords
>>>> + * @space: local shadow copy of space in dwords
>>>>     * @broken: flag to indicate if descriptor data is broken
>>>>     */
>>>>    struct intel_guc_ct_buffer {
>>>> @@ -40,6 +43,9 @@ struct intel_guc_ct_buffer {
>>>>        struct guc_ct_buffer_desc *desc;
>>>>        u32 *cmds;
>>>>        u32 size;
>>>> +    u32 tail;
>>>> +    u32 head;
>>>> +    u32 space;
>>>>        bool broken;
>>>>    };
>>>>    
>
Matthew Brost July 6, 2021, 9:22 p.m. UTC | #5
On Tue, Jul 06, 2021 at 09:33:23PM +0200, Michal Wajdeczko wrote:
> 
> 
> On 06.07.2021 21:19, John Harrison wrote:
> > On 7/6/2021 12:12, Michal Wajdeczko wrote:
> >> On 06.07.2021 21:00, John Harrison wrote:
> >>> On 7/1/2021 10:15, Matthew Brost wrote:
> >>>> CTB writes are now in the path of command submission and should be
> >>>> optimized for performance. Rather than reading CTB descriptor values
> >>>> (e.g. head, tail) which could result in accesses across the PCIe bus,
> >>>> store shadow local copies and only read/write the descriptor values
> >>>> when
> >>>> absolutely necessary. Also store the current space in the each channel
> >>>> locally.
> >>>>
> >>>> v2:
> >>>>    (Michel)
> >>>>     - Add additional sanity checks for head / tail pointers
> >>>>     - Use GUC_CTB_HDR_LEN rather than magic 1
> >>>>
> >>>> Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
> >>>> Signed-off-by: Matthew Brost <matthew.brost@intel.com>
> >>>> ---
> >>>>    drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c | 88
> >>>> +++++++++++++++--------
> >>>>    drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h |  6 ++
> >>>>    2 files changed, 65 insertions(+), 29 deletions(-)
> >>>>
> >>>> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
> >>>> b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
> >>>> index a9cb7b608520..5b8b4ff609e2 100644
> >>>> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
> >>>> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
> >>>> @@ -130,6 +130,10 @@ static void guc_ct_buffer_desc_init(struct
> >>>> guc_ct_buffer_desc *desc)
> >>>>    static void guc_ct_buffer_reset(struct intel_guc_ct_buffer *ctb)
> >>>>    {
> >>>>        ctb->broken = false;
> >>>> +    ctb->tail = 0;
> >>>> +    ctb->head = 0;
> >>>> +    ctb->space = CIRC_SPACE(ctb->tail, ctb->head, ctb->size);
> >>>> +
> >>>>        guc_ct_buffer_desc_init(ctb->desc);
> >>>>    }
> >>>>    @@ -383,10 +387,8 @@ static int ct_write(struct intel_guc_ct *ct,
> >>>>    {
> >>>>        struct intel_guc_ct_buffer *ctb = &ct->ctbs.send;
> >>>>        struct guc_ct_buffer_desc *desc = ctb->desc;
> >>>> -    u32 head = desc->head;
> >>>> -    u32 tail = desc->tail;
> >>>> +    u32 tail = ctb->tail;
> >>>>        u32 size = ctb->size;
> >>>> -    u32 used;
> >>>>        u32 header;
> >>>>        u32 hxg;
> >>>>        u32 *cmds = ctb->cmds;
> >>>> @@ -395,25 +397,22 @@ static int ct_write(struct intel_guc_ct *ct,
> >>>>        if (unlikely(desc->status))
> >>>>            goto corrupted;
> >>>>    -    if (unlikely((tail | head) >= size)) {
> >>>> +    GEM_BUG_ON(tail > size);
> >>>> +
> >>>> +#ifdef CONFIG_DRM_I915_DEBUG_GUC
> >>>> +    if (unlikely(tail != READ_ONCE(desc->tail))) {
> >>>> +        CT_ERROR(ct, "Tail was modified %u != %u\n",
> >>>> +             desc->tail, ctb->tail);
> >>>> +        desc->status |= GUC_CTB_STATUS_MISMATCH;
> >>>> +        goto corrupted;
> >>>> +    }
> >>>> +    if (unlikely((desc->tail | desc->head) >= size)) {
> >>>>            CT_ERROR(ct, "Invalid offsets head=%u tail=%u (size=%u)\n",
> >>>> -             head, tail, size);
> >>>> +             desc->head, desc->tail, size);
> >>>>            desc->status |= GUC_CTB_STATUS_OVERFLOW;
> >>>>            goto corrupted;
> >>>>        }
> >>>> -
> >>>> -    /*
> >>>> -     * tail == head condition indicates empty. GuC FW does not support
> >>>> -     * using up the entire buffer to get tail == head meaning full.
> >>>> -     */
> >>>> -    if (tail < head)
> >>>> -        used = (size - head) + tail;
> >>>> -    else
> >>>> -        used = tail - head;
> >>>> -
> >>>> -    /* make sure there is a space including extra dw for the fence */
> >>>> -    if (unlikely(used + len + GUC_CTB_HDR_LEN >= size))
> >>>> -        return -ENOSPC;
> >>>> +#endif
> >>>>          /*
> >>>>         * dw0: CT header (including fence)
> >>>> @@ -454,7 +453,9 @@ static int ct_write(struct intel_guc_ct *ct,
> >>>>        write_barrier(ct);
> >>>>          /* now update descriptor */
> >>>> +    ctb->tail = tail;
> >>>>        WRITE_ONCE(desc->tail, tail);
> >>>> +    ctb->space -= len + GUC_CTB_HDR_LEN;
> >>>>          return 0;
> >>>>    @@ -470,7 +471,7 @@ static int ct_write(struct intel_guc_ct *ct,
> >>>>     * @req:    pointer to pending request
> >>>>     * @status:    placeholder for status
> >>>>     *
> >>>> - * For each sent request, Guc shall send bac CT response message.
> >>>> + * For each sent request, GuC shall send back CT response message.
> >>>>     * Our message handler will update status of tracked request once
> >>>>     * response message with given fence is received. Wait here and
> >>>>     * check for valid response status value.
> >>>> @@ -526,24 +527,35 @@ static inline bool ct_deadlocked(struct
> >>>> intel_guc_ct *ct)
> >>>>        return ret;
> >>>>    }
> >>>>    -static inline bool h2g_has_room(struct intel_guc_ct_buffer *ctb,
> >>>> u32 len_dw)
> >>>> +static inline bool h2g_has_room(struct intel_guc_ct *ct, u32 len_dw)
> >>>>    {
> >>>> -    struct guc_ct_buffer_desc *desc = ctb->desc;
> >>>> -    u32 head = READ_ONCE(desc->head);
> >>>> +    struct intel_guc_ct_buffer *ctb = &ct->ctbs.send;
> >>>> +    u32 head;
> >>>>        u32 space;
> >>>>    -    space = CIRC_SPACE(desc->tail, head, ctb->size);
> >>>> +    if (ctb->space >= len_dw)
> >>>> +        return true;
> >>>> +
> >>>> +    head = READ_ONCE(ctb->desc->head);
> >>>> +    if (unlikely(head > ctb->size)) {
> >>>> +        CT_ERROR(ct, "Corrupted descriptor head=%u tail=%u size=%u\n",
> >>>> +             ctb->desc->head, ctb->desc->tail, ctb->size);
> >>>> +        ctb->desc->status |= GUC_CTB_STATUS_OVERFLOW;
> >>>> +        ctb->broken = true;
> >>>> +        return false;
> >>>> +    }
> >>>> +
> >>>> +    space = CIRC_SPACE(ctb->tail, head, ctb->size);
> >>>> +    ctb->space = space;
> >>>>          return space >= len_dw;
> >>>>    }
> >>>>      static int has_room_nb(struct intel_guc_ct *ct, u32 len_dw)
> >>>>    {
> >>>> -    struct intel_guc_ct_buffer *ctb = &ct->ctbs.send;
> >>>> -
> >>>>        lockdep_assert_held(&ct->ctbs.send.lock);
> >>>>    -    if (unlikely(!h2g_has_room(ctb, len_dw))) {
> >>>> +    if (unlikely(!h2g_has_room(ct, len_dw))) {
> >>>>            if (ct->stall_time == KTIME_MAX)
> >>>>                ct->stall_time = ktime_get();
> >>>>    @@ -613,7 +625,7 @@ static int ct_send(struct intel_guc_ct *ct,
> >>>>         */
> >>>>    retry:
> >>>>        spin_lock_irqsave(&ctb->lock, flags);
> >>>> -    if (unlikely(!h2g_has_room(ctb, len + GUC_CTB_HDR_LEN))) {
> >>>> +    if (unlikely(!h2g_has_room(ct, len + GUC_CTB_HDR_LEN))) {
> >>>>            if (ct->stall_time == KTIME_MAX)
> >>>>                ct->stall_time = ktime_get();
> >>>>            spin_unlock_irqrestore(&ctb->lock, flags);
> >>>> @@ -733,7 +745,7 @@ static int ct_read(struct intel_guc_ct *ct, struct
> >>>> ct_incoming_msg **msg)
> >>>>    {
> >>>>        struct intel_guc_ct_buffer *ctb = &ct->ctbs.recv;
> >>>>        struct guc_ct_buffer_desc *desc = ctb->desc;
> >>>> -    u32 head = desc->head;
> >>>> +    u32 head = ctb->head;
> >>>>        u32 tail = desc->tail;
> >>>>        u32 size = ctb->size;
> >>>>        u32 *cmds = ctb->cmds;
> >>>> @@ -748,12 +760,29 @@ static int ct_read(struct intel_guc_ct *ct,
> >>>> struct ct_incoming_msg **msg)
> >>>>        if (unlikely(desc->status))
> >>>>            goto corrupted;
> >>>>    -    if (unlikely((tail | head) >= size)) {
> >>>> +    GEM_BUG_ON(head > size);

This is driver owned field so I think a GEM_BUG_ON is correct as if this
blows the driver apart we have a bug in the i915. 

> >>> Is the BUG_ON necessary given that both options below do the same check
> >>> but as a corrupted buffer test (with subsequent recovery by GT reset?)
> >>> rather than killing the driver.
> >> "head" and "size" are now fully owned by the driver.
> >> BUGON here is to make sure driver is coded correctly.
> > The point is that both sides of the #if below also validate head. So
> 
> but not the same "head"
> 
> under DEBUG we are validating the one from descriptor (together with
> tail) - and that should be recoverable as if this fails it was clearly
> not our fault.
> 
> but under non-DEBUG we were attempting to validate again the local one,
> pretending that this is recoverable, but it is not, as this is our fault
> (elsewhere in i915 we don't attempt to recover from obvious coding errors).
>
> > first there is a BUG_ON, then there is the same test but without blowing
> > the driver apart. One or the other is not required. My vote would be to
> > keep the recoverable test rather than the BUG_ON.
> 
> IMHO we should keep GEMBUGON and drop redundant check in non-DEBUG.
> 
> But let Matt decide.
>

I think I'll drop the testing of the head value and keep the BUG_ON.

Matt
 
> Michal
> 
> > 
> > John.
> > 
> >>
> >>>> +
> >>>> +#ifdef CONFIG_DRM_I915_DEBUG_GUC
> >>>> +    if (unlikely(head != READ_ONCE(desc->head))) {
> >>>> +        CT_ERROR(ct, "Head was modified %u != %u\n",
> >>>> +             desc->head, ctb->head);
> >>>> +        desc->status |= GUC_CTB_STATUS_MISMATCH;
> >>>> +        goto corrupted;
> >>>> +    }
> >>>> +    if (unlikely((desc->tail | desc->head) >= size)) {
> >>>> +        CT_ERROR(ct, "Invalid offsets head=%u tail=%u (size=%u)\n",
> >>>> +             head, tail, size);
> >>>> +        desc->status |= GUC_CTB_STATUS_OVERFLOW;
> >>>> +        goto corrupted;
> >>>> +    }
> >>>> +#else
> >>>> +    if (unlikely((tail | ctb->head) >= size)) {
> >>> Could just be 'head' rather than 'ctb->head'.
> >> or drop "ctb->head" completely since this is driver owned field and
> >> above you already have BUGON to test it
> >>
> >> Michal
> >>
> >>> John.
> >>>
> >>>>            CT_ERROR(ct, "Invalid offsets head=%u tail=%u (size=%u)\n",
> >>>>                 head, tail, size);
> >>>>            desc->status |= GUC_CTB_STATUS_OVERFLOW;
> >>>>            goto corrupted;
> >>>>        }
> >>>> +#endif
> >>>>          /* tail == head condition indicates empty */
> >>>>        available = tail - head;
> >>>> @@ -803,6 +832,7 @@ static int ct_read(struct intel_guc_ct *ct, struct
> >>>> ct_incoming_msg **msg)
> >>>>        }
> >>>>        CT_DEBUG(ct, "received %*ph\n", 4 * len, (*msg)->msg);
> >>>>    +    ctb->head = head;
> >>>>        /* now update descriptor */
> >>>>        WRITE_ONCE(desc->head, head);
> >>>>    diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h
> >>>> b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h
> >>>> index bee03794c1eb..edd1bba0445d 100644
> >>>> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h
> >>>> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h
> >>>> @@ -33,6 +33,9 @@ struct intel_guc;
> >>>>     * @desc: pointer to the buffer descriptor
> >>>>     * @cmds: pointer to the commands buffer
> >>>>     * @size: size of the commands buffer in dwords
> >>>> + * @head: local shadow copy of head in dwords
> >>>> + * @tail: local shadow copy of tail in dwords
> >>>> + * @space: local shadow copy of space in dwords
> >>>>     * @broken: flag to indicate if descriptor data is broken
> >>>>     */
> >>>>    struct intel_guc_ct_buffer {
> >>>> @@ -40,6 +43,9 @@ struct intel_guc_ct_buffer {
> >>>>        struct guc_ct_buffer_desc *desc;
> >>>>        u32 *cmds;
> >>>>        u32 size;
> >>>> +    u32 tail;
> >>>> +    u32 head;
> >>>> +    u32 space;
> >>>>        bool broken;
> >>>>    };
> >>>>    
> >
John Harrison July 6, 2021, 10:41 p.m. UTC | #6
On 7/6/2021 12:33, Michal Wajdeczko wrote:
> On 06.07.2021 21:19, John Harrison wrote:
>> On 7/6/2021 12:12, Michal Wajdeczko wrote:
>>> On 06.07.2021 21:00, John Harrison wrote:
>>>> On 7/1/2021 10:15, Matthew Brost wrote:
>>>>> CTB writes are now in the path of command submission and should be
>>>>> optimized for performance. Rather than reading CTB descriptor values
>>>>> (e.g. head, tail) which could result in accesses across the PCIe bus,
>>>>> store shadow local copies and only read/write the descriptor values
>>>>> when
>>>>> absolutely necessary. Also store the current space in the each channel
>>>>> locally.
>>>>>
>>>>> v2:
>>>>>     (Michel)
>>>>>      - Add additional sanity checks for head / tail pointers
>>>>>      - Use GUC_CTB_HDR_LEN rather than magic 1
>>>>>
>>>>> Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
>>>>> Signed-off-by: Matthew Brost <matthew.brost@intel.com>
>>>>> ---
>>>>>     drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c | 88
>>>>> +++++++++++++++--------
>>>>>     drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h |  6 ++
>>>>>     2 files changed, 65 insertions(+), 29 deletions(-)
>>>>>
>>>>> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
>>>>> b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
>>>>> index a9cb7b608520..5b8b4ff609e2 100644
>>>>> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
>>>>> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
>>>>> @@ -130,6 +130,10 @@ static void guc_ct_buffer_desc_init(struct
>>>>> guc_ct_buffer_desc *desc)
>>>>>     static void guc_ct_buffer_reset(struct intel_guc_ct_buffer *ctb)
>>>>>     {
>>>>>         ctb->broken = false;
>>>>> +    ctb->tail = 0;
>>>>> +    ctb->head = 0;
>>>>> +    ctb->space = CIRC_SPACE(ctb->tail, ctb->head, ctb->size);
>>>>> +
>>>>>         guc_ct_buffer_desc_init(ctb->desc);
>>>>>     }
>>>>>     @@ -383,10 +387,8 @@ static int ct_write(struct intel_guc_ct *ct,
>>>>>     {
>>>>>         struct intel_guc_ct_buffer *ctb = &ct->ctbs.send;
>>>>>         struct guc_ct_buffer_desc *desc = ctb->desc;
>>>>> -    u32 head = desc->head;
>>>>> -    u32 tail = desc->tail;
>>>>> +    u32 tail = ctb->tail;
>>>>>         u32 size = ctb->size;
>>>>> -    u32 used;
>>>>>         u32 header;
>>>>>         u32 hxg;
>>>>>         u32 *cmds = ctb->cmds;
>>>>> @@ -395,25 +397,22 @@ static int ct_write(struct intel_guc_ct *ct,
>>>>>         if (unlikely(desc->status))
>>>>>             goto corrupted;
>>>>>     -    if (unlikely((tail | head) >= size)) {
>>>>> +    GEM_BUG_ON(tail > size);
>>>>> +
>>>>> +#ifdef CONFIG_DRM_I915_DEBUG_GUC
>>>>> +    if (unlikely(tail != READ_ONCE(desc->tail))) {
>>>>> +        CT_ERROR(ct, "Tail was modified %u != %u\n",
>>>>> +             desc->tail, ctb->tail);
>>>>> +        desc->status |= GUC_CTB_STATUS_MISMATCH;
>>>>> +        goto corrupted;
>>>>> +    }
>>>>> +    if (unlikely((desc->tail | desc->head) >= size)) {
>>>>>             CT_ERROR(ct, "Invalid offsets head=%u tail=%u (size=%u)\n",
>>>>> -             head, tail, size);
>>>>> +             desc->head, desc->tail, size);
>>>>>             desc->status |= GUC_CTB_STATUS_OVERFLOW;
>>>>>             goto corrupted;
>>>>>         }
>>>>> -
>>>>> -    /*
>>>>> -     * tail == head condition indicates empty. GuC FW does not support
>>>>> -     * using up the entire buffer to get tail == head meaning full.
>>>>> -     */
>>>>> -    if (tail < head)
>>>>> -        used = (size - head) + tail;
>>>>> -    else
>>>>> -        used = tail - head;
>>>>> -
>>>>> -    /* make sure there is a space including extra dw for the fence */
>>>>> -    if (unlikely(used + len + GUC_CTB_HDR_LEN >= size))
>>>>> -        return -ENOSPC;
>>>>> +#endif
>>>>>           /*
>>>>>          * dw0: CT header (including fence)
>>>>> @@ -454,7 +453,9 @@ static int ct_write(struct intel_guc_ct *ct,
>>>>>         write_barrier(ct);
>>>>>           /* now update descriptor */
>>>>> +    ctb->tail = tail;
>>>>>         WRITE_ONCE(desc->tail, tail);
>>>>> +    ctb->space -= len + GUC_CTB_HDR_LEN;
>>>>>           return 0;
>>>>>     @@ -470,7 +471,7 @@ static int ct_write(struct intel_guc_ct *ct,
>>>>>      * @req:    pointer to pending request
>>>>>      * @status:    placeholder for status
>>>>>      *
>>>>> - * For each sent request, Guc shall send bac CT response message.
>>>>> + * For each sent request, GuC shall send back CT response message.
>>>>>      * Our message handler will update status of tracked request once
>>>>>      * response message with given fence is received. Wait here and
>>>>>      * check for valid response status value.
>>>>> @@ -526,24 +527,35 @@ static inline bool ct_deadlocked(struct
>>>>> intel_guc_ct *ct)
>>>>>         return ret;
>>>>>     }
>>>>>     -static inline bool h2g_has_room(struct intel_guc_ct_buffer *ctb,
>>>>> u32 len_dw)
>>>>> +static inline bool h2g_has_room(struct intel_guc_ct *ct, u32 len_dw)
>>>>>     {
>>>>> -    struct guc_ct_buffer_desc *desc = ctb->desc;
>>>>> -    u32 head = READ_ONCE(desc->head);
>>>>> +    struct intel_guc_ct_buffer *ctb = &ct->ctbs.send;
>>>>> +    u32 head;
>>>>>         u32 space;
>>>>>     -    space = CIRC_SPACE(desc->tail, head, ctb->size);
>>>>> +    if (ctb->space >= len_dw)
>>>>> +        return true;
>>>>> +
>>>>> +    head = READ_ONCE(ctb->desc->head);
>>>>> +    if (unlikely(head > ctb->size)) {
>>>>> +        CT_ERROR(ct, "Corrupted descriptor head=%u tail=%u size=%u\n",
>>>>> +             ctb->desc->head, ctb->desc->tail, ctb->size);
>>>>> +        ctb->desc->status |= GUC_CTB_STATUS_OVERFLOW;
>>>>> +        ctb->broken = true;
>>>>> +        return false;
>>>>> +    }
>>>>> +
>>>>> +    space = CIRC_SPACE(ctb->tail, head, ctb->size);
>>>>> +    ctb->space = space;
>>>>>           return space >= len_dw;
>>>>>     }
>>>>>       static int has_room_nb(struct intel_guc_ct *ct, u32 len_dw)
>>>>>     {
>>>>> -    struct intel_guc_ct_buffer *ctb = &ct->ctbs.send;
>>>>> -
>>>>>         lockdep_assert_held(&ct->ctbs.send.lock);
>>>>>     -    if (unlikely(!h2g_has_room(ctb, len_dw))) {
>>>>> +    if (unlikely(!h2g_has_room(ct, len_dw))) {
>>>>>             if (ct->stall_time == KTIME_MAX)
>>>>>                 ct->stall_time = ktime_get();
>>>>>     @@ -613,7 +625,7 @@ static int ct_send(struct intel_guc_ct *ct,
>>>>>          */
>>>>>     retry:
>>>>>         spin_lock_irqsave(&ctb->lock, flags);
>>>>> -    if (unlikely(!h2g_has_room(ctb, len + GUC_CTB_HDR_LEN))) {
>>>>> +    if (unlikely(!h2g_has_room(ct, len + GUC_CTB_HDR_LEN))) {
>>>>>             if (ct->stall_time == KTIME_MAX)
>>>>>                 ct->stall_time = ktime_get();
>>>>>             spin_unlock_irqrestore(&ctb->lock, flags);
>>>>> @@ -733,7 +745,7 @@ static int ct_read(struct intel_guc_ct *ct, struct
>>>>> ct_incoming_msg **msg)
>>>>>     {
>>>>>         struct intel_guc_ct_buffer *ctb = &ct->ctbs.recv;
>>>>>         struct guc_ct_buffer_desc *desc = ctb->desc;
>>>>> -    u32 head = desc->head;
>>>>> +    u32 head = ctb->head;
>>>>>         u32 tail = desc->tail;
>>>>>         u32 size = ctb->size;
>>>>>         u32 *cmds = ctb->cmds;
>>>>> @@ -748,12 +760,29 @@ static int ct_read(struct intel_guc_ct *ct,
>>>>> struct ct_incoming_msg **msg)
>>>>>         if (unlikely(desc->status))
>>>>>             goto corrupted;
>>>>>     -    if (unlikely((tail | head) >= size)) {
>>>>> +    GEM_BUG_ON(head > size);
>>>> Is the BUG_ON necessary given that both options below do the same check
>>>> but as a corrupted buffer test (with subsequent recovery by GT reset?)
>>>> rather than killing the driver.
>>> "head" and "size" are now fully owned by the driver.
>>> BUGON here is to make sure driver is coded correctly.
>> The point is that both sides of the #if below also validate head. So
> but not the same "head"
It is from the point of view of is the BUG_ON redundant duplication:

+    if (unlikely(head != READ_ONCE(desc->head))) {



>
> under DEBUG we are validating the one from descriptor (together with
> tail) - and that should be recoverable as if this fails it was clearly
> not our fault.
>
> but under non-DEBUG we were attempting to validate again the local one,
> pretending that this is recoverable, but it is not, as this is our fault
> (elsewhere in i915 we don't attempt to recover from obvious coding errors).
If it happens during driver development when someone is editing code 
then it might be an obvious coding error. If it happens once in a blue 
moon on some customer's system then it is a very hard to track down race 
condition that is totally not obvious at all. The former won't make it 
past CI whether it is a BUG_ON or a CT_ERROR. The latter has just killed 
the end user's kernel rather than attempted to recover (and would have 
been likely to succeed the recovery).

John.

>
>> first there is a BUG_ON, then there is the same test but without blowing
>> the driver apart. One or the other is not required. My vote would be to
>> keep the recoverable test rather than the BUG_ON.
> IMHO we should keep GEMBUGON and drop redundant check in non-DEBUG.
>
> But let Matt decide.
>
> Michal
>
>> John.
>>
>>>>> +
>>>>> +#ifdef CONFIG_DRM_I915_DEBUG_GUC
>>>>> +    if (unlikely(head != READ_ONCE(desc->head))) {
>>>>> +        CT_ERROR(ct, "Head was modified %u != %u\n",
>>>>> +             desc->head, ctb->head);
>>>>> +        desc->status |= GUC_CTB_STATUS_MISMATCH;
>>>>> +        goto corrupted;
>>>>> +    }
>>>>> +    if (unlikely((desc->tail | desc->head) >= size)) {
>>>>> +        CT_ERROR(ct, "Invalid offsets head=%u tail=%u (size=%u)\n",
>>>>> +             head, tail, size);
>>>>> +        desc->status |= GUC_CTB_STATUS_OVERFLOW;
>>>>> +        goto corrupted;
>>>>> +    }
>>>>> +#else
>>>>> +    if (unlikely((tail | ctb->head) >= size)) {
>>>> Could just be 'head' rather than 'ctb->head'.
>>> or drop "ctb->head" completely since this is driver owned field and
>>> above you already have BUGON to test it
>>>
>>> Michal
>>>
>>>> John.
>>>>
>>>>>             CT_ERROR(ct, "Invalid offsets head=%u tail=%u (size=%u)\n",
>>>>>                  head, tail, size);
>>>>>             desc->status |= GUC_CTB_STATUS_OVERFLOW;
>>>>>             goto corrupted;
>>>>>         }
>>>>> +#endif
>>>>>           /* tail == head condition indicates empty */
>>>>>         available = tail - head;
>>>>> @@ -803,6 +832,7 @@ static int ct_read(struct intel_guc_ct *ct, struct
>>>>> ct_incoming_msg **msg)
>>>>>         }
>>>>>         CT_DEBUG(ct, "received %*ph\n", 4 * len, (*msg)->msg);
>>>>>     +    ctb->head = head;
>>>>>         /* now update descriptor */
>>>>>         WRITE_ONCE(desc->head, head);
>>>>>     diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h
>>>>> b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h
>>>>> index bee03794c1eb..edd1bba0445d 100644
>>>>> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h
>>>>> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h
>>>>> @@ -33,6 +33,9 @@ struct intel_guc;
>>>>>      * @desc: pointer to the buffer descriptor
>>>>>      * @cmds: pointer to the commands buffer
>>>>>      * @size: size of the commands buffer in dwords
>>>>> + * @head: local shadow copy of head in dwords
>>>>> + * @tail: local shadow copy of tail in dwords
>>>>> + * @space: local shadow copy of space in dwords
>>>>>      * @broken: flag to indicate if descriptor data is broken
>>>>>      */
>>>>>     struct intel_guc_ct_buffer {
>>>>> @@ -40,6 +43,9 @@ struct intel_guc_ct_buffer {
>>>>>         struct guc_ct_buffer_desc *desc;
>>>>>         u32 *cmds;
>>>>>         u32 size;
>>>>> +    u32 tail;
>>>>> +    u32 head;
>>>>> +    u32 space;
>>>>>         bool broken;
>>>>>     };
>>>>>
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
index a9cb7b608520..5b8b4ff609e2 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
@@ -130,6 +130,10 @@  static void guc_ct_buffer_desc_init(struct guc_ct_buffer_desc *desc)
 static void guc_ct_buffer_reset(struct intel_guc_ct_buffer *ctb)
 {
 	ctb->broken = false;
+	ctb->tail = 0;
+	ctb->head = 0;
+	ctb->space = CIRC_SPACE(ctb->tail, ctb->head, ctb->size);
+
 	guc_ct_buffer_desc_init(ctb->desc);
 }
 
@@ -383,10 +387,8 @@  static int ct_write(struct intel_guc_ct *ct,
 {
 	struct intel_guc_ct_buffer *ctb = &ct->ctbs.send;
 	struct guc_ct_buffer_desc *desc = ctb->desc;
-	u32 head = desc->head;
-	u32 tail = desc->tail;
+	u32 tail = ctb->tail;
 	u32 size = ctb->size;
-	u32 used;
 	u32 header;
 	u32 hxg;
 	u32 *cmds = ctb->cmds;
@@ -395,25 +397,22 @@  static int ct_write(struct intel_guc_ct *ct,
 	if (unlikely(desc->status))
 		goto corrupted;
 
-	if (unlikely((tail | head) >= size)) {
+	GEM_BUG_ON(tail > size);
+
+#ifdef CONFIG_DRM_I915_DEBUG_GUC
+	if (unlikely(tail != READ_ONCE(desc->tail))) {
+		CT_ERROR(ct, "Tail was modified %u != %u\n",
+			 desc->tail, ctb->tail);
+		desc->status |= GUC_CTB_STATUS_MISMATCH;
+		goto corrupted;
+	}
+	if (unlikely((desc->tail | desc->head) >= size)) {
 		CT_ERROR(ct, "Invalid offsets head=%u tail=%u (size=%u)\n",
-			 head, tail, size);
+			 desc->head, desc->tail, size);
 		desc->status |= GUC_CTB_STATUS_OVERFLOW;
 		goto corrupted;
 	}
-
-	/*
-	 * tail == head condition indicates empty. GuC FW does not support
-	 * using up the entire buffer to get tail == head meaning full.
-	 */
-	if (tail < head)
-		used = (size - head) + tail;
-	else
-		used = tail - head;
-
-	/* make sure there is a space including extra dw for the fence */
-	if (unlikely(used + len + GUC_CTB_HDR_LEN >= size))
-		return -ENOSPC;
+#endif
 
 	/*
 	 * dw0: CT header (including fence)
@@ -454,7 +453,9 @@  static int ct_write(struct intel_guc_ct *ct,
 	write_barrier(ct);
 
 	/* now update descriptor */
+	ctb->tail = tail;
 	WRITE_ONCE(desc->tail, tail);
+	ctb->space -= len + GUC_CTB_HDR_LEN;
 
 	return 0;
 
@@ -470,7 +471,7 @@  static int ct_write(struct intel_guc_ct *ct,
  * @req:	pointer to pending request
  * @status:	placeholder for status
  *
- * For each sent request, Guc shall send bac CT response message.
+ * For each sent request, GuC shall send back CT response message.
  * Our message handler will update status of tracked request once
  * response message with given fence is received. Wait here and
  * check for valid response status value.
@@ -526,24 +527,35 @@  static inline bool ct_deadlocked(struct intel_guc_ct *ct)
 	return ret;
 }
 
-static inline bool h2g_has_room(struct intel_guc_ct_buffer *ctb, u32 len_dw)
+static inline bool h2g_has_room(struct intel_guc_ct *ct, u32 len_dw)
 {
-	struct guc_ct_buffer_desc *desc = ctb->desc;
-	u32 head = READ_ONCE(desc->head);
+	struct intel_guc_ct_buffer *ctb = &ct->ctbs.send;
+	u32 head;
 	u32 space;
 
-	space = CIRC_SPACE(desc->tail, head, ctb->size);
+	if (ctb->space >= len_dw)
+		return true;
+
+	head = READ_ONCE(ctb->desc->head);
+	if (unlikely(head > ctb->size)) {
+		CT_ERROR(ct, "Corrupted descriptor head=%u tail=%u size=%u\n",
+			 ctb->desc->head, ctb->desc->tail, ctb->size);
+		ctb->desc->status |= GUC_CTB_STATUS_OVERFLOW;
+		ctb->broken = true;
+		return false;
+	}
+
+	space = CIRC_SPACE(ctb->tail, head, ctb->size);
+	ctb->space = space;
 
 	return space >= len_dw;
 }
 
 static int has_room_nb(struct intel_guc_ct *ct, u32 len_dw)
 {
-	struct intel_guc_ct_buffer *ctb = &ct->ctbs.send;
-
 	lockdep_assert_held(&ct->ctbs.send.lock);
 
-	if (unlikely(!h2g_has_room(ctb, len_dw))) {
+	if (unlikely(!h2g_has_room(ct, len_dw))) {
 		if (ct->stall_time == KTIME_MAX)
 			ct->stall_time = ktime_get();
 
@@ -613,7 +625,7 @@  static int ct_send(struct intel_guc_ct *ct,
 	 */
 retry:
 	spin_lock_irqsave(&ctb->lock, flags);
-	if (unlikely(!h2g_has_room(ctb, len + GUC_CTB_HDR_LEN))) {
+	if (unlikely(!h2g_has_room(ct, len + GUC_CTB_HDR_LEN))) {
 		if (ct->stall_time == KTIME_MAX)
 			ct->stall_time = ktime_get();
 		spin_unlock_irqrestore(&ctb->lock, flags);
@@ -733,7 +745,7 @@  static int ct_read(struct intel_guc_ct *ct, struct ct_incoming_msg **msg)
 {
 	struct intel_guc_ct_buffer *ctb = &ct->ctbs.recv;
 	struct guc_ct_buffer_desc *desc = ctb->desc;
-	u32 head = desc->head;
+	u32 head = ctb->head;
 	u32 tail = desc->tail;
 	u32 size = ctb->size;
 	u32 *cmds = ctb->cmds;
@@ -748,12 +760,29 @@  static int ct_read(struct intel_guc_ct *ct, struct ct_incoming_msg **msg)
 	if (unlikely(desc->status))
 		goto corrupted;
 
-	if (unlikely((tail | head) >= size)) {
+	GEM_BUG_ON(head > size);
+
+#ifdef CONFIG_DRM_I915_DEBUG_GUC
+	if (unlikely(head != READ_ONCE(desc->head))) {
+		CT_ERROR(ct, "Head was modified %u != %u\n",
+			 desc->head, ctb->head);
+		desc->status |= GUC_CTB_STATUS_MISMATCH;
+		goto corrupted;
+	}
+	if (unlikely((desc->tail | desc->head) >= size)) {
+		CT_ERROR(ct, "Invalid offsets head=%u tail=%u (size=%u)\n",
+			 head, tail, size);
+		desc->status |= GUC_CTB_STATUS_OVERFLOW;
+		goto corrupted;
+	}
+#else
+	if (unlikely((tail | ctb->head) >= size)) {
 		CT_ERROR(ct, "Invalid offsets head=%u tail=%u (size=%u)\n",
 			 head, tail, size);
 		desc->status |= GUC_CTB_STATUS_OVERFLOW;
 		goto corrupted;
 	}
+#endif
 
 	/* tail == head condition indicates empty */
 	available = tail - head;
@@ -803,6 +832,7 @@  static int ct_read(struct intel_guc_ct *ct, struct ct_incoming_msg **msg)
 	}
 	CT_DEBUG(ct, "received %*ph\n", 4 * len, (*msg)->msg);
 
+	ctb->head = head;
 	/* now update descriptor */
 	WRITE_ONCE(desc->head, head);
 
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h
index bee03794c1eb..edd1bba0445d 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h
@@ -33,6 +33,9 @@  struct intel_guc;
  * @desc: pointer to the buffer descriptor
  * @cmds: pointer to the commands buffer
  * @size: size of the commands buffer in dwords
+ * @head: local shadow copy of head in dwords
+ * @tail: local shadow copy of tail in dwords
+ * @space: local shadow copy of space in dwords
  * @broken: flag to indicate if descriptor data is broken
  */
 struct intel_guc_ct_buffer {
@@ -40,6 +43,9 @@  struct intel_guc_ct_buffer {
 	struct guc_ct_buffer_desc *desc;
 	u32 *cmds;
 	u32 size;
+	u32 tail;
+	u32 head;
+	u32 space;
 	bool broken;
 };