diff mbox series

[v2,4/5] drm/i915/guc: Add GuC CT specific debug print wrappers

Message ID 20221118015858.2548106-5-John.C.Harrison@Intel.com (mailing list archive)
State New, archived
Headers show
Series Add module oriented dmesg output | expand

Commit Message

John Harrison Nov. 18, 2022, 1:58 a.m. UTC
From: John Harrison <John.C.Harrison@Intel.com>

Re-work the existing GuC CT printers and extend as required to match
the new wrapping scheme.

Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
---
 drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c | 222 +++++++++++-----------
 1 file changed, 113 insertions(+), 109 deletions(-)

Comments

Michal Wajdeczko Nov. 22, 2022, 5:54 p.m. UTC | #1
On 18.11.2022 02:58, John.C.Harrison@Intel.com wrote:
> From: John Harrison <John.C.Harrison@Intel.com>
> 
> Re-work the existing GuC CT printers and extend as required to match
> the new wrapping scheme.
> 
> Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
> ---
>  drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c | 222 +++++++++++-----------
>  1 file changed, 113 insertions(+), 109 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
> index 2b22065e87bf9..9d404fb377637 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
> @@ -18,31 +18,49 @@ static inline struct intel_guc *ct_to_guc(struct intel_guc_ct *ct)
>  	return container_of(ct, struct intel_guc, ct);
>  }
>  
> -static inline struct intel_gt *ct_to_gt(struct intel_guc_ct *ct)
> -{
> -	return guc_to_gt(ct_to_guc(ct));
> -}
> -
>  static inline struct drm_i915_private *ct_to_i915(struct intel_guc_ct *ct)
>  {
> -	return ct_to_gt(ct)->i915;
> -}
> +	struct intel_guc *guc = ct_to_guc(ct);
> +	struct intel_gt *gt = guc_to_gt(guc);
>  
> -static inline struct drm_device *ct_to_drm(struct intel_guc_ct *ct)
> -{
> -	return &ct_to_i915(ct)->drm;
> +	return gt->i915;
>  }
>  
> -#define CT_ERROR(_ct, _fmt, ...) \
> -	drm_err(ct_to_drm(_ct), "CT: " _fmt, ##__VA_ARGS__)
> +#define ct_err(_ct, _fmt, ...) \
> +	guc_err(ct_to_guc(_ct), "CT " _fmt, ##__VA_ARGS__)
> +
> +#define ct_warn(_ct, _fmt, ...) \
> +	guc_warn(ct_to_guc(_ct), "CT " _fmt, ##__VA_ARGS__)
> +
> +#define ct_notice(_ct, _fmt, ...) \
> +	guc_notice(ct_to_guc(_ct), "CT " _fmt, ##__VA_ARGS__)
> +
> +#define ct_info(_ct, _fmt, ...) \
> +	guc_info(ct_to_guc(_ct), "CT " _fmt, ##__VA_ARGS__)
> +
>  #ifdef CONFIG_DRM_I915_DEBUG_GUC
> -#define CT_DEBUG(_ct, _fmt, ...) \
> -	drm_dbg(ct_to_drm(_ct), "CT: " _fmt, ##__VA_ARGS__)
> +#define ct_dbg(_ct, _fmt, ...) \
> +	guc_dbg(ct_to_guc(_ct), "CT " _fmt, ##__VA_ARGS__)
>  #else
> -#define CT_DEBUG(...)	do { } while (0)
> +#define ct_dbg(...)	do { } while (0)
>  #endif
> -#define CT_PROBE_ERROR(_ct, _fmt, ...) \
> -	i915_probe_error(ct_to_i915(ct), "CT: " _fmt, ##__VA_ARGS__)
> +
> +#define ct_probe_error(_ct, _fmt, ...) \
> +	do { \
> +		if (i915_error_injected()) \
> +			ct_dbg(_ct, _fmt, ##__VA_ARGS__); \
> +		else \
> +			ct_err(_ct, _fmt, ##__VA_ARGS__); \
> +	} while (0)

guc_probe_error ?

> +
> +#define ct_WARN_ON(_ct, _condition) \
> +	ct_WARN(_ct, _condition, "%s", "ct_WARN_ON(" __stringify(_condition) ")")
> +
> +#define ct_WARN(_ct, _condition, _fmt, ...) \
> +	guc_WARN(ct_to_guc(_ct), _condition, "CT " _fmt, ##__VA_ARGS__)
> +
> +#define ct_WARN_ONCE(_ct, _condition, _fmt, ...) \
> +	guc_WARN_ONCE(ct_to_guc(_ct), _condition, "CT " _fmt, ##__VA_ARGS__)
>  
>  /**
>   * DOC: CTB Blob
> @@ -170,7 +188,7 @@ static int ct_control_enable(struct intel_guc_ct *ct, bool enable)
>  	err = guc_action_control_ctb(ct_to_guc(ct), enable ?
>  				     GUC_CTB_CONTROL_ENABLE : GUC_CTB_CONTROL_DISABLE);
>  	if (unlikely(err))
> -		CT_PROBE_ERROR(ct, "Failed to control/%s CTB (%pe)\n",
> +		ct_probe_error(ct, "Failed to control/%s CTB (%pe)\n",
>  			       str_enable_disable(enable), ERR_PTR(err));

btw, shouldn't we change all messages to start with lowercase ?

was:
	"CT0: Failed to control/%s CTB (%pe)"
is:
	"GT0: GuC CT Failed to control/%s CTB (%pe)"

unless we keep colon (as suggested by Tvrtko) as then:

	"GT0: GuC CT: Failed to control/%s CTB (%pe)"

Michal

>  
>  	return err;
> @@ -201,7 +219,7 @@ static int ct_register_buffer(struct intel_guc_ct *ct, bool send,
>  				   size);
>  	if (unlikely(err))
>  failed:
> -		CT_PROBE_ERROR(ct, "Failed to register %s buffer (%pe)\n",
> +		ct_probe_error(ct, "Failed to register %s buffer (%pe)\n",
>  			       send ? "SEND" : "RECV", ERR_PTR(err));
>  
>  	return err;
> @@ -235,21 +253,21 @@ int intel_guc_ct_init(struct intel_guc_ct *ct)
>  	blob_size = 2 * CTB_DESC_SIZE + CTB_H2G_BUFFER_SIZE + CTB_G2H_BUFFER_SIZE;
>  	err = intel_guc_allocate_and_map_vma(guc, blob_size, &ct->vma, &blob);
>  	if (unlikely(err)) {
> -		CT_PROBE_ERROR(ct, "Failed to allocate %u for CTB data (%pe)\n",
> +		ct_probe_error(ct, "Failed to allocate %u for CTB data (%pe)\n",
>  			       blob_size, ERR_PTR(err));
>  		return err;
>  	}
>  
> -	CT_DEBUG(ct, "base=%#x size=%u\n", intel_guc_ggtt_offset(guc, ct->vma), blob_size);
> +	ct_dbg(ct, "base=%#x size=%u\n", intel_guc_ggtt_offset(guc, ct->vma), blob_size);
>  
>  	/* store pointers to desc and cmds for send ctb */
>  	desc = blob;
>  	cmds = blob + 2 * CTB_DESC_SIZE;
>  	cmds_size = CTB_H2G_BUFFER_SIZE;
>  	resv_space = 0;
> -	CT_DEBUG(ct, "%s desc %#tx cmds %#tx size %u/%u\n", "send",
> -		 ptrdiff(desc, blob), ptrdiff(cmds, blob), cmds_size,
> -		 resv_space);
> +	ct_dbg(ct, "%s desc %#tx cmds %#tx size %u/%u\n", "send",
> +	       ptrdiff(desc, blob), ptrdiff(cmds, blob), cmds_size,
> +	       resv_space);
>  
>  	guc_ct_buffer_init(&ct->ctbs.send, desc, cmds, cmds_size, resv_space);
>  
> @@ -258,9 +276,9 @@ int intel_guc_ct_init(struct intel_guc_ct *ct)
>  	cmds = blob + 2 * CTB_DESC_SIZE + CTB_H2G_BUFFER_SIZE;
>  	cmds_size = CTB_G2H_BUFFER_SIZE;
>  	resv_space = G2H_ROOM_BUFFER_SIZE;
> -	CT_DEBUG(ct, "%s desc %#tx cmds %#tx size %u/%u\n", "recv",
> -		 ptrdiff(desc, blob), ptrdiff(cmds, blob), cmds_size,
> -		 resv_space);
> +	ct_dbg(ct, "%s desc %#tx cmds %#tx size %u/%u\n", "recv",
> +	       ptrdiff(desc, blob), ptrdiff(cmds, blob), cmds_size,
> +	       resv_space);
>  
>  	guc_ct_buffer_init(&ct->ctbs.recv, desc, cmds, cmds_size, resv_space);
>  
> @@ -338,7 +356,7 @@ int intel_guc_ct_enable(struct intel_guc_ct *ct)
>  	return 0;
>  
>  err_out:
> -	CT_PROBE_ERROR(ct, "Failed to enable CTB (%pe)\n", ERR_PTR(err));
> +	ct_probe_error(ct, "Failed to enable CTB (%pe)\n", ERR_PTR(err));
>  	return err;
>  }
>  
> @@ -387,14 +405,12 @@ static int ct_write(struct intel_guc_ct *ct,
>  
>  #ifdef CONFIG_DRM_I915_DEBUG_GUC
>  	if (unlikely(tail != READ_ONCE(desc->tail))) {
> -		CT_ERROR(ct, "Tail was modified %u != %u\n",
> -			 desc->tail, tail);
> +		ct_err(ct, "Tail was modified %u != %u\n", desc->tail, tail);
>  		desc->status |= GUC_CTB_STATUS_MISMATCH;
>  		goto corrupted;
>  	}
>  	if (unlikely(READ_ONCE(desc->head) >= size)) {
> -		CT_ERROR(ct, "Invalid head offset %u >= %u)\n",
> -			 desc->head, size);
> +		ct_err(ct, "Invalid head offset %u >= %u)\n", desc->head, size);
>  		desc->status |= GUC_CTB_STATUS_OVERFLOW;
>  		goto corrupted;
>  	}
> @@ -415,8 +431,8 @@ static int ct_write(struct intel_guc_ct *ct,
>  		FIELD_PREP(GUC_HXG_EVENT_MSG_0_ACTION |
>  			   GUC_HXG_EVENT_MSG_0_DATA0, action[0]);
>  
> -	CT_DEBUG(ct, "writing (tail %u) %*ph %*ph %*ph\n",
> -		 tail, 4, &header, 4, &hxg, 4 * (len - 1), &action[1]);
> +	ct_dbg(ct, "writing (tail %u) %*ph %*ph %*ph\n",
> +	       tail, 4, &header, 4, &hxg, 4 * (len - 1), &action[1]);
>  
>  	cmds[tail] = header;
>  	tail = (tail + 1) % size;
> @@ -447,8 +463,8 @@ static int ct_write(struct intel_guc_ct *ct,
>  	return 0;
>  
>  corrupted:
> -	CT_ERROR(ct, "Corrupted descriptor head=%u tail=%u status=%#x\n",
> -		 desc->head, desc->tail, desc->status);
> +	ct_err(ct, "Corrupted descriptor on write head=%u tail=%u status=%#x\n",
> +	       desc->head, desc->tail, desc->status);
>  	ctb->broken = true;
>  	return -EPIPE;
>  }
> @@ -507,17 +523,14 @@ static inline bool ct_deadlocked(struct intel_guc_ct *ct)
>  		struct guc_ct_buffer_desc *send = ct->ctbs.send.desc;
>  		struct guc_ct_buffer_desc *recv = ct->ctbs.send.desc;
>  
> -		CT_ERROR(ct, "Communication stalled for %lld ms, desc status=%#x,%#x\n",
> -			 ktime_ms_delta(ktime_get(), ct->stall_time),
> -			 send->status, recv->status);
> -		CT_ERROR(ct, "H2G Space: %u (Bytes)\n",
> -			 atomic_read(&ct->ctbs.send.space) * 4);
> -		CT_ERROR(ct, "Head: %u (Dwords)\n", ct->ctbs.send.desc->head);
> -		CT_ERROR(ct, "Tail: %u (Dwords)\n", ct->ctbs.send.desc->tail);
> -		CT_ERROR(ct, "G2H Space: %u (Bytes)\n",
> -			 atomic_read(&ct->ctbs.recv.space) * 4);
> -		CT_ERROR(ct, "Head: %u\n (Dwords)", ct->ctbs.recv.desc->head);
> -		CT_ERROR(ct, "Tail: %u\n (Dwords)", ct->ctbs.recv.desc->tail);
> +		ct_err(ct, "Communication stalled for %lld ms, desc status=%#x,%#x\n",
> +		       ktime_ms_delta(ktime_get(), ct->stall_time), send->status, recv->status);
> +		ct_err(ct, "H2G Space: %u (Bytes)\n", atomic_read(&ct->ctbs.send.space) * 4);
> +		ct_err(ct, "Head: %u (Dwords)\n", ct->ctbs.send.desc->head);
> +		ct_err(ct, "Tail: %u (Dwords)\n", ct->ctbs.send.desc->tail);
> +		ct_err(ct, "G2H Space: %u (Bytes)\n", atomic_read(&ct->ctbs.recv.space) * 4);
> +		ct_err(ct, "Head: %u\n (Dwords)", ct->ctbs.recv.desc->head);
> +		ct_err(ct, "Tail: %u\n (Dwords)", ct->ctbs.recv.desc->tail);
>  
>  		ct->ctbs.send.broken = true;
>  	}
> @@ -563,8 +576,7 @@ static inline bool h2g_has_room(struct intel_guc_ct *ct, u32 len_dw)
>  
>  	head = READ_ONCE(desc->head);
>  	if (unlikely(head > ctb->size)) {
> -		CT_ERROR(ct, "Invalid head offset %u >= %u)\n",
> -			 head, ctb->size);
> +		ct_err(ct, "Invalid head offset %u >= %u)\n", head, ctb->size);
>  		desc->status |= GUC_CTB_STATUS_OVERFLOW;
>  		ctb->broken = true;
>  		return false;
> @@ -715,17 +727,17 @@ static int ct_send(struct intel_guc_ct *ct,
>  			/* wait_for_ct_request_update returns -ENODEV on reset/suspend in progress.
>  			 * In this case, output is debug rather than error info
>  			 */
> -			CT_DEBUG(ct, "Request %#x (fence %u) cancelled as CTB is disabled\n",
> -				 action[0], request.fence);
> +			ct_dbg(ct, "Request %#x (fence %u) cancelled as CTB is disabled\n",
> +			       action[0], request.fence);
>  		else
> -			CT_ERROR(ct, "No response for request %#x (fence %u)\n",
> -				 action[0], request.fence);
> +			ct_err(ct, "No response for request %#x (fence %u)\n",
> +			       action[0], request.fence);
>  		goto unlink;
>  	}
>  
>  	if (FIELD_GET(GUC_HXG_MSG_0_TYPE, *status) == GUC_HXG_TYPE_NO_RESPONSE_RETRY) {
> -		CT_DEBUG(ct, "retrying request %#x (%u)\n", *action,
> -			 FIELD_GET(GUC_HXG_RETRY_MSG_0_REASON, *status));
> +		ct_dbg(ct, "retrying request %#x (%u)\n", *action,
> +		       FIELD_GET(GUC_HXG_RETRY_MSG_0_REASON, *status));
>  		send_again = true;
>  		goto unlink;
>  	}
> @@ -737,12 +749,12 @@ static int ct_send(struct intel_guc_ct *ct,
>  
>  	if (response_buf) {
>  		/* There shall be no data in the status */
> -		WARN_ON(FIELD_GET(GUC_HXG_RESPONSE_MSG_0_DATA0, request.status));
> +		ct_WARN_ON(ct, FIELD_GET(GUC_HXG_RESPONSE_MSG_0_DATA0, request.status));
>  		/* Return actual response len */
>  		err = request.response_len;
>  	} else {
>  		/* There shall be no response payload */
> -		WARN_ON(request.response_len);
> +		ct_WARN_ON(ct, request.response_len);
>  		/* Return data decoded from the status dword */
>  		err = FIELD_GET(GUC_HXG_RESPONSE_MSG_0_DATA0, *status);
>  	}
> @@ -771,7 +783,7 @@ int intel_guc_ct_send(struct intel_guc_ct *ct, const u32 *action, u32 len,
>  		struct intel_guc *guc = ct_to_guc(ct);
>  		struct intel_uc *uc = container_of(guc, struct intel_uc, guc);
>  
> -		WARN(!uc->reset_in_progress, "Unexpected send: action=%#x\n", *action);
> +		ct_WARN(ct, !uc->reset_in_progress, "Unexpected send: action=%#x\n", *action);
>  		return -ENODEV;
>  	}
>  
> @@ -784,11 +796,11 @@ int intel_guc_ct_send(struct intel_guc_ct *ct, const u32 *action, u32 len,
>  	ret = ct_send(ct, action, len, response_buf, response_buf_size, &status);
>  	if (unlikely(ret < 0)) {
>  		if (ret != -ENODEV)
> -			CT_ERROR(ct, "Sending action %#x failed (%pe) status=%#X\n",
> -				 action[0], ERR_PTR(ret), status);
> +			ct_err(ct, "sending action %#x failed (%pe) status=%#X\n",
> +			       action[0], ERR_PTR(ret), status);
>  	} else if (unlikely(ret)) {
> -		CT_DEBUG(ct, "send action %#x returned %d (%#x)\n",
> -			 action[0], ret, ret);
> +		ct_dbg(ct, "send action %#x returned %d (%#x)\n",
> +		       action[0], ret, ret);
>  	}
>  
>  	return ret;
> @@ -838,7 +850,7 @@ static int ct_read(struct intel_guc_ct *ct, struct ct_incoming_msg **msg)
>  			 * contexts/engines being reset. But should never happen as
>  			 * no contexts should be active when CLIENT_RESET is sent.
>  			 */
> -			CT_ERROR(ct, "Unexpected G2H after GuC has stopped!\n");
> +			ct_err(ct, "Unexpected G2H after GuC has stopped!\n");
>  			status &= ~GUC_CTB_STATUS_UNUSED;
>  		}
>  
> @@ -850,15 +862,13 @@ static int ct_read(struct intel_guc_ct *ct, struct ct_incoming_msg **msg)
>  
>  #ifdef CONFIG_DRM_I915_DEBUG_GUC
>  	if (unlikely(head != READ_ONCE(desc->head))) {
> -		CT_ERROR(ct, "Head was modified %u != %u\n",
> -			 desc->head, head);
> +		ct_err(ct, "Head was modified %u != %u\n", desc->head, head);
>  		desc->status |= GUC_CTB_STATUS_MISMATCH;
>  		goto corrupted;
>  	}
>  #endif
>  	if (unlikely(tail >= size)) {
> -		CT_ERROR(ct, "Invalid tail offset %u >= %u)\n",
> -			 tail, size);
> +		ct_err(ct, "Invalid tail offset %u >= %u)\n", tail, size);
>  		desc->status |= GUC_CTB_STATUS_OVERFLOW;
>  		goto corrupted;
>  	}
> @@ -873,7 +883,7 @@ static int ct_read(struct intel_guc_ct *ct, struct ct_incoming_msg **msg)
>  	/* beware of buffer wrap case */
>  	if (unlikely(available < 0))
>  		available += size;
> -	CT_DEBUG(ct, "available %d (%u:%u:%u)\n", available, head, tail, size);
> +	ct_dbg(ct, "read available %d (%u:%u:%u)\n", available, head, tail, size);
>  	GEM_BUG_ON(available < 0);
>  
>  	header = cmds[head];
> @@ -882,24 +892,24 @@ static int ct_read(struct intel_guc_ct *ct, struct ct_incoming_msg **msg)
>  	/* message len with header */
>  	len = FIELD_GET(GUC_CTB_MSG_0_NUM_DWORDS, header) + GUC_CTB_MSG_MIN_LEN;
>  	if (unlikely(len > (u32)available)) {
> -		CT_ERROR(ct, "Incomplete message %*ph %*ph %*ph\n",
> -			 4, &header,
> -			 4 * (head + available - 1 > size ?
> -			      size - head : available - 1), &cmds[head],
> -			 4 * (head + available - 1 > size ?
> -			      available - 1 - size + head : 0), &cmds[0]);
> +		ct_err(ct, "Incomplete message %*ph %*ph %*ph\n",
> +		       4, &header,
> +		       4 * (head + available - 1 > size ?
> +			    size - head : available - 1), &cmds[head],
> +		       4 * (head + available - 1 > size ?
> +			    available - 1 - size + head : 0), &cmds[0]);
>  		desc->status |= GUC_CTB_STATUS_UNDERFLOW;
>  		goto corrupted;
>  	}
>  
>  	*msg = ct_alloc_msg(len);
>  	if (!*msg) {
> -		CT_ERROR(ct, "No memory for message %*ph %*ph %*ph\n",
> -			 4, &header,
> -			 4 * (head + available - 1 > size ?
> -			      size - head : available - 1), &cmds[head],
> -			 4 * (head + available - 1 > size ?
> -			      available - 1 - size + head : 0), &cmds[0]);
> +		ct_err(ct, "No memory for message %*ph %*ph %*ph\n",
> +		       4, &header,
> +		       4 * (head + available - 1 > size ?
> +			    size - head : available - 1), &cmds[head],
> +		       4 * (head + available - 1 > size ?
> +			    available - 1 - size + head : 0), &cmds[0]);
>  		return available;
>  	}
>  
> @@ -909,7 +919,7 @@ static int ct_read(struct intel_guc_ct *ct, struct ct_incoming_msg **msg)
>  		(*msg)->msg[i] = cmds[head];
>  		head = (head + 1) % size;
>  	}
> -	CT_DEBUG(ct, "received %*ph\n", 4 * len, (*msg)->msg);
> +	ct_dbg(ct, "received %*ph\n", 4 * len, (*msg)->msg);
>  
>  	/* update local copies */
>  	ctb->head = head;
> @@ -920,8 +930,8 @@ static int ct_read(struct intel_guc_ct *ct, struct ct_incoming_msg **msg)
>  	return available - len;
>  
>  corrupted:
> -	CT_ERROR(ct, "Corrupted descriptor head=%u tail=%u status=%#x\n",
> -		 desc->head, desc->tail, desc->status);
> +	ct_err(ct, "Corrupted descriptor on read head=%u tail=%u status=%#x\n",
> +	       desc->head, desc->tail, desc->status);
>  	ctb->broken = true;
>  	return -EPIPE;
>  }
> @@ -944,18 +954,17 @@ static int ct_handle_response(struct intel_guc_ct *ct, struct ct_incoming_msg *r
>  		   FIELD_GET(GUC_HXG_MSG_0_TYPE, hxg[0]) != GUC_HXG_TYPE_NO_RESPONSE_RETRY &&
>  		   FIELD_GET(GUC_HXG_MSG_0_TYPE, hxg[0]) != GUC_HXG_TYPE_RESPONSE_FAILURE);
>  
> -	CT_DEBUG(ct, "response fence %u status %#x\n", fence, hxg[0]);
> +	ct_dbg(ct, "response fence %u status %#x\n", fence, hxg[0]);
>  
>  	spin_lock_irqsave(&ct->requests.lock, flags);
>  	list_for_each_entry(req, &ct->requests.pending, link) {
>  		if (unlikely(fence != req->fence)) {
> -			CT_DEBUG(ct, "request %u awaits response\n",
> -				 req->fence);
> +			ct_dbg(ct, "request %u awaits response\n", req->fence);
>  			continue;
>  		}
>  		if (unlikely(datalen > req->response_len)) {
> -			CT_ERROR(ct, "Response %u too long (datalen %u > %u)\n",
> -				 req->fence, datalen, req->response_len);
> +			ct_err(ct, "response %u too long (datalen %u > %u)\n",
> +			       req->fence, datalen, req->response_len);
>  			datalen = min(datalen, req->response_len);
>  			err = -EMSGSIZE;
>  		}
> @@ -967,12 +976,11 @@ static int ct_handle_response(struct intel_guc_ct *ct, struct ct_incoming_msg *r
>  		break;
>  	}
>  	if (!found) {
> -		CT_ERROR(ct, "Unsolicited response (fence %u)\n", fence);
> -		CT_ERROR(ct, "Could not find fence=%u, last_fence=%u\n", fence,
> -			 ct->requests.last_fence);
> +		ct_err(ct, "Unsolicited response (fence %u)\n", fence);
> +		ct_err(ct, "Could not find fence=%u, last_fence=%u\n", fence,
> +		       ct->requests.last_fence);
>  		list_for_each_entry(req, &ct->requests.pending, link)
> -			CT_ERROR(ct, "request %u awaits response\n",
> -				 req->fence);
> +			ct_err(ct, "request %u awaits response\n", req->fence);
>  		err = -ENOKEY;
>  	}
>  	spin_unlock_irqrestore(&ct->requests.lock, flags);
> @@ -998,7 +1006,7 @@ static int ct_process_request(struct intel_guc_ct *ct, struct ct_incoming_msg *r
>  	action = FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, hxg[0]);
>  	len = hxg_len - GUC_HXG_MSG_MIN_LEN;
>  
> -	CT_DEBUG(ct, "request %x %*ph\n", action, 4 * len, payload);
> +	ct_dbg(ct, "request %x %*ph\n", action, 4 * len, payload);
>  
>  	switch (action) {
>  	case INTEL_GUC_ACTION_DEFAULT:
> @@ -1016,9 +1024,6 @@ static int ct_process_request(struct intel_guc_ct *ct, struct ct_incoming_msg *r
>  		break;
>  	case INTEL_GUC_ACTION_STATE_CAPTURE_NOTIFICATION:
>  		ret = intel_guc_error_capture_process_msg(guc, payload, len);
> -		if (unlikely(ret))
> -			CT_ERROR(ct, "error capture notification failed %x %*ph\n",
> -				 action, 4 * len, payload);
>  		break;
>  	case INTEL_GUC_ACTION_ENGINE_FAILURE_NOTIFICATION:
>  		ret = intel_guc_engine_failure_process_msg(guc, payload, len);
> @@ -1028,11 +1033,11 @@ static int ct_process_request(struct intel_guc_ct *ct, struct ct_incoming_msg *r
>  		ret = 0;
>  		break;
>  	case INTEL_GUC_ACTION_NOTIFY_CRASH_DUMP_POSTED:
> -		CT_ERROR(ct, "Received GuC crash dump notification!\n");
> +		guc_err(guc, "notification: Crash dump!\n");
>  		ret = 0;
>  		break;
>  	case INTEL_GUC_ACTION_NOTIFY_EXCEPTION:
> -		CT_ERROR(ct, "Received GuC exception notification!\n");
> +		guc_err(guc, "notification: Exception!\n");
>  		ret = 0;
>  		break;
>  	default:
> @@ -1041,8 +1046,7 @@ static int ct_process_request(struct intel_guc_ct *ct, struct ct_incoming_msg *r
>  	}
>  
>  	if (unlikely(ret)) {
> -		CT_ERROR(ct, "Failed to process request %04x (%pe)\n",
> -			 action, ERR_PTR(ret));
> +		ct_err(ct, "Failed to process request %04x (%pe)\n", action, ERR_PTR(ret));
>  		return ret;
>  	}
>  
> @@ -1070,8 +1074,8 @@ static bool ct_process_incoming_requests(struct intel_guc_ct *ct)
>  
>  	err = ct_process_request(ct, request);
>  	if (unlikely(err)) {
> -		CT_ERROR(ct, "Failed to process CT message (%pe) %*ph\n",
> -			 ERR_PTR(err), 4 * request->size, request->msg);
> +		ct_err(ct, "Failed to process message (%pe) %*ph\n",
> +		       ERR_PTR(err), 4 * request->size, request->msg);
>  		ct_free_msg(request);
>  	}
>  
> @@ -1149,8 +1153,8 @@ static int ct_handle_hxg(struct intel_guc_ct *ct, struct ct_incoming_msg *msg)
>  
>  	if (unlikely(err)) {
>  failed:
> -		CT_ERROR(ct, "Failed to handle HXG message (%pe) %*ph\n",
> -			 ERR_PTR(err), 4 * GUC_HXG_MSG_MIN_LEN, hxg);
> +		ct_err(ct, "Failed to handle HXG message (%pe) %*ph\n",
> +		       ERR_PTR(err), 4 * GUC_HXG_MSG_MIN_LEN, hxg);
>  	}
>  	return err;
>  }
> @@ -1166,8 +1170,8 @@ static void ct_handle_msg(struct intel_guc_ct *ct, struct ct_incoming_msg *msg)
>  		err = -EOPNOTSUPP;
>  
>  	if (unlikely(err)) {
> -		CT_ERROR(ct, "Failed to process CT message (%pe) %*ph\n",
> -			 ERR_PTR(err), 4 * msg->size, msg->msg);
> +		ct_err(ct, "Failed to handle message (%pe) %*ph\n",
> +		       ERR_PTR(err), 4 * msg->size, msg->msg);
>  		ct_free_msg(msg);
>  	}
>  }
> @@ -1198,7 +1202,7 @@ static void ct_try_receive_message(struct intel_guc_ct *ct)
>  {
>  	int ret;
>  
> -	if (GEM_WARN_ON(!ct->enabled))
> +	if (ct_WARN_ON(ct, !ct->enabled))
>  		return;
>  
>  	ret = ct_receive(ct);
> @@ -1220,7 +1224,7 @@ static void ct_receive_tasklet_func(struct tasklet_struct *t)
>  void intel_guc_ct_event_handler(struct intel_guc_ct *ct)
>  {
>  	if (unlikely(!ct->enabled)) {
> -		WARN(1, "Unexpected GuC event received while CT disabled!\n");
> +		ct_warn(ct, "Unexpected event received while disabled!\n");
>  		return;
>  	}
>
John Harrison Nov. 23, 2022, 1:25 a.m. UTC | #2
On 11/22/2022 09:54, Michal Wajdeczko wrote:
> On 18.11.2022 02:58, John.C.Harrison@Intel.com wrote:
>> From: John Harrison <John.C.Harrison@Intel.com>
>>
>> Re-work the existing GuC CT printers and extend as required to match
>> the new wrapping scheme.
>>
>> Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
>> ---
>>   drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c | 222 +++++++++++-----------
>>   1 file changed, 113 insertions(+), 109 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
>> index 2b22065e87bf9..9d404fb377637 100644
>> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
>> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
>> @@ -18,31 +18,49 @@ static inline struct intel_guc *ct_to_guc(struct intel_guc_ct *ct)
>>   	return container_of(ct, struct intel_guc, ct);
>>   }
>>   
>> -static inline struct intel_gt *ct_to_gt(struct intel_guc_ct *ct)
>> -{
>> -	return guc_to_gt(ct_to_guc(ct));
>> -}
>> -
>>   static inline struct drm_i915_private *ct_to_i915(struct intel_guc_ct *ct)
>>   {
>> -	return ct_to_gt(ct)->i915;
>> -}
>> +	struct intel_guc *guc = ct_to_guc(ct);
>> +	struct intel_gt *gt = guc_to_gt(guc);
>>   
>> -static inline struct drm_device *ct_to_drm(struct intel_guc_ct *ct)
>> -{
>> -	return &ct_to_i915(ct)->drm;
>> +	return gt->i915;
>>   }
>>   
>> -#define CT_ERROR(_ct, _fmt, ...) \
>> -	drm_err(ct_to_drm(_ct), "CT: " _fmt, ##__VA_ARGS__)
>> +#define ct_err(_ct, _fmt, ...) \
>> +	guc_err(ct_to_guc(_ct), "CT " _fmt, ##__VA_ARGS__)
>> +
>> +#define ct_warn(_ct, _fmt, ...) \
>> +	guc_warn(ct_to_guc(_ct), "CT " _fmt, ##__VA_ARGS__)
>> +
>> +#define ct_notice(_ct, _fmt, ...) \
>> +	guc_notice(ct_to_guc(_ct), "CT " _fmt, ##__VA_ARGS__)
>> +
>> +#define ct_info(_ct, _fmt, ...) \
>> +	guc_info(ct_to_guc(_ct), "CT " _fmt, ##__VA_ARGS__)
>> +
>>   #ifdef CONFIG_DRM_I915_DEBUG_GUC
>> -#define CT_DEBUG(_ct, _fmt, ...) \
>> -	drm_dbg(ct_to_drm(_ct), "CT: " _fmt, ##__VA_ARGS__)
>> +#define ct_dbg(_ct, _fmt, ...) \
>> +	guc_dbg(ct_to_guc(_ct), "CT " _fmt, ##__VA_ARGS__)
>>   #else
>> -#define CT_DEBUG(...)	do { } while (0)
>> +#define ct_dbg(...)	do { } while (0)
>>   #endif
>> -#define CT_PROBE_ERROR(_ct, _fmt, ...) \
>> -	i915_probe_error(ct_to_i915(ct), "CT: " _fmt, ##__VA_ARGS__)
>> +
>> +#define ct_probe_error(_ct, _fmt, ...) \
>> +	do { \
>> +		if (i915_error_injected()) \
>> +			ct_dbg(_ct, _fmt, ##__VA_ARGS__); \
>> +		else \
>> +			ct_err(_ct, _fmt, ##__VA_ARGS__); \
>> +	} while (0)
> guc_probe_error ?
>
>> +
>> +#define ct_WARN_ON(_ct, _condition) \
>> +	ct_WARN(_ct, _condition, "%s", "ct_WARN_ON(" __stringify(_condition) ")")
>> +
>> +#define ct_WARN(_ct, _condition, _fmt, ...) \
>> +	guc_WARN(ct_to_guc(_ct), _condition, "CT " _fmt, ##__VA_ARGS__)
>> +
>> +#define ct_WARN_ONCE(_ct, _condition, _fmt, ...) \
>> +	guc_WARN_ONCE(ct_to_guc(_ct), _condition, "CT " _fmt, ##__VA_ARGS__)
>>   
>>   /**
>>    * DOC: CTB Blob
>> @@ -170,7 +188,7 @@ static int ct_control_enable(struct intel_guc_ct *ct, bool enable)
>>   	err = guc_action_control_ctb(ct_to_guc(ct), enable ?
>>   				     GUC_CTB_CONTROL_ENABLE : GUC_CTB_CONTROL_DISABLE);
>>   	if (unlikely(err))
>> -		CT_PROBE_ERROR(ct, "Failed to control/%s CTB (%pe)\n",
>> +		ct_probe_error(ct, "Failed to control/%s CTB (%pe)\n",
>>   			       str_enable_disable(enable), ERR_PTR(err));
> btw, shouldn't we change all messages to start with lowercase ?
>
> was:
> 	"CT0: Failed to control/%s CTB (%pe)"
> is:
> 	"GT0: GuC CT Failed to control/%s CTB (%pe)"
>
> unless we keep colon (as suggested by Tvrtko) as then:
>
> 	"GT0: GuC CT: Failed to control/%s CTB (%pe)"
Blanket added the colon makes it messy when a string actually wants to 
start with the prefix. The rule I've been using is lower case word when 
the prefix was part of the string, upper case word when the prefix is 
just being added as a prefix. I originally just had the prefix as raw 
with no trailing space, so the individual print could decide to add a 
colon, a space, or whatever as appropriate. But that just makes for 
messy code with some files having every string look like ": Stuff 
happened" and other files have every string look like " failed to ...". 
The current version seems to be the most readable from the point of view 
of writing the code and of reading the dmesg results.

And to be clear, the 'CT0' you have in your 'was' example only exists in 
the internal tree. It never made it to upstream. It is also just plain 
wrong. Each GT has two CTs - send and receive. So having 'CT1' meaning 
some random CT on GT1 (as opposed to the read channel on GT0, for 
example) was very confusing.

John.


>
> Michal
>
>>   
>>   	return err;
>> @@ -201,7 +219,7 @@ static int ct_register_buffer(struct intel_guc_ct *ct, bool send,
>>   				   size);
>>   	if (unlikely(err))
>>   failed:
>> -		CT_PROBE_ERROR(ct, "Failed to register %s buffer (%pe)\n",
>> +		ct_probe_error(ct, "Failed to register %s buffer (%pe)\n",
>>   			       send ? "SEND" : "RECV", ERR_PTR(err));
>>   
>>   	return err;
>> @@ -235,21 +253,21 @@ int intel_guc_ct_init(struct intel_guc_ct *ct)
>>   	blob_size = 2 * CTB_DESC_SIZE + CTB_H2G_BUFFER_SIZE + CTB_G2H_BUFFER_SIZE;
>>   	err = intel_guc_allocate_and_map_vma(guc, blob_size, &ct->vma, &blob);
>>   	if (unlikely(err)) {
>> -		CT_PROBE_ERROR(ct, "Failed to allocate %u for CTB data (%pe)\n",
>> +		ct_probe_error(ct, "Failed to allocate %u for CTB data (%pe)\n",
>>   			       blob_size, ERR_PTR(err));
>>   		return err;
>>   	}
>>   
>> -	CT_DEBUG(ct, "base=%#x size=%u\n", intel_guc_ggtt_offset(guc, ct->vma), blob_size);
>> +	ct_dbg(ct, "base=%#x size=%u\n", intel_guc_ggtt_offset(guc, ct->vma), blob_size);
>>   
>>   	/* store pointers to desc and cmds for send ctb */
>>   	desc = blob;
>>   	cmds = blob + 2 * CTB_DESC_SIZE;
>>   	cmds_size = CTB_H2G_BUFFER_SIZE;
>>   	resv_space = 0;
>> -	CT_DEBUG(ct, "%s desc %#tx cmds %#tx size %u/%u\n", "send",
>> -		 ptrdiff(desc, blob), ptrdiff(cmds, blob), cmds_size,
>> -		 resv_space);
>> +	ct_dbg(ct, "%s desc %#tx cmds %#tx size %u/%u\n", "send",
>> +	       ptrdiff(desc, blob), ptrdiff(cmds, blob), cmds_size,
>> +	       resv_space);
>>   
>>   	guc_ct_buffer_init(&ct->ctbs.send, desc, cmds, cmds_size, resv_space);
>>   
>> @@ -258,9 +276,9 @@ int intel_guc_ct_init(struct intel_guc_ct *ct)
>>   	cmds = blob + 2 * CTB_DESC_SIZE + CTB_H2G_BUFFER_SIZE;
>>   	cmds_size = CTB_G2H_BUFFER_SIZE;
>>   	resv_space = G2H_ROOM_BUFFER_SIZE;
>> -	CT_DEBUG(ct, "%s desc %#tx cmds %#tx size %u/%u\n", "recv",
>> -		 ptrdiff(desc, blob), ptrdiff(cmds, blob), cmds_size,
>> -		 resv_space);
>> +	ct_dbg(ct, "%s desc %#tx cmds %#tx size %u/%u\n", "recv",
>> +	       ptrdiff(desc, blob), ptrdiff(cmds, blob), cmds_size,
>> +	       resv_space);
>>   
>>   	guc_ct_buffer_init(&ct->ctbs.recv, desc, cmds, cmds_size, resv_space);
>>   
>> @@ -338,7 +356,7 @@ int intel_guc_ct_enable(struct intel_guc_ct *ct)
>>   	return 0;
>>   
>>   err_out:
>> -	CT_PROBE_ERROR(ct, "Failed to enable CTB (%pe)\n", ERR_PTR(err));
>> +	ct_probe_error(ct, "Failed to enable CTB (%pe)\n", ERR_PTR(err));
>>   	return err;
>>   }
>>   
>> @@ -387,14 +405,12 @@ static int ct_write(struct intel_guc_ct *ct,
>>   
>>   #ifdef CONFIG_DRM_I915_DEBUG_GUC
>>   	if (unlikely(tail != READ_ONCE(desc->tail))) {
>> -		CT_ERROR(ct, "Tail was modified %u != %u\n",
>> -			 desc->tail, tail);
>> +		ct_err(ct, "Tail was modified %u != %u\n", desc->tail, tail);
>>   		desc->status |= GUC_CTB_STATUS_MISMATCH;
>>   		goto corrupted;
>>   	}
>>   	if (unlikely(READ_ONCE(desc->head) >= size)) {
>> -		CT_ERROR(ct, "Invalid head offset %u >= %u)\n",
>> -			 desc->head, size);
>> +		ct_err(ct, "Invalid head offset %u >= %u)\n", desc->head, size);
>>   		desc->status |= GUC_CTB_STATUS_OVERFLOW;
>>   		goto corrupted;
>>   	}
>> @@ -415,8 +431,8 @@ static int ct_write(struct intel_guc_ct *ct,
>>   		FIELD_PREP(GUC_HXG_EVENT_MSG_0_ACTION |
>>   			   GUC_HXG_EVENT_MSG_0_DATA0, action[0]);
>>   
>> -	CT_DEBUG(ct, "writing (tail %u) %*ph %*ph %*ph\n",
>> -		 tail, 4, &header, 4, &hxg, 4 * (len - 1), &action[1]);
>> +	ct_dbg(ct, "writing (tail %u) %*ph %*ph %*ph\n",
>> +	       tail, 4, &header, 4, &hxg, 4 * (len - 1), &action[1]);
>>   
>>   	cmds[tail] = header;
>>   	tail = (tail + 1) % size;
>> @@ -447,8 +463,8 @@ static int ct_write(struct intel_guc_ct *ct,
>>   	return 0;
>>   
>>   corrupted:
>> -	CT_ERROR(ct, "Corrupted descriptor head=%u tail=%u status=%#x\n",
>> -		 desc->head, desc->tail, desc->status);
>> +	ct_err(ct, "Corrupted descriptor on write head=%u tail=%u status=%#x\n",
>> +	       desc->head, desc->tail, desc->status);
>>   	ctb->broken = true;
>>   	return -EPIPE;
>>   }
>> @@ -507,17 +523,14 @@ static inline bool ct_deadlocked(struct intel_guc_ct *ct)
>>   		struct guc_ct_buffer_desc *send = ct->ctbs.send.desc;
>>   		struct guc_ct_buffer_desc *recv = ct->ctbs.send.desc;
>>   
>> -		CT_ERROR(ct, "Communication stalled for %lld ms, desc status=%#x,%#x\n",
>> -			 ktime_ms_delta(ktime_get(), ct->stall_time),
>> -			 send->status, recv->status);
>> -		CT_ERROR(ct, "H2G Space: %u (Bytes)\n",
>> -			 atomic_read(&ct->ctbs.send.space) * 4);
>> -		CT_ERROR(ct, "Head: %u (Dwords)\n", ct->ctbs.send.desc->head);
>> -		CT_ERROR(ct, "Tail: %u (Dwords)\n", ct->ctbs.send.desc->tail);
>> -		CT_ERROR(ct, "G2H Space: %u (Bytes)\n",
>> -			 atomic_read(&ct->ctbs.recv.space) * 4);
>> -		CT_ERROR(ct, "Head: %u\n (Dwords)", ct->ctbs.recv.desc->head);
>> -		CT_ERROR(ct, "Tail: %u\n (Dwords)", ct->ctbs.recv.desc->tail);
>> +		ct_err(ct, "Communication stalled for %lld ms, desc status=%#x,%#x\n",
>> +		       ktime_ms_delta(ktime_get(), ct->stall_time), send->status, recv->status);
>> +		ct_err(ct, "H2G Space: %u (Bytes)\n", atomic_read(&ct->ctbs.send.space) * 4);
>> +		ct_err(ct, "Head: %u (Dwords)\n", ct->ctbs.send.desc->head);
>> +		ct_err(ct, "Tail: %u (Dwords)\n", ct->ctbs.send.desc->tail);
>> +		ct_err(ct, "G2H Space: %u (Bytes)\n", atomic_read(&ct->ctbs.recv.space) * 4);
>> +		ct_err(ct, "Head: %u\n (Dwords)", ct->ctbs.recv.desc->head);
>> +		ct_err(ct, "Tail: %u\n (Dwords)", ct->ctbs.recv.desc->tail);
>>   
>>   		ct->ctbs.send.broken = true;
>>   	}
>> @@ -563,8 +576,7 @@ static inline bool h2g_has_room(struct intel_guc_ct *ct, u32 len_dw)
>>   
>>   	head = READ_ONCE(desc->head);
>>   	if (unlikely(head > ctb->size)) {
>> -		CT_ERROR(ct, "Invalid head offset %u >= %u)\n",
>> -			 head, ctb->size);
>> +		ct_err(ct, "Invalid head offset %u >= %u)\n", head, ctb->size);
>>   		desc->status |= GUC_CTB_STATUS_OVERFLOW;
>>   		ctb->broken = true;
>>   		return false;
>> @@ -715,17 +727,17 @@ static int ct_send(struct intel_guc_ct *ct,
>>   			/* wait_for_ct_request_update returns -ENODEV on reset/suspend in progress.
>>   			 * In this case, output is debug rather than error info
>>   			 */
>> -			CT_DEBUG(ct, "Request %#x (fence %u) cancelled as CTB is disabled\n",
>> -				 action[0], request.fence);
>> +			ct_dbg(ct, "Request %#x (fence %u) cancelled as CTB is disabled\n",
>> +			       action[0], request.fence);
>>   		else
>> -			CT_ERROR(ct, "No response for request %#x (fence %u)\n",
>> -				 action[0], request.fence);
>> +			ct_err(ct, "No response for request %#x (fence %u)\n",
>> +			       action[0], request.fence);
>>   		goto unlink;
>>   	}
>>   
>>   	if (FIELD_GET(GUC_HXG_MSG_0_TYPE, *status) == GUC_HXG_TYPE_NO_RESPONSE_RETRY) {
>> -		CT_DEBUG(ct, "retrying request %#x (%u)\n", *action,
>> -			 FIELD_GET(GUC_HXG_RETRY_MSG_0_REASON, *status));
>> +		ct_dbg(ct, "retrying request %#x (%u)\n", *action,
>> +		       FIELD_GET(GUC_HXG_RETRY_MSG_0_REASON, *status));
>>   		send_again = true;
>>   		goto unlink;
>>   	}
>> @@ -737,12 +749,12 @@ static int ct_send(struct intel_guc_ct *ct,
>>   
>>   	if (response_buf) {
>>   		/* There shall be no data in the status */
>> -		WARN_ON(FIELD_GET(GUC_HXG_RESPONSE_MSG_0_DATA0, request.status));
>> +		ct_WARN_ON(ct, FIELD_GET(GUC_HXG_RESPONSE_MSG_0_DATA0, request.status));
>>   		/* Return actual response len */
>>   		err = request.response_len;
>>   	} else {
>>   		/* There shall be no response payload */
>> -		WARN_ON(request.response_len);
>> +		ct_WARN_ON(ct, request.response_len);
>>   		/* Return data decoded from the status dword */
>>   		err = FIELD_GET(GUC_HXG_RESPONSE_MSG_0_DATA0, *status);
>>   	}
>> @@ -771,7 +783,7 @@ int intel_guc_ct_send(struct intel_guc_ct *ct, const u32 *action, u32 len,
>>   		struct intel_guc *guc = ct_to_guc(ct);
>>   		struct intel_uc *uc = container_of(guc, struct intel_uc, guc);
>>   
>> -		WARN(!uc->reset_in_progress, "Unexpected send: action=%#x\n", *action);
>> +		ct_WARN(ct, !uc->reset_in_progress, "Unexpected send: action=%#x\n", *action);
>>   		return -ENODEV;
>>   	}
>>   
>> @@ -784,11 +796,11 @@ int intel_guc_ct_send(struct intel_guc_ct *ct, const u32 *action, u32 len,
>>   	ret = ct_send(ct, action, len, response_buf, response_buf_size, &status);
>>   	if (unlikely(ret < 0)) {
>>   		if (ret != -ENODEV)
>> -			CT_ERROR(ct, "Sending action %#x failed (%pe) status=%#X\n",
>> -				 action[0], ERR_PTR(ret), status);
>> +			ct_err(ct, "sending action %#x failed (%pe) status=%#X\n",
>> +			       action[0], ERR_PTR(ret), status);
>>   	} else if (unlikely(ret)) {
>> -		CT_DEBUG(ct, "send action %#x returned %d (%#x)\n",
>> -			 action[0], ret, ret);
>> +		ct_dbg(ct, "send action %#x returned %d (%#x)\n",
>> +		       action[0], ret, ret);
>>   	}
>>   
>>   	return ret;
>> @@ -838,7 +850,7 @@ static int ct_read(struct intel_guc_ct *ct, struct ct_incoming_msg **msg)
>>   			 * contexts/engines being reset. But should never happen as
>>   			 * no contexts should be active when CLIENT_RESET is sent.
>>   			 */
>> -			CT_ERROR(ct, "Unexpected G2H after GuC has stopped!\n");
>> +			ct_err(ct, "Unexpected G2H after GuC has stopped!\n");
>>   			status &= ~GUC_CTB_STATUS_UNUSED;
>>   		}
>>   
>> @@ -850,15 +862,13 @@ static int ct_read(struct intel_guc_ct *ct, struct ct_incoming_msg **msg)
>>   
>>   #ifdef CONFIG_DRM_I915_DEBUG_GUC
>>   	if (unlikely(head != READ_ONCE(desc->head))) {
>> -		CT_ERROR(ct, "Head was modified %u != %u\n",
>> -			 desc->head, head);
>> +		ct_err(ct, "Head was modified %u != %u\n", desc->head, head);
>>   		desc->status |= GUC_CTB_STATUS_MISMATCH;
>>   		goto corrupted;
>>   	}
>>   #endif
>>   	if (unlikely(tail >= size)) {
>> -		CT_ERROR(ct, "Invalid tail offset %u >= %u)\n",
>> -			 tail, size);
>> +		ct_err(ct, "Invalid tail offset %u >= %u)\n", tail, size);
>>   		desc->status |= GUC_CTB_STATUS_OVERFLOW;
>>   		goto corrupted;
>>   	}
>> @@ -873,7 +883,7 @@ static int ct_read(struct intel_guc_ct *ct, struct ct_incoming_msg **msg)
>>   	/* beware of buffer wrap case */
>>   	if (unlikely(available < 0))
>>   		available += size;
>> -	CT_DEBUG(ct, "available %d (%u:%u:%u)\n", available, head, tail, size);
>> +	ct_dbg(ct, "read available %d (%u:%u:%u)\n", available, head, tail, size);
>>   	GEM_BUG_ON(available < 0);
>>   
>>   	header = cmds[head];
>> @@ -882,24 +892,24 @@ static int ct_read(struct intel_guc_ct *ct, struct ct_incoming_msg **msg)
>>   	/* message len with header */
>>   	len = FIELD_GET(GUC_CTB_MSG_0_NUM_DWORDS, header) + GUC_CTB_MSG_MIN_LEN;
>>   	if (unlikely(len > (u32)available)) {
>> -		CT_ERROR(ct, "Incomplete message %*ph %*ph %*ph\n",
>> -			 4, &header,
>> -			 4 * (head + available - 1 > size ?
>> -			      size - head : available - 1), &cmds[head],
>> -			 4 * (head + available - 1 > size ?
>> -			      available - 1 - size + head : 0), &cmds[0]);
>> +		ct_err(ct, "Incomplete message %*ph %*ph %*ph\n",
>> +		       4, &header,
>> +		       4 * (head + available - 1 > size ?
>> +			    size - head : available - 1), &cmds[head],
>> +		       4 * (head + available - 1 > size ?
>> +			    available - 1 - size + head : 0), &cmds[0]);
>>   		desc->status |= GUC_CTB_STATUS_UNDERFLOW;
>>   		goto corrupted;
>>   	}
>>   
>>   	*msg = ct_alloc_msg(len);
>>   	if (!*msg) {
>> -		CT_ERROR(ct, "No memory for message %*ph %*ph %*ph\n",
>> -			 4, &header,
>> -			 4 * (head + available - 1 > size ?
>> -			      size - head : available - 1), &cmds[head],
>> -			 4 * (head + available - 1 > size ?
>> -			      available - 1 - size + head : 0), &cmds[0]);
>> +		ct_err(ct, "No memory for message %*ph %*ph %*ph\n",
>> +		       4, &header,
>> +		       4 * (head + available - 1 > size ?
>> +			    size - head : available - 1), &cmds[head],
>> +		       4 * (head + available - 1 > size ?
>> +			    available - 1 - size + head : 0), &cmds[0]);
>>   		return available;
>>   	}
>>   
>> @@ -909,7 +919,7 @@ static int ct_read(struct intel_guc_ct *ct, struct ct_incoming_msg **msg)
>>   		(*msg)->msg[i] = cmds[head];
>>   		head = (head + 1) % size;
>>   	}
>> -	CT_DEBUG(ct, "received %*ph\n", 4 * len, (*msg)->msg);
>> +	ct_dbg(ct, "received %*ph\n", 4 * len, (*msg)->msg);
>>   
>>   	/* update local copies */
>>   	ctb->head = head;
>> @@ -920,8 +930,8 @@ static int ct_read(struct intel_guc_ct *ct, struct ct_incoming_msg **msg)
>>   	return available - len;
>>   
>>   corrupted:
>> -	CT_ERROR(ct, "Corrupted descriptor head=%u tail=%u status=%#x\n",
>> -		 desc->head, desc->tail, desc->status);
>> +	ct_err(ct, "Corrupted descriptor on read head=%u tail=%u status=%#x\n",
>> +	       desc->head, desc->tail, desc->status);
>>   	ctb->broken = true;
>>   	return -EPIPE;
>>   }
>> @@ -944,18 +954,17 @@ static int ct_handle_response(struct intel_guc_ct *ct, struct ct_incoming_msg *r
>>   		   FIELD_GET(GUC_HXG_MSG_0_TYPE, hxg[0]) != GUC_HXG_TYPE_NO_RESPONSE_RETRY &&
>>   		   FIELD_GET(GUC_HXG_MSG_0_TYPE, hxg[0]) != GUC_HXG_TYPE_RESPONSE_FAILURE);
>>   
>> -	CT_DEBUG(ct, "response fence %u status %#x\n", fence, hxg[0]);
>> +	ct_dbg(ct, "response fence %u status %#x\n", fence, hxg[0]);
>>   
>>   	spin_lock_irqsave(&ct->requests.lock, flags);
>>   	list_for_each_entry(req, &ct->requests.pending, link) {
>>   		if (unlikely(fence != req->fence)) {
>> -			CT_DEBUG(ct, "request %u awaits response\n",
>> -				 req->fence);
>> +			ct_dbg(ct, "request %u awaits response\n", req->fence);
>>   			continue;
>>   		}
>>   		if (unlikely(datalen > req->response_len)) {
>> -			CT_ERROR(ct, "Response %u too long (datalen %u > %u)\n",
>> -				 req->fence, datalen, req->response_len);
>> +			ct_err(ct, "response %u too long (datalen %u > %u)\n",
>> +			       req->fence, datalen, req->response_len);
>>   			datalen = min(datalen, req->response_len);
>>   			err = -EMSGSIZE;
>>   		}
>> @@ -967,12 +976,11 @@ static int ct_handle_response(struct intel_guc_ct *ct, struct ct_incoming_msg *r
>>   		break;
>>   	}
>>   	if (!found) {
>> -		CT_ERROR(ct, "Unsolicited response (fence %u)\n", fence);
>> -		CT_ERROR(ct, "Could not find fence=%u, last_fence=%u\n", fence,
>> -			 ct->requests.last_fence);
>> +		ct_err(ct, "Unsolicited response (fence %u)\n", fence);
>> +		ct_err(ct, "Could not find fence=%u, last_fence=%u\n", fence,
>> +		       ct->requests.last_fence);
>>   		list_for_each_entry(req, &ct->requests.pending, link)
>> -			CT_ERROR(ct, "request %u awaits response\n",
>> -				 req->fence);
>> +			ct_err(ct, "request %u awaits response\n", req->fence);
>>   		err = -ENOKEY;
>>   	}
>>   	spin_unlock_irqrestore(&ct->requests.lock, flags);
>> @@ -998,7 +1006,7 @@ static int ct_process_request(struct intel_guc_ct *ct, struct ct_incoming_msg *r
>>   	action = FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, hxg[0]);
>>   	len = hxg_len - GUC_HXG_MSG_MIN_LEN;
>>   
>> -	CT_DEBUG(ct, "request %x %*ph\n", action, 4 * len, payload);
>> +	ct_dbg(ct, "request %x %*ph\n", action, 4 * len, payload);
>>   
>>   	switch (action) {
>>   	case INTEL_GUC_ACTION_DEFAULT:
>> @@ -1016,9 +1024,6 @@ static int ct_process_request(struct intel_guc_ct *ct, struct ct_incoming_msg *r
>>   		break;
>>   	case INTEL_GUC_ACTION_STATE_CAPTURE_NOTIFICATION:
>>   		ret = intel_guc_error_capture_process_msg(guc, payload, len);
>> -		if (unlikely(ret))
>> -			CT_ERROR(ct, "error capture notification failed %x %*ph\n",
>> -				 action, 4 * len, payload);
>>   		break;
>>   	case INTEL_GUC_ACTION_ENGINE_FAILURE_NOTIFICATION:
>>   		ret = intel_guc_engine_failure_process_msg(guc, payload, len);
>> @@ -1028,11 +1033,11 @@ static int ct_process_request(struct intel_guc_ct *ct, struct ct_incoming_msg *r
>>   		ret = 0;
>>   		break;
>>   	case INTEL_GUC_ACTION_NOTIFY_CRASH_DUMP_POSTED:
>> -		CT_ERROR(ct, "Received GuC crash dump notification!\n");
>> +		guc_err(guc, "notification: Crash dump!\n");
>>   		ret = 0;
>>   		break;
>>   	case INTEL_GUC_ACTION_NOTIFY_EXCEPTION:
>> -		CT_ERROR(ct, "Received GuC exception notification!\n");
>> +		guc_err(guc, "notification: Exception!\n");
>>   		ret = 0;
>>   		break;
>>   	default:
>> @@ -1041,8 +1046,7 @@ static int ct_process_request(struct intel_guc_ct *ct, struct ct_incoming_msg *r
>>   	}
>>   
>>   	if (unlikely(ret)) {
>> -		CT_ERROR(ct, "Failed to process request %04x (%pe)\n",
>> -			 action, ERR_PTR(ret));
>> +		ct_err(ct, "Failed to process request %04x (%pe)\n", action, ERR_PTR(ret));
>>   		return ret;
>>   	}
>>   
>> @@ -1070,8 +1074,8 @@ static bool ct_process_incoming_requests(struct intel_guc_ct *ct)
>>   
>>   	err = ct_process_request(ct, request);
>>   	if (unlikely(err)) {
>> -		CT_ERROR(ct, "Failed to process CT message (%pe) %*ph\n",
>> -			 ERR_PTR(err), 4 * request->size, request->msg);
>> +		ct_err(ct, "Failed to process message (%pe) %*ph\n",
>> +		       ERR_PTR(err), 4 * request->size, request->msg);
>>   		ct_free_msg(request);
>>   	}
>>   
>> @@ -1149,8 +1153,8 @@ static int ct_handle_hxg(struct intel_guc_ct *ct, struct ct_incoming_msg *msg)
>>   
>>   	if (unlikely(err)) {
>>   failed:
>> -		CT_ERROR(ct, "Failed to handle HXG message (%pe) %*ph\n",
>> -			 ERR_PTR(err), 4 * GUC_HXG_MSG_MIN_LEN, hxg);
>> +		ct_err(ct, "Failed to handle HXG message (%pe) %*ph\n",
>> +		       ERR_PTR(err), 4 * GUC_HXG_MSG_MIN_LEN, hxg);
>>   	}
>>   	return err;
>>   }
>> @@ -1166,8 +1170,8 @@ static void ct_handle_msg(struct intel_guc_ct *ct, struct ct_incoming_msg *msg)
>>   		err = -EOPNOTSUPP;
>>   
>>   	if (unlikely(err)) {
>> -		CT_ERROR(ct, "Failed to process CT message (%pe) %*ph\n",
>> -			 ERR_PTR(err), 4 * msg->size, msg->msg);
>> +		ct_err(ct, "Failed to handle message (%pe) %*ph\n",
>> +		       ERR_PTR(err), 4 * msg->size, msg->msg);
>>   		ct_free_msg(msg);
>>   	}
>>   }
>> @@ -1198,7 +1202,7 @@ static void ct_try_receive_message(struct intel_guc_ct *ct)
>>   {
>>   	int ret;
>>   
>> -	if (GEM_WARN_ON(!ct->enabled))
>> +	if (ct_WARN_ON(ct, !ct->enabled))
>>   		return;
>>   
>>   	ret = ct_receive(ct);
>> @@ -1220,7 +1224,7 @@ static void ct_receive_tasklet_func(struct tasklet_struct *t)
>>   void intel_guc_ct_event_handler(struct intel_guc_ct *ct)
>>   {
>>   	if (unlikely(!ct->enabled)) {
>> -		WARN(1, "Unexpected GuC event received while CT disabled!\n");
>> +		ct_warn(ct, "Unexpected event received while disabled!\n");
>>   		return;
>>   	}
>>
Michal Wajdeczko Nov. 23, 2022, 8:45 p.m. UTC | #3
On 23.11.2022 02:25, John Harrison wrote:
> On 11/22/2022 09:54, Michal Wajdeczko wrote:
>> On 18.11.2022 02:58, John.C.Harrison@Intel.com wrote:
>>> From: John Harrison <John.C.Harrison@Intel.com>
>>>
>>> Re-work the existing GuC CT printers and extend as required to match
>>> the new wrapping scheme.
>>>
>>> Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
>>> ---
>>>   drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c | 222 +++++++++++-----------
>>>   1 file changed, 113 insertions(+), 109 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
>>> b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
>>> index 2b22065e87bf9..9d404fb377637 100644
>>> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
>>> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
>>> @@ -18,31 +18,49 @@ static inline struct intel_guc *ct_to_guc(struct
>>> intel_guc_ct *ct)
>>>       return container_of(ct, struct intel_guc, ct);
>>>   }
>>>   -static inline struct intel_gt *ct_to_gt(struct intel_guc_ct *ct)
>>> -{
>>> -    return guc_to_gt(ct_to_guc(ct));
>>> -}
>>> -
>>>   static inline struct drm_i915_private *ct_to_i915(struct
>>> intel_guc_ct *ct)
>>>   {
>>> -    return ct_to_gt(ct)->i915;
>>> -}
>>> +    struct intel_guc *guc = ct_to_guc(ct);
>>> +    struct intel_gt *gt = guc_to_gt(guc);
>>>   -static inline struct drm_device *ct_to_drm(struct intel_guc_ct *ct)
>>> -{
>>> -    return &ct_to_i915(ct)->drm;
>>> +    return gt->i915;
>>>   }
>>>   -#define CT_ERROR(_ct, _fmt, ...) \
>>> -    drm_err(ct_to_drm(_ct), "CT: " _fmt, ##__VA_ARGS__)
>>> +#define ct_err(_ct, _fmt, ...) \
>>> +    guc_err(ct_to_guc(_ct), "CT " _fmt, ##__VA_ARGS__)
>>> +
>>> +#define ct_warn(_ct, _fmt, ...) \
>>> +    guc_warn(ct_to_guc(_ct), "CT " _fmt, ##__VA_ARGS__)
>>> +
>>> +#define ct_notice(_ct, _fmt, ...) \
>>> +    guc_notice(ct_to_guc(_ct), "CT " _fmt, ##__VA_ARGS__)
>>> +
>>> +#define ct_info(_ct, _fmt, ...) \
>>> +    guc_info(ct_to_guc(_ct), "CT " _fmt, ##__VA_ARGS__)
>>> +
>>>   #ifdef CONFIG_DRM_I915_DEBUG_GUC
>>> -#define CT_DEBUG(_ct, _fmt, ...) \
>>> -    drm_dbg(ct_to_drm(_ct), "CT: " _fmt, ##__VA_ARGS__)
>>> +#define ct_dbg(_ct, _fmt, ...) \
>>> +    guc_dbg(ct_to_guc(_ct), "CT " _fmt, ##__VA_ARGS__)
>>>   #else
>>> -#define CT_DEBUG(...)    do { } while (0)
>>> +#define ct_dbg(...)    do { } while (0)
>>>   #endif
>>> -#define CT_PROBE_ERROR(_ct, _fmt, ...) \
>>> -    i915_probe_error(ct_to_i915(ct), "CT: " _fmt, ##__VA_ARGS__)
>>> +
>>> +#define ct_probe_error(_ct, _fmt, ...) \
>>> +    do { \
>>> +        if (i915_error_injected()) \
>>> +            ct_dbg(_ct, _fmt, ##__VA_ARGS__); \
>>> +        else \
>>> +            ct_err(_ct, _fmt, ##__VA_ARGS__); \
>>> +    } while (0)
>> guc_probe_error ?
>>
>>> +
>>> +#define ct_WARN_ON(_ct, _condition) \
>>> +    ct_WARN(_ct, _condition, "%s", "ct_WARN_ON("
>>> __stringify(_condition) ")")
>>> +
>>> +#define ct_WARN(_ct, _condition, _fmt, ...) \
>>> +    guc_WARN(ct_to_guc(_ct), _condition, "CT " _fmt, ##__VA_ARGS__)
>>> +
>>> +#define ct_WARN_ONCE(_ct, _condition, _fmt, ...) \
>>> +    guc_WARN_ONCE(ct_to_guc(_ct), _condition, "CT " _fmt,
>>> ##__VA_ARGS__)
>>>     /**
>>>    * DOC: CTB Blob
>>> @@ -170,7 +188,7 @@ static int ct_control_enable(struct intel_guc_ct
>>> *ct, bool enable)
>>>       err = guc_action_control_ctb(ct_to_guc(ct), enable ?
>>>                        GUC_CTB_CONTROL_ENABLE :
>>> GUC_CTB_CONTROL_DISABLE);
>>>       if (unlikely(err))
>>> -        CT_PROBE_ERROR(ct, "Failed to control/%s CTB (%pe)\n",
>>> +        ct_probe_error(ct, "Failed to control/%s CTB (%pe)\n",
>>>                      str_enable_disable(enable), ERR_PTR(err));
>> btw, shouldn't we change all messages to start with lowercase ?
>>
>> was:
>>     "CT0: Failed to control/%s CTB (%pe)"
>> is:
>>     "GT0: GuC CT Failed to control/%s CTB (%pe)"
>>
>> unless we keep colon (as suggested by Tvrtko) as then:
>>
>>     "GT0: GuC CT: Failed to control/%s CTB (%pe)"
> Blanket added the colon makes it messy when a string actually wants to
> start with the prefix. The rule I've been using is lower case word when
> the prefix was part of the string, upper case word when the prefix is

Hmm, I'm not sure that we should attempt to have such a flexible rule as
we shouldn't rely too much on actual format of the prefix as it could be
changed any time.  All we should know about final log message is that it
_will_ properly identify the "GT" or "GuC" that this log is related to.

So I would suggest to be just consistent and probably always start with
upper case, as that seems to be mostly used in kernel error logs, and
just make sure that any prefix will honor that (by including colon, or
braces), so this will always work like:

"[drm] *ERROR* GT0: Failed to foo (-EIO)"
"[drm] *ERROR* GT0: GUC: Failed to foo (-EIO)"
"[drm] *ERROR* GT0: GUC: CT: Failed to foo (-EIO)"

or

"[drm] *ERROR* GT0: Failed to foo (-EIO)"
"[drm] *ERROR* GT0: [GUC] Failed to foo (-EIO)"
"[drm] *ERROR* GT0: [GUC] CT: Failed to foo (-EIO)"

and even for:

"[drm] *ERROR* GT(root) Failed to foo (-EIO)"
"[drm] *ERROR* GuC(media) Failed to foo (-EIO)"
"[drm] *ERROR* GT0 [GuC:CT] Failed to foo (-EIO)"


> just being added as a prefix. I originally just had the prefix as raw
> with no trailing space, so the individual print could decide to add a
> colon, a space, or whatever as appropriate. But that just makes for
> messy code with some files having every string look like ": Stuff
> happened" and other files have every string look like " failed to ...".
> The current version seems to be the most readable from the point of view
> of writing the code and of reading the dmesg results.
> 
> And to be clear, the 'CT0' you have in your 'was' example only exists in
> the internal tree. It never made it to upstream. It is also just plain
> wrong. Each GT has two CTs - send and receive. So having 'CT1' meaning
> some random CT on GT1 (as opposed to the read channel on GT0, for
> example) was very confusing.

I don't think I was wrong, it might be you confusing CT with CTB.

Note that we only support _single_ CT (Command Transport) and this
single CT includes _pair_ of CTBs (Command Transport Buffers).

And logs/macros are CT oriented, not CTB.

Regarding those my internal changes that were printing "CT%u": they were
added exactly for the same reason as your series: to know which GT
traffic we are tracing.  And were done locally on CT level only as there
was no helpers that would automatically append "GT%u" prefix - helpers
that you're defining right now ;)

Michal

> 
> John.
> 
> 
>>
>> Michal
>>
>>>         return err;
>>> @@ -201,7 +219,7 @@ static int ct_register_buffer(struct intel_guc_ct
>>> *ct, bool send,
>>>                      size);
>>>       if (unlikely(err))
>>>   failed:
>>> -        CT_PROBE_ERROR(ct, "Failed to register %s buffer (%pe)\n",
>>> +        ct_probe_error(ct, "Failed to register %s buffer (%pe)\n",
>>>                      send ? "SEND" : "RECV", ERR_PTR(err));
>>>         return err;
>>> @@ -235,21 +253,21 @@ int intel_guc_ct_init(struct intel_guc_ct *ct)
>>>       blob_size = 2 * CTB_DESC_SIZE + CTB_H2G_BUFFER_SIZE +
>>> CTB_G2H_BUFFER_SIZE;
>>>       err = intel_guc_allocate_and_map_vma(guc, blob_size, &ct->vma,
>>> &blob);
>>>       if (unlikely(err)) {
>>> -        CT_PROBE_ERROR(ct, "Failed to allocate %u for CTB data
>>> (%pe)\n",
>>> +        ct_probe_error(ct, "Failed to allocate %u for CTB data
>>> (%pe)\n",
>>>                      blob_size, ERR_PTR(err));
>>>           return err;
>>>       }
>>>   -    CT_DEBUG(ct, "base=%#x size=%u\n", intel_guc_ggtt_offset(guc,
>>> ct->vma), blob_size);
>>> +    ct_dbg(ct, "base=%#x size=%u\n", intel_guc_ggtt_offset(guc,
>>> ct->vma), blob_size);
>>>         /* store pointers to desc and cmds for send ctb */
>>>       desc = blob;
>>>       cmds = blob + 2 * CTB_DESC_SIZE;
>>>       cmds_size = CTB_H2G_BUFFER_SIZE;
>>>       resv_space = 0;
>>> -    CT_DEBUG(ct, "%s desc %#tx cmds %#tx size %u/%u\n", "send",
>>> -         ptrdiff(desc, blob), ptrdiff(cmds, blob), cmds_size,
>>> -         resv_space);
>>> +    ct_dbg(ct, "%s desc %#tx cmds %#tx size %u/%u\n", "send",
>>> +           ptrdiff(desc, blob), ptrdiff(cmds, blob), cmds_size,
>>> +           resv_space);
>>>         guc_ct_buffer_init(&ct->ctbs.send, desc, cmds, cmds_size,
>>> resv_space);
>>>   @@ -258,9 +276,9 @@ int intel_guc_ct_init(struct intel_guc_ct *ct)
>>>       cmds = blob + 2 * CTB_DESC_SIZE + CTB_H2G_BUFFER_SIZE;
>>>       cmds_size = CTB_G2H_BUFFER_SIZE;
>>>       resv_space = G2H_ROOM_BUFFER_SIZE;
>>> -    CT_DEBUG(ct, "%s desc %#tx cmds %#tx size %u/%u\n", "recv",
>>> -         ptrdiff(desc, blob), ptrdiff(cmds, blob), cmds_size,
>>> -         resv_space);
>>> +    ct_dbg(ct, "%s desc %#tx cmds %#tx size %u/%u\n", "recv",
>>> +           ptrdiff(desc, blob), ptrdiff(cmds, blob), cmds_size,
>>> +           resv_space);
>>>         guc_ct_buffer_init(&ct->ctbs.recv, desc, cmds, cmds_size,
>>> resv_space);
>>>   @@ -338,7 +356,7 @@ int intel_guc_ct_enable(struct intel_guc_ct *ct)
>>>       return 0;
>>>     err_out:
>>> -    CT_PROBE_ERROR(ct, "Failed to enable CTB (%pe)\n", ERR_PTR(err));
>>> +    ct_probe_error(ct, "Failed to enable CTB (%pe)\n", ERR_PTR(err));
>>>       return err;
>>>   }
>>>   @@ -387,14 +405,12 @@ static int ct_write(struct intel_guc_ct *ct,
>>>     #ifdef CONFIG_DRM_I915_DEBUG_GUC
>>>       if (unlikely(tail != READ_ONCE(desc->tail))) {
>>> -        CT_ERROR(ct, "Tail was modified %u != %u\n",
>>> -             desc->tail, tail);
>>> +        ct_err(ct, "Tail was modified %u != %u\n", desc->tail, tail);
>>>           desc->status |= GUC_CTB_STATUS_MISMATCH;
>>>           goto corrupted;
>>>       }
>>>       if (unlikely(READ_ONCE(desc->head) >= size)) {
>>> -        CT_ERROR(ct, "Invalid head offset %u >= %u)\n",
>>> -             desc->head, size);
>>> +        ct_err(ct, "Invalid head offset %u >= %u)\n", desc->head,
>>> size);
>>>           desc->status |= GUC_CTB_STATUS_OVERFLOW;
>>>           goto corrupted;
>>>       }
>>> @@ -415,8 +431,8 @@ static int ct_write(struct intel_guc_ct *ct,
>>>           FIELD_PREP(GUC_HXG_EVENT_MSG_0_ACTION |
>>>                  GUC_HXG_EVENT_MSG_0_DATA0, action[0]);
>>>   -    CT_DEBUG(ct, "writing (tail %u) %*ph %*ph %*ph\n",
>>> -         tail, 4, &header, 4, &hxg, 4 * (len - 1), &action[1]);
>>> +    ct_dbg(ct, "writing (tail %u) %*ph %*ph %*ph\n",
>>> +           tail, 4, &header, 4, &hxg, 4 * (len - 1), &action[1]);
>>>         cmds[tail] = header;
>>>       tail = (tail + 1) % size;
>>> @@ -447,8 +463,8 @@ static int ct_write(struct intel_guc_ct *ct,
>>>       return 0;
>>>     corrupted:
>>> -    CT_ERROR(ct, "Corrupted descriptor head=%u tail=%u status=%#x\n",
>>> -         desc->head, desc->tail, desc->status);
>>> +    ct_err(ct, "Corrupted descriptor on write head=%u tail=%u
>>> status=%#x\n",
>>> +           desc->head, desc->tail, desc->status);
>>>       ctb->broken = true;
>>>       return -EPIPE;
>>>   }
>>> @@ -507,17 +523,14 @@ static inline bool ct_deadlocked(struct
>>> intel_guc_ct *ct)
>>>           struct guc_ct_buffer_desc *send = ct->ctbs.send.desc;
>>>           struct guc_ct_buffer_desc *recv = ct->ctbs.send.desc;
>>>   -        CT_ERROR(ct, "Communication stalled for %lld ms, desc
>>> status=%#x,%#x\n",
>>> -             ktime_ms_delta(ktime_get(), ct->stall_time),
>>> -             send->status, recv->status);
>>> -        CT_ERROR(ct, "H2G Space: %u (Bytes)\n",
>>> -             atomic_read(&ct->ctbs.send.space) * 4);
>>> -        CT_ERROR(ct, "Head: %u (Dwords)\n", ct->ctbs.send.desc->head);
>>> -        CT_ERROR(ct, "Tail: %u (Dwords)\n", ct->ctbs.send.desc->tail);
>>> -        CT_ERROR(ct, "G2H Space: %u (Bytes)\n",
>>> -             atomic_read(&ct->ctbs.recv.space) * 4);
>>> -        CT_ERROR(ct, "Head: %u\n (Dwords)", ct->ctbs.recv.desc->head);
>>> -        CT_ERROR(ct, "Tail: %u\n (Dwords)", ct->ctbs.recv.desc->tail);
>>> +        ct_err(ct, "Communication stalled for %lld ms, desc
>>> status=%#x,%#x\n",
>>> +               ktime_ms_delta(ktime_get(), ct->stall_time),
>>> send->status, recv->status);
>>> +        ct_err(ct, "H2G Space: %u (Bytes)\n",
>>> atomic_read(&ct->ctbs.send.space) * 4);
>>> +        ct_err(ct, "Head: %u (Dwords)\n", ct->ctbs.send.desc->head);
>>> +        ct_err(ct, "Tail: %u (Dwords)\n", ct->ctbs.send.desc->tail);
>>> +        ct_err(ct, "G2H Space: %u (Bytes)\n",
>>> atomic_read(&ct->ctbs.recv.space) * 4);
>>> +        ct_err(ct, "Head: %u\n (Dwords)", ct->ctbs.recv.desc->head);
>>> +        ct_err(ct, "Tail: %u\n (Dwords)", ct->ctbs.recv.desc->tail);
>>>             ct->ctbs.send.broken = true;
>>>       }
>>> @@ -563,8 +576,7 @@ static inline bool h2g_has_room(struct
>>> intel_guc_ct *ct, u32 len_dw)
>>>         head = READ_ONCE(desc->head);
>>>       if (unlikely(head > ctb->size)) {
>>> -        CT_ERROR(ct, "Invalid head offset %u >= %u)\n",
>>> -             head, ctb->size);
>>> +        ct_err(ct, "Invalid head offset %u >= %u)\n", head, ctb->size);
>>>           desc->status |= GUC_CTB_STATUS_OVERFLOW;
>>>           ctb->broken = true;
>>>           return false;
>>> @@ -715,17 +727,17 @@ static int ct_send(struct intel_guc_ct *ct,
>>>               /* wait_for_ct_request_update returns -ENODEV on
>>> reset/suspend in progress.
>>>                * In this case, output is debug rather than error info
>>>                */
>>> -            CT_DEBUG(ct, "Request %#x (fence %u) cancelled as CTB is
>>> disabled\n",
>>> -                 action[0], request.fence);
>>> +            ct_dbg(ct, "Request %#x (fence %u) cancelled as CTB is
>>> disabled\n",
>>> +                   action[0], request.fence);
>>>           else
>>> -            CT_ERROR(ct, "No response for request %#x (fence %u)\n",
>>> -                 action[0], request.fence);
>>> +            ct_err(ct, "No response for request %#x (fence %u)\n",
>>> +                   action[0], request.fence);
>>>           goto unlink;
>>>       }
>>>         if (FIELD_GET(GUC_HXG_MSG_0_TYPE, *status) ==
>>> GUC_HXG_TYPE_NO_RESPONSE_RETRY) {
>>> -        CT_DEBUG(ct, "retrying request %#x (%u)\n", *action,
>>> -             FIELD_GET(GUC_HXG_RETRY_MSG_0_REASON, *status));
>>> +        ct_dbg(ct, "retrying request %#x (%u)\n", *action,
>>> +               FIELD_GET(GUC_HXG_RETRY_MSG_0_REASON, *status));
>>>           send_again = true;
>>>           goto unlink;
>>>       }
>>> @@ -737,12 +749,12 @@ static int ct_send(struct intel_guc_ct *ct,
>>>         if (response_buf) {
>>>           /* There shall be no data in the status */
>>> -        WARN_ON(FIELD_GET(GUC_HXG_RESPONSE_MSG_0_DATA0,
>>> request.status));
>>> +        ct_WARN_ON(ct, FIELD_GET(GUC_HXG_RESPONSE_MSG_0_DATA0,
>>> request.status));
>>>           /* Return actual response len */
>>>           err = request.response_len;
>>>       } else {
>>>           /* There shall be no response payload */
>>> -        WARN_ON(request.response_len);
>>> +        ct_WARN_ON(ct, request.response_len);
>>>           /* Return data decoded from the status dword */
>>>           err = FIELD_GET(GUC_HXG_RESPONSE_MSG_0_DATA0, *status);
>>>       }
>>> @@ -771,7 +783,7 @@ int intel_guc_ct_send(struct intel_guc_ct *ct,
>>> const u32 *action, u32 len,
>>>           struct intel_guc *guc = ct_to_guc(ct);
>>>           struct intel_uc *uc = container_of(guc, struct intel_uc, guc);
>>>   -        WARN(!uc->reset_in_progress, "Unexpected send:
>>> action=%#x\n", *action);
>>> +        ct_WARN(ct, !uc->reset_in_progress, "Unexpected send:
>>> action=%#x\n", *action);
>>>           return -ENODEV;
>>>       }
>>>   @@ -784,11 +796,11 @@ int intel_guc_ct_send(struct intel_guc_ct
>>> *ct, const u32 *action, u32 len,
>>>       ret = ct_send(ct, action, len, response_buf, response_buf_size,
>>> &status);
>>>       if (unlikely(ret < 0)) {
>>>           if (ret != -ENODEV)
>>> -            CT_ERROR(ct, "Sending action %#x failed (%pe)
>>> status=%#X\n",
>>> -                 action[0], ERR_PTR(ret), status);
>>> +            ct_err(ct, "sending action %#x failed (%pe) status=%#X\n",
>>> +                   action[0], ERR_PTR(ret), status);
>>>       } else if (unlikely(ret)) {
>>> -        CT_DEBUG(ct, "send action %#x returned %d (%#x)\n",
>>> -             action[0], ret, ret);
>>> +        ct_dbg(ct, "send action %#x returned %d (%#x)\n",
>>> +               action[0], ret, ret);
>>>       }
>>>         return ret;
>>> @@ -838,7 +850,7 @@ static int ct_read(struct intel_guc_ct *ct,
>>> struct ct_incoming_msg **msg)
>>>                * contexts/engines being reset. But should never
>>> happen as
>>>                * no contexts should be active when CLIENT_RESET is sent.
>>>                */
>>> -            CT_ERROR(ct, "Unexpected G2H after GuC has stopped!\n");
>>> +            ct_err(ct, "Unexpected G2H after GuC has stopped!\n");
>>>               status &= ~GUC_CTB_STATUS_UNUSED;
>>>           }
>>>   @@ -850,15 +862,13 @@ static int ct_read(struct intel_guc_ct *ct,
>>> struct ct_incoming_msg **msg)
>>>     #ifdef CONFIG_DRM_I915_DEBUG_GUC
>>>       if (unlikely(head != READ_ONCE(desc->head))) {
>>> -        CT_ERROR(ct, "Head was modified %u != %u\n",
>>> -             desc->head, head);
>>> +        ct_err(ct, "Head was modified %u != %u\n", desc->head, head);
>>>           desc->status |= GUC_CTB_STATUS_MISMATCH;
>>>           goto corrupted;
>>>       }
>>>   #endif
>>>       if (unlikely(tail >= size)) {
>>> -        CT_ERROR(ct, "Invalid tail offset %u >= %u)\n",
>>> -             tail, size);
>>> +        ct_err(ct, "Invalid tail offset %u >= %u)\n", tail, size);
>>>           desc->status |= GUC_CTB_STATUS_OVERFLOW;
>>>           goto corrupted;
>>>       }
>>> @@ -873,7 +883,7 @@ static int ct_read(struct intel_guc_ct *ct,
>>> struct ct_incoming_msg **msg)
>>>       /* beware of buffer wrap case */
>>>       if (unlikely(available < 0))
>>>           available += size;
>>> -    CT_DEBUG(ct, "available %d (%u:%u:%u)\n", available, head, tail,
>>> size);
>>> +    ct_dbg(ct, "read available %d (%u:%u:%u)\n", available, head,
>>> tail, size);
>>>       GEM_BUG_ON(available < 0);
>>>         header = cmds[head];
>>> @@ -882,24 +892,24 @@ static int ct_read(struct intel_guc_ct *ct,
>>> struct ct_incoming_msg **msg)
>>>       /* message len with header */
>>>       len = FIELD_GET(GUC_CTB_MSG_0_NUM_DWORDS, header) +
>>> GUC_CTB_MSG_MIN_LEN;
>>>       if (unlikely(len > (u32)available)) {
>>> -        CT_ERROR(ct, "Incomplete message %*ph %*ph %*ph\n",
>>> -             4, &header,
>>> -             4 * (head + available - 1 > size ?
>>> -                  size - head : available - 1), &cmds[head],
>>> -             4 * (head + available - 1 > size ?
>>> -                  available - 1 - size + head : 0), &cmds[0]);
>>> +        ct_err(ct, "Incomplete message %*ph %*ph %*ph\n",
>>> +               4, &header,
>>> +               4 * (head + available - 1 > size ?
>>> +                size - head : available - 1), &cmds[head],
>>> +               4 * (head + available - 1 > size ?
>>> +                available - 1 - size + head : 0), &cmds[0]);
>>>           desc->status |= GUC_CTB_STATUS_UNDERFLOW;
>>>           goto corrupted;
>>>       }
>>>         *msg = ct_alloc_msg(len);
>>>       if (!*msg) {
>>> -        CT_ERROR(ct, "No memory for message %*ph %*ph %*ph\n",
>>> -             4, &header,
>>> -             4 * (head + available - 1 > size ?
>>> -                  size - head : available - 1), &cmds[head],
>>> -             4 * (head + available - 1 > size ?
>>> -                  available - 1 - size + head : 0), &cmds[0]);
>>> +        ct_err(ct, "No memory for message %*ph %*ph %*ph\n",
>>> +               4, &header,
>>> +               4 * (head + available - 1 > size ?
>>> +                size - head : available - 1), &cmds[head],
>>> +               4 * (head + available - 1 > size ?
>>> +                available - 1 - size + head : 0), &cmds[0]);
>>>           return available;
>>>       }
>>>   @@ -909,7 +919,7 @@ static int ct_read(struct intel_guc_ct *ct,
>>> struct ct_incoming_msg **msg)
>>>           (*msg)->msg[i] = cmds[head];
>>>           head = (head + 1) % size;
>>>       }
>>> -    CT_DEBUG(ct, "received %*ph\n", 4 * len, (*msg)->msg);
>>> +    ct_dbg(ct, "received %*ph\n", 4 * len, (*msg)->msg);
>>>         /* update local copies */
>>>       ctb->head = head;
>>> @@ -920,8 +930,8 @@ static int ct_read(struct intel_guc_ct *ct,
>>> struct ct_incoming_msg **msg)
>>>       return available - len;
>>>     corrupted:
>>> -    CT_ERROR(ct, "Corrupted descriptor head=%u tail=%u status=%#x\n",
>>> -         desc->head, desc->tail, desc->status);
>>> +    ct_err(ct, "Corrupted descriptor on read head=%u tail=%u
>>> status=%#x\n",
>>> +           desc->head, desc->tail, desc->status);
>>>       ctb->broken = true;
>>>       return -EPIPE;
>>>   }
>>> @@ -944,18 +954,17 @@ static int ct_handle_response(struct
>>> intel_guc_ct *ct, struct ct_incoming_msg *r
>>>              FIELD_GET(GUC_HXG_MSG_0_TYPE, hxg[0]) !=
>>> GUC_HXG_TYPE_NO_RESPONSE_RETRY &&
>>>              FIELD_GET(GUC_HXG_MSG_0_TYPE, hxg[0]) !=
>>> GUC_HXG_TYPE_RESPONSE_FAILURE);
>>>   -    CT_DEBUG(ct, "response fence %u status %#x\n", fence, hxg[0]);
>>> +    ct_dbg(ct, "response fence %u status %#x\n", fence, hxg[0]);
>>>         spin_lock_irqsave(&ct->requests.lock, flags);
>>>       list_for_each_entry(req, &ct->requests.pending, link) {
>>>           if (unlikely(fence != req->fence)) {
>>> -            CT_DEBUG(ct, "request %u awaits response\n",
>>> -                 req->fence);
>>> +            ct_dbg(ct, "request %u awaits response\n", req->fence);
>>>               continue;
>>>           }
>>>           if (unlikely(datalen > req->response_len)) {
>>> -            CT_ERROR(ct, "Response %u too long (datalen %u > %u)\n",
>>> -                 req->fence, datalen, req->response_len);
>>> +            ct_err(ct, "response %u too long (datalen %u > %u)\n",
>>> +                   req->fence, datalen, req->response_len);
>>>               datalen = min(datalen, req->response_len);
>>>               err = -EMSGSIZE;
>>>           }
>>> @@ -967,12 +976,11 @@ static int ct_handle_response(struct
>>> intel_guc_ct *ct, struct ct_incoming_msg *r
>>>           break;
>>>       }
>>>       if (!found) {
>>> -        CT_ERROR(ct, "Unsolicited response (fence %u)\n", fence);
>>> -        CT_ERROR(ct, "Could not find fence=%u, last_fence=%u\n", fence,
>>> -             ct->requests.last_fence);
>>> +        ct_err(ct, "Unsolicited response (fence %u)\n", fence);
>>> +        ct_err(ct, "Could not find fence=%u, last_fence=%u\n", fence,
>>> +               ct->requests.last_fence);
>>>           list_for_each_entry(req, &ct->requests.pending, link)
>>> -            CT_ERROR(ct, "request %u awaits response\n",
>>> -                 req->fence);
>>> +            ct_err(ct, "request %u awaits response\n", req->fence);
>>>           err = -ENOKEY;
>>>       }
>>>       spin_unlock_irqrestore(&ct->requests.lock, flags);
>>> @@ -998,7 +1006,7 @@ static int ct_process_request(struct
>>> intel_guc_ct *ct, struct ct_incoming_msg *r
>>>       action = FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, hxg[0]);
>>>       len = hxg_len - GUC_HXG_MSG_MIN_LEN;
>>>   -    CT_DEBUG(ct, "request %x %*ph\n", action, 4 * len, payload);
>>> +    ct_dbg(ct, "request %x %*ph\n", action, 4 * len, payload);
>>>         switch (action) {
>>>       case INTEL_GUC_ACTION_DEFAULT:
>>> @@ -1016,9 +1024,6 @@ static int ct_process_request(struct
>>> intel_guc_ct *ct, struct ct_incoming_msg *r
>>>           break;
>>>       case INTEL_GUC_ACTION_STATE_CAPTURE_NOTIFICATION:
>>>           ret = intel_guc_error_capture_process_msg(guc, payload, len);
>>> -        if (unlikely(ret))
>>> -            CT_ERROR(ct, "error capture notification failed %x %*ph\n",
>>> -                 action, 4 * len, payload);
>>>           break;
>>>       case INTEL_GUC_ACTION_ENGINE_FAILURE_NOTIFICATION:
>>>           ret = intel_guc_engine_failure_process_msg(guc, payload, len);
>>> @@ -1028,11 +1033,11 @@ static int ct_process_request(struct
>>> intel_guc_ct *ct, struct ct_incoming_msg *r
>>>           ret = 0;
>>>           break;
>>>       case INTEL_GUC_ACTION_NOTIFY_CRASH_DUMP_POSTED:
>>> -        CT_ERROR(ct, "Received GuC crash dump notification!\n");
>>> +        guc_err(guc, "notification: Crash dump!\n");
>>>           ret = 0;
>>>           break;
>>>       case INTEL_GUC_ACTION_NOTIFY_EXCEPTION:
>>> -        CT_ERROR(ct, "Received GuC exception notification!\n");
>>> +        guc_err(guc, "notification: Exception!\n");
>>>           ret = 0;
>>>           break;
>>>       default:
>>> @@ -1041,8 +1046,7 @@ static int ct_process_request(struct
>>> intel_guc_ct *ct, struct ct_incoming_msg *r
>>>       }
>>>         if (unlikely(ret)) {
>>> -        CT_ERROR(ct, "Failed to process request %04x (%pe)\n",
>>> -             action, ERR_PTR(ret));
>>> +        ct_err(ct, "Failed to process request %04x (%pe)\n", action,
>>> ERR_PTR(ret));
>>>           return ret;
>>>       }
>>>   @@ -1070,8 +1074,8 @@ static bool
>>> ct_process_incoming_requests(struct intel_guc_ct *ct)
>>>         err = ct_process_request(ct, request);
>>>       if (unlikely(err)) {
>>> -        CT_ERROR(ct, "Failed to process CT message (%pe) %*ph\n",
>>> -             ERR_PTR(err), 4 * request->size, request->msg);
>>> +        ct_err(ct, "Failed to process message (%pe) %*ph\n",
>>> +               ERR_PTR(err), 4 * request->size, request->msg);
>>>           ct_free_msg(request);
>>>       }
>>>   @@ -1149,8 +1153,8 @@ static int ct_handle_hxg(struct intel_guc_ct
>>> *ct, struct ct_incoming_msg *msg)
>>>         if (unlikely(err)) {
>>>   failed:
>>> -        CT_ERROR(ct, "Failed to handle HXG message (%pe) %*ph\n",
>>> -             ERR_PTR(err), 4 * GUC_HXG_MSG_MIN_LEN, hxg);
>>> +        ct_err(ct, "Failed to handle HXG message (%pe) %*ph\n",
>>> +               ERR_PTR(err), 4 * GUC_HXG_MSG_MIN_LEN, hxg);
>>>       }
>>>       return err;
>>>   }
>>> @@ -1166,8 +1170,8 @@ static void ct_handle_msg(struct intel_guc_ct
>>> *ct, struct ct_incoming_msg *msg)
>>>           err = -EOPNOTSUPP;
>>>         if (unlikely(err)) {
>>> -        CT_ERROR(ct, "Failed to process CT message (%pe) %*ph\n",
>>> -             ERR_PTR(err), 4 * msg->size, msg->msg);
>>> +        ct_err(ct, "Failed to handle message (%pe) %*ph\n",
>>> +               ERR_PTR(err), 4 * msg->size, msg->msg);
>>>           ct_free_msg(msg);
>>>       }
>>>   }
>>> @@ -1198,7 +1202,7 @@ static void ct_try_receive_message(struct
>>> intel_guc_ct *ct)
>>>   {
>>>       int ret;
>>>   -    if (GEM_WARN_ON(!ct->enabled))
>>> +    if (ct_WARN_ON(ct, !ct->enabled))
>>>           return;
>>>         ret = ct_receive(ct);
>>> @@ -1220,7 +1224,7 @@ static void ct_receive_tasklet_func(struct
>>> tasklet_struct *t)
>>>   void intel_guc_ct_event_handler(struct intel_guc_ct *ct)
>>>   {
>>>       if (unlikely(!ct->enabled)) {
>>> -        WARN(1, "Unexpected GuC event received while CT disabled!\n");
>>> +        ct_warn(ct, "Unexpected event received while disabled!\n");
>>>           return;
>>>       }
>>>   
>
John Harrison Dec. 1, 2022, 12:41 a.m. UTC | #4
On 11/23/2022 12:45, Michal Wajdeczko wrote:
> On 23.11.2022 02:25, John Harrison wrote:
>> On 11/22/2022 09:54, Michal Wajdeczko wrote:
>>> On 18.11.2022 02:58, John.C.Harrison@Intel.com wrote:
>>>> From: John Harrison <John.C.Harrison@Intel.com>
>>>>
>>>> Re-work the existing GuC CT printers and extend as required to match
>>>> the new wrapping scheme.
>>>>
>>>> Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
>>>> ---
>>>>    drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c | 222 +++++++++++-----------
>>>>    1 file changed, 113 insertions(+), 109 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
>>>> b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
>>>> index 2b22065e87bf9..9d404fb377637 100644
>>>> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
>>>> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
>>>> @@ -18,31 +18,49 @@ static inline struct intel_guc *ct_to_guc(struct
>>>> intel_guc_ct *ct)
>>>>        return container_of(ct, struct intel_guc, ct);
>>>>    }
>>>>    -static inline struct intel_gt *ct_to_gt(struct intel_guc_ct *ct)
>>>> -{
>>>> -    return guc_to_gt(ct_to_guc(ct));
>>>> -}
>>>> -
>>>>    static inline struct drm_i915_private *ct_to_i915(struct
>>>> intel_guc_ct *ct)
>>>>    {
>>>> -    return ct_to_gt(ct)->i915;
>>>> -}
>>>> +    struct intel_guc *guc = ct_to_guc(ct);
>>>> +    struct intel_gt *gt = guc_to_gt(guc);
>>>>    -static inline struct drm_device *ct_to_drm(struct intel_guc_ct *ct)
>>>> -{
>>>> -    return &ct_to_i915(ct)->drm;
>>>> +    return gt->i915;
>>>>    }
>>>>    -#define CT_ERROR(_ct, _fmt, ...) \
>>>> -    drm_err(ct_to_drm(_ct), "CT: " _fmt, ##__VA_ARGS__)
>>>> +#define ct_err(_ct, _fmt, ...) \
>>>> +    guc_err(ct_to_guc(_ct), "CT " _fmt, ##__VA_ARGS__)
>>>> +
>>>> +#define ct_warn(_ct, _fmt, ...) \
>>>> +    guc_warn(ct_to_guc(_ct), "CT " _fmt, ##__VA_ARGS__)
>>>> +
>>>> +#define ct_notice(_ct, _fmt, ...) \
>>>> +    guc_notice(ct_to_guc(_ct), "CT " _fmt, ##__VA_ARGS__)
>>>> +
>>>> +#define ct_info(_ct, _fmt, ...) \
>>>> +    guc_info(ct_to_guc(_ct), "CT " _fmt, ##__VA_ARGS__)
>>>> +
>>>>    #ifdef CONFIG_DRM_I915_DEBUG_GUC
>>>> -#define CT_DEBUG(_ct, _fmt, ...) \
>>>> -    drm_dbg(ct_to_drm(_ct), "CT: " _fmt, ##__VA_ARGS__)
>>>> +#define ct_dbg(_ct, _fmt, ...) \
>>>> +    guc_dbg(ct_to_guc(_ct), "CT " _fmt, ##__VA_ARGS__)
>>>>    #else
>>>> -#define CT_DEBUG(...)    do { } while (0)
>>>> +#define ct_dbg(...)    do { } while (0)
>>>>    #endif
>>>> -#define CT_PROBE_ERROR(_ct, _fmt, ...) \
>>>> -    i915_probe_error(ct_to_i915(ct), "CT: " _fmt, ##__VA_ARGS__)
>>>> +
>>>> +#define ct_probe_error(_ct, _fmt, ...) \
>>>> +    do { \
>>>> +        if (i915_error_injected()) \
>>>> +            ct_dbg(_ct, _fmt, ##__VA_ARGS__); \
>>>> +        else \
>>>> +            ct_err(_ct, _fmt, ##__VA_ARGS__); \
>>>> +    } while (0)
>>> guc_probe_error ?
>>>
>>>> +
>>>> +#define ct_WARN_ON(_ct, _condition) \
>>>> +    ct_WARN(_ct, _condition, "%s", "ct_WARN_ON("
>>>> __stringify(_condition) ")")
>>>> +
>>>> +#define ct_WARN(_ct, _condition, _fmt, ...) \
>>>> +    guc_WARN(ct_to_guc(_ct), _condition, "CT " _fmt, ##__VA_ARGS__)
>>>> +
>>>> +#define ct_WARN_ONCE(_ct, _condition, _fmt, ...) \
>>>> +    guc_WARN_ONCE(ct_to_guc(_ct), _condition, "CT " _fmt,
>>>> ##__VA_ARGS__)
>>>>      /**
>>>>     * DOC: CTB Blob
>>>> @@ -170,7 +188,7 @@ static int ct_control_enable(struct intel_guc_ct
>>>> *ct, bool enable)
>>>>        err = guc_action_control_ctb(ct_to_guc(ct), enable ?
>>>>                         GUC_CTB_CONTROL_ENABLE :
>>>> GUC_CTB_CONTROL_DISABLE);
>>>>        if (unlikely(err))
>>>> -        CT_PROBE_ERROR(ct, "Failed to control/%s CTB (%pe)\n",
>>>> +        ct_probe_error(ct, "Failed to control/%s CTB (%pe)\n",
>>>>                       str_enable_disable(enable), ERR_PTR(err));
>>> btw, shouldn't we change all messages to start with lowercase ?
>>>
>>> was:
>>>      "CT0: Failed to control/%s CTB (%pe)"
>>> is:
>>>      "GT0: GuC CT Failed to control/%s CTB (%pe)"
>>>
>>> unless we keep colon (as suggested by Tvrtko) as then:
>>>
>>>      "GT0: GuC CT: Failed to control/%s CTB (%pe)"
>> Blanket added the colon makes it messy when a string actually wants to
>> start with the prefix. The rule I've been using is lower case word when
>> the prefix was part of the string, upper case word when the prefix is
> Hmm, I'm not sure that we should attempt to have such a flexible rule as
> we shouldn't rely too much on actual format of the prefix as it could be
> changed any time.  All we should know about final log message is that it
> _will_ properly identify the "GT" or "GuC" that this log is related to.
>
> So I would suggest to be just consistent and probably always start with
> upper case, as that seems to be mostly used in kernel error logs, and
> just make sure that any prefix will honor that (by including colon, or
> braces), so this will always work like:
>
> "[drm] *ERROR* GT0: Failed to foo (-EIO)"
> "[drm] *ERROR* GT0: GUC: Failed to foo (-EIO)"
> "[drm] *ERROR* GT0: GUC: CT: Failed to foo (-EIO)"
>
> or
>
> "[drm] *ERROR* GT0: Failed to foo (-EIO)"
> "[drm] *ERROR* GT0: [GUC] Failed to foo (-EIO)"
> "[drm] *ERROR* GT0: [GUC] CT: Failed to foo (-EIO)"
>
> and even for:
>
> "[drm] *ERROR* GT(root) Failed to foo (-EIO)"
> "[drm] *ERROR* GuC(media) Failed to foo (-EIO)"
> "[drm] *ERROR* GT0 [GuC:CT] Failed to foo (-EIO)"
All of which are hideous/complex/verbose/inconsistent. 'GT0: GUC: CT:'? 
Really? Or 'GT0: [GUC] CT:'? Why the random mix of separators? And how 
would you implement '[GUC:CT]' without having a CT definition that is 
entirely self contained and does chain on to the GuC level version?

This is pointless bikeshedding. If you want to re-write every single 
debug print (yet again) and invent much more complicated macro 
definitions then feel free to take over the patch set. If not can we 
just approve the v3 version and move on to doing some actual work?

John.


>
>
>> just being added as a prefix. I originally just had the prefix as raw
>> with no trailing space, so the individual print could decide to add a
>> colon, a space, or whatever as appropriate. But that just makes for
>> messy code with some files having every string look like ": Stuff
>> happened" and other files have every string look like " failed to ...".
>> The current version seems to be the most readable from the point of view
>> of writing the code and of reading the dmesg results.
>>
>> And to be clear, the 'CT0' you have in your 'was' example only exists in
>> the internal tree. It never made it to upstream. It is also just plain
>> wrong. Each GT has two CTs - send and receive. So having 'CT1' meaning
>> some random CT on GT1 (as opposed to the read channel on GT0, for
>> example) was very confusing.
> I don't think I was wrong, it might be you confusing CT with CTB.
>
> Note that we only support _single_ CT (Command Transport) and this
> single CT includes _pair_ of CTBs (Command Transport Buffers).
>
> And logs/macros are CT oriented, not CTB.
The GuC spec does not make any distinction. In fact, there is no CT in 
the spec. There is just CTB. So at best it is ambiguous as to what CT0 / 
CT1 refers to.

>
> Regarding those my internal changes that were printing "CT%u": they were
> added exactly for the same reason as your series: to know which GT
> traffic we are tracing.  And were done locally on CT level only as there
> was no helpers that would automatically append "GT%u" prefix - helpers
> that you're defining right now ;)
>
> Michal
>
>> John.
>>
>>
>>> Michal
>>>
>>>>          return err;
>>>> @@ -201,7 +219,7 @@ static int ct_register_buffer(struct intel_guc_ct
>>>> *ct, bool send,
>>>>                       size);
>>>>        if (unlikely(err))
>>>>    failed:
>>>> -        CT_PROBE_ERROR(ct, "Failed to register %s buffer (%pe)\n",
>>>> +        ct_probe_error(ct, "Failed to register %s buffer (%pe)\n",
>>>>                       send ? "SEND" : "RECV", ERR_PTR(err));
>>>>          return err;
>>>> @@ -235,21 +253,21 @@ int intel_guc_ct_init(struct intel_guc_ct *ct)
>>>>        blob_size = 2 * CTB_DESC_SIZE + CTB_H2G_BUFFER_SIZE +
>>>> CTB_G2H_BUFFER_SIZE;
>>>>        err = intel_guc_allocate_and_map_vma(guc, blob_size, &ct->vma,
>>>> &blob);
>>>>        if (unlikely(err)) {
>>>> -        CT_PROBE_ERROR(ct, "Failed to allocate %u for CTB data
>>>> (%pe)\n",
>>>> +        ct_probe_error(ct, "Failed to allocate %u for CTB data
>>>> (%pe)\n",
>>>>                       blob_size, ERR_PTR(err));
>>>>            return err;
>>>>        }
>>>>    -    CT_DEBUG(ct, "base=%#x size=%u\n", intel_guc_ggtt_offset(guc,
>>>> ct->vma), blob_size);
>>>> +    ct_dbg(ct, "base=%#x size=%u\n", intel_guc_ggtt_offset(guc,
>>>> ct->vma), blob_size);
>>>>          /* store pointers to desc and cmds for send ctb */
>>>>        desc = blob;
>>>>        cmds = blob + 2 * CTB_DESC_SIZE;
>>>>        cmds_size = CTB_H2G_BUFFER_SIZE;
>>>>        resv_space = 0;
>>>> -    CT_DEBUG(ct, "%s desc %#tx cmds %#tx size %u/%u\n", "send",
>>>> -         ptrdiff(desc, blob), ptrdiff(cmds, blob), cmds_size,
>>>> -         resv_space);
>>>> +    ct_dbg(ct, "%s desc %#tx cmds %#tx size %u/%u\n", "send",
>>>> +           ptrdiff(desc, blob), ptrdiff(cmds, blob), cmds_size,
>>>> +           resv_space);
>>>>          guc_ct_buffer_init(&ct->ctbs.send, desc, cmds, cmds_size,
>>>> resv_space);
>>>>    @@ -258,9 +276,9 @@ int intel_guc_ct_init(struct intel_guc_ct *ct)
>>>>        cmds = blob + 2 * CTB_DESC_SIZE + CTB_H2G_BUFFER_SIZE;
>>>>        cmds_size = CTB_G2H_BUFFER_SIZE;
>>>>        resv_space = G2H_ROOM_BUFFER_SIZE;
>>>> -    CT_DEBUG(ct, "%s desc %#tx cmds %#tx size %u/%u\n", "recv",
>>>> -         ptrdiff(desc, blob), ptrdiff(cmds, blob), cmds_size,
>>>> -         resv_space);
>>>> +    ct_dbg(ct, "%s desc %#tx cmds %#tx size %u/%u\n", "recv",
>>>> +           ptrdiff(desc, blob), ptrdiff(cmds, blob), cmds_size,
>>>> +           resv_space);
>>>>          guc_ct_buffer_init(&ct->ctbs.recv, desc, cmds, cmds_size,
>>>> resv_space);
>>>>    @@ -338,7 +356,7 @@ int intel_guc_ct_enable(struct intel_guc_ct *ct)
>>>>        return 0;
>>>>      err_out:
>>>> -    CT_PROBE_ERROR(ct, "Failed to enable CTB (%pe)\n", ERR_PTR(err));
>>>> +    ct_probe_error(ct, "Failed to enable CTB (%pe)\n", ERR_PTR(err));
>>>>        return err;
>>>>    }
>>>>    @@ -387,14 +405,12 @@ static int ct_write(struct intel_guc_ct *ct,
>>>>      #ifdef CONFIG_DRM_I915_DEBUG_GUC
>>>>        if (unlikely(tail != READ_ONCE(desc->tail))) {
>>>> -        CT_ERROR(ct, "Tail was modified %u != %u\n",
>>>> -             desc->tail, tail);
>>>> +        ct_err(ct, "Tail was modified %u != %u\n", desc->tail, tail);
>>>>            desc->status |= GUC_CTB_STATUS_MISMATCH;
>>>>            goto corrupted;
>>>>        }
>>>>        if (unlikely(READ_ONCE(desc->head) >= size)) {
>>>> -        CT_ERROR(ct, "Invalid head offset %u >= %u)\n",
>>>> -             desc->head, size);
>>>> +        ct_err(ct, "Invalid head offset %u >= %u)\n", desc->head,
>>>> size);
>>>>            desc->status |= GUC_CTB_STATUS_OVERFLOW;
>>>>            goto corrupted;
>>>>        }
>>>> @@ -415,8 +431,8 @@ static int ct_write(struct intel_guc_ct *ct,
>>>>            FIELD_PREP(GUC_HXG_EVENT_MSG_0_ACTION |
>>>>                   GUC_HXG_EVENT_MSG_0_DATA0, action[0]);
>>>>    -    CT_DEBUG(ct, "writing (tail %u) %*ph %*ph %*ph\n",
>>>> -         tail, 4, &header, 4, &hxg, 4 * (len - 1), &action[1]);
>>>> +    ct_dbg(ct, "writing (tail %u) %*ph %*ph %*ph\n",
>>>> +           tail, 4, &header, 4, &hxg, 4 * (len - 1), &action[1]);
>>>>          cmds[tail] = header;
>>>>        tail = (tail + 1) % size;
>>>> @@ -447,8 +463,8 @@ static int ct_write(struct intel_guc_ct *ct,
>>>>        return 0;
>>>>      corrupted:
>>>> -    CT_ERROR(ct, "Corrupted descriptor head=%u tail=%u status=%#x\n",
>>>> -         desc->head, desc->tail, desc->status);
>>>> +    ct_err(ct, "Corrupted descriptor on write head=%u tail=%u
>>>> status=%#x\n",
>>>> +           desc->head, desc->tail, desc->status);
>>>>        ctb->broken = true;
>>>>        return -EPIPE;
>>>>    }
>>>> @@ -507,17 +523,14 @@ static inline bool ct_deadlocked(struct
>>>> intel_guc_ct *ct)
>>>>            struct guc_ct_buffer_desc *send = ct->ctbs.send.desc;
>>>>            struct guc_ct_buffer_desc *recv = ct->ctbs.send.desc;
>>>>    -        CT_ERROR(ct, "Communication stalled for %lld ms, desc
>>>> status=%#x,%#x\n",
>>>> -             ktime_ms_delta(ktime_get(), ct->stall_time),
>>>> -             send->status, recv->status);
>>>> -        CT_ERROR(ct, "H2G Space: %u (Bytes)\n",
>>>> -             atomic_read(&ct->ctbs.send.space) * 4);
>>>> -        CT_ERROR(ct, "Head: %u (Dwords)\n", ct->ctbs.send.desc->head);
>>>> -        CT_ERROR(ct, "Tail: %u (Dwords)\n", ct->ctbs.send.desc->tail);
>>>> -        CT_ERROR(ct, "G2H Space: %u (Bytes)\n",
>>>> -             atomic_read(&ct->ctbs.recv.space) * 4);
>>>> -        CT_ERROR(ct, "Head: %u\n (Dwords)", ct->ctbs.recv.desc->head);
>>>> -        CT_ERROR(ct, "Tail: %u\n (Dwords)", ct->ctbs.recv.desc->tail);
>>>> +        ct_err(ct, "Communication stalled for %lld ms, desc
>>>> status=%#x,%#x\n",
>>>> +               ktime_ms_delta(ktime_get(), ct->stall_time),
>>>> send->status, recv->status);
>>>> +        ct_err(ct, "H2G Space: %u (Bytes)\n",
>>>> atomic_read(&ct->ctbs.send.space) * 4);
>>>> +        ct_err(ct, "Head: %u (Dwords)\n", ct->ctbs.send.desc->head);
>>>> +        ct_err(ct, "Tail: %u (Dwords)\n", ct->ctbs.send.desc->tail);
>>>> +        ct_err(ct, "G2H Space: %u (Bytes)\n",
>>>> atomic_read(&ct->ctbs.recv.space) * 4);
>>>> +        ct_err(ct, "Head: %u\n (Dwords)", ct->ctbs.recv.desc->head);
>>>> +        ct_err(ct, "Tail: %u\n (Dwords)", ct->ctbs.recv.desc->tail);
>>>>              ct->ctbs.send.broken = true;
>>>>        }
>>>> @@ -563,8 +576,7 @@ static inline bool h2g_has_room(struct
>>>> intel_guc_ct *ct, u32 len_dw)
>>>>          head = READ_ONCE(desc->head);
>>>>        if (unlikely(head > ctb->size)) {
>>>> -        CT_ERROR(ct, "Invalid head offset %u >= %u)\n",
>>>> -             head, ctb->size);
>>>> +        ct_err(ct, "Invalid head offset %u >= %u)\n", head, ctb->size);
>>>>            desc->status |= GUC_CTB_STATUS_OVERFLOW;
>>>>            ctb->broken = true;
>>>>            return false;
>>>> @@ -715,17 +727,17 @@ static int ct_send(struct intel_guc_ct *ct,
>>>>                /* wait_for_ct_request_update returns -ENODEV on
>>>> reset/suspend in progress.
>>>>                 * In this case, output is debug rather than error info
>>>>                 */
>>>> -            CT_DEBUG(ct, "Request %#x (fence %u) cancelled as CTB is
>>>> disabled\n",
>>>> -                 action[0], request.fence);
>>>> +            ct_dbg(ct, "Request %#x (fence %u) cancelled as CTB is
>>>> disabled\n",
>>>> +                   action[0], request.fence);
>>>>            else
>>>> -            CT_ERROR(ct, "No response for request %#x (fence %u)\n",
>>>> -                 action[0], request.fence);
>>>> +            ct_err(ct, "No response for request %#x (fence %u)\n",
>>>> +                   action[0], request.fence);
>>>>            goto unlink;
>>>>        }
>>>>          if (FIELD_GET(GUC_HXG_MSG_0_TYPE, *status) ==
>>>> GUC_HXG_TYPE_NO_RESPONSE_RETRY) {
>>>> -        CT_DEBUG(ct, "retrying request %#x (%u)\n", *action,
>>>> -             FIELD_GET(GUC_HXG_RETRY_MSG_0_REASON, *status));
>>>> +        ct_dbg(ct, "retrying request %#x (%u)\n", *action,
>>>> +               FIELD_GET(GUC_HXG_RETRY_MSG_0_REASON, *status));
>>>>            send_again = true;
>>>>            goto unlink;
>>>>        }
>>>> @@ -737,12 +749,12 @@ static int ct_send(struct intel_guc_ct *ct,
>>>>          if (response_buf) {
>>>>            /* There shall be no data in the status */
>>>> -        WARN_ON(FIELD_GET(GUC_HXG_RESPONSE_MSG_0_DATA0,
>>>> request.status));
>>>> +        ct_WARN_ON(ct, FIELD_GET(GUC_HXG_RESPONSE_MSG_0_DATA0,
>>>> request.status));
>>>>            /* Return actual response len */
>>>>            err = request.response_len;
>>>>        } else {
>>>>            /* There shall be no response payload */
>>>> -        WARN_ON(request.response_len);
>>>> +        ct_WARN_ON(ct, request.response_len);
>>>>            /* Return data decoded from the status dword */
>>>>            err = FIELD_GET(GUC_HXG_RESPONSE_MSG_0_DATA0, *status);
>>>>        }
>>>> @@ -771,7 +783,7 @@ int intel_guc_ct_send(struct intel_guc_ct *ct,
>>>> const u32 *action, u32 len,
>>>>            struct intel_guc *guc = ct_to_guc(ct);
>>>>            struct intel_uc *uc = container_of(guc, struct intel_uc, guc);
>>>>    -        WARN(!uc->reset_in_progress, "Unexpected send:
>>>> action=%#x\n", *action);
>>>> +        ct_WARN(ct, !uc->reset_in_progress, "Unexpected send:
>>>> action=%#x\n", *action);
>>>>            return -ENODEV;
>>>>        }
>>>>    @@ -784,11 +796,11 @@ int intel_guc_ct_send(struct intel_guc_ct
>>>> *ct, const u32 *action, u32 len,
>>>>        ret = ct_send(ct, action, len, response_buf, response_buf_size,
>>>> &status);
>>>>        if (unlikely(ret < 0)) {
>>>>            if (ret != -ENODEV)
>>>> -            CT_ERROR(ct, "Sending action %#x failed (%pe)
>>>> status=%#X\n",
>>>> -                 action[0], ERR_PTR(ret), status);
>>>> +            ct_err(ct, "sending action %#x failed (%pe) status=%#X\n",
>>>> +                   action[0], ERR_PTR(ret), status);
>>>>        } else if (unlikely(ret)) {
>>>> -        CT_DEBUG(ct, "send action %#x returned %d (%#x)\n",
>>>> -             action[0], ret, ret);
>>>> +        ct_dbg(ct, "send action %#x returned %d (%#x)\n",
>>>> +               action[0], ret, ret);
>>>>        }
>>>>          return ret;
>>>> @@ -838,7 +850,7 @@ static int ct_read(struct intel_guc_ct *ct,
>>>> struct ct_incoming_msg **msg)
>>>>                 * contexts/engines being reset. But should never
>>>> happen as
>>>>                 * no contexts should be active when CLIENT_RESET is sent.
>>>>                 */
>>>> -            CT_ERROR(ct, "Unexpected G2H after GuC has stopped!\n");
>>>> +            ct_err(ct, "Unexpected G2H after GuC has stopped!\n");
>>>>                status &= ~GUC_CTB_STATUS_UNUSED;
>>>>            }
>>>>    @@ -850,15 +862,13 @@ static int ct_read(struct intel_guc_ct *ct,
>>>> struct ct_incoming_msg **msg)
>>>>      #ifdef CONFIG_DRM_I915_DEBUG_GUC
>>>>        if (unlikely(head != READ_ONCE(desc->head))) {
>>>> -        CT_ERROR(ct, "Head was modified %u != %u\n",
>>>> -             desc->head, head);
>>>> +        ct_err(ct, "Head was modified %u != %u\n", desc->head, head);
>>>>            desc->status |= GUC_CTB_STATUS_MISMATCH;
>>>>            goto corrupted;
>>>>        }
>>>>    #endif
>>>>        if (unlikely(tail >= size)) {
>>>> -        CT_ERROR(ct, "Invalid tail offset %u >= %u)\n",
>>>> -             tail, size);
>>>> +        ct_err(ct, "Invalid tail offset %u >= %u)\n", tail, size);
>>>>            desc->status |= GUC_CTB_STATUS_OVERFLOW;
>>>>            goto corrupted;
>>>>        }
>>>> @@ -873,7 +883,7 @@ static int ct_read(struct intel_guc_ct *ct,
>>>> struct ct_incoming_msg **msg)
>>>>        /* beware of buffer wrap case */
>>>>        if (unlikely(available < 0))
>>>>            available += size;
>>>> -    CT_DEBUG(ct, "available %d (%u:%u:%u)\n", available, head, tail,
>>>> size);
>>>> +    ct_dbg(ct, "read available %d (%u:%u:%u)\n", available, head,
>>>> tail, size);
>>>>        GEM_BUG_ON(available < 0);
>>>>          header = cmds[head];
>>>> @@ -882,24 +892,24 @@ static int ct_read(struct intel_guc_ct *ct,
>>>> struct ct_incoming_msg **msg)
>>>>        /* message len with header */
>>>>        len = FIELD_GET(GUC_CTB_MSG_0_NUM_DWORDS, header) +
>>>> GUC_CTB_MSG_MIN_LEN;
>>>>        if (unlikely(len > (u32)available)) {
>>>> -        CT_ERROR(ct, "Incomplete message %*ph %*ph %*ph\n",
>>>> -             4, &header,
>>>> -             4 * (head + available - 1 > size ?
>>>> -                  size - head : available - 1), &cmds[head],
>>>> -             4 * (head + available - 1 > size ?
>>>> -                  available - 1 - size + head : 0), &cmds[0]);
>>>> +        ct_err(ct, "Incomplete message %*ph %*ph %*ph\n",
>>>> +               4, &header,
>>>> +               4 * (head + available - 1 > size ?
>>>> +                size - head : available - 1), &cmds[head],
>>>> +               4 * (head + available - 1 > size ?
>>>> +                available - 1 - size + head : 0), &cmds[0]);
>>>>            desc->status |= GUC_CTB_STATUS_UNDERFLOW;
>>>>            goto corrupted;
>>>>        }
>>>>          *msg = ct_alloc_msg(len);
>>>>        if (!*msg) {
>>>> -        CT_ERROR(ct, "No memory for message %*ph %*ph %*ph\n",
>>>> -             4, &header,
>>>> -             4 * (head + available - 1 > size ?
>>>> -                  size - head : available - 1), &cmds[head],
>>>> -             4 * (head + available - 1 > size ?
>>>> -                  available - 1 - size + head : 0), &cmds[0]);
>>>> +        ct_err(ct, "No memory for message %*ph %*ph %*ph\n",
>>>> +               4, &header,
>>>> +               4 * (head + available - 1 > size ?
>>>> +                size - head : available - 1), &cmds[head],
>>>> +               4 * (head + available - 1 > size ?
>>>> +                available - 1 - size + head : 0), &cmds[0]);
>>>>            return available;
>>>>        }
>>>>    @@ -909,7 +919,7 @@ static int ct_read(struct intel_guc_ct *ct,
>>>> struct ct_incoming_msg **msg)
>>>>            (*msg)->msg[i] = cmds[head];
>>>>            head = (head + 1) % size;
>>>>        }
>>>> -    CT_DEBUG(ct, "received %*ph\n", 4 * len, (*msg)->msg);
>>>> +    ct_dbg(ct, "received %*ph\n", 4 * len, (*msg)->msg);
>>>>          /* update local copies */
>>>>        ctb->head = head;
>>>> @@ -920,8 +930,8 @@ static int ct_read(struct intel_guc_ct *ct,
>>>> struct ct_incoming_msg **msg)
>>>>        return available - len;
>>>>      corrupted:
>>>> -    CT_ERROR(ct, "Corrupted descriptor head=%u tail=%u status=%#x\n",
>>>> -         desc->head, desc->tail, desc->status);
>>>> +    ct_err(ct, "Corrupted descriptor on read head=%u tail=%u
>>>> status=%#x\n",
>>>> +           desc->head, desc->tail, desc->status);
>>>>        ctb->broken = true;
>>>>        return -EPIPE;
>>>>    }
>>>> @@ -944,18 +954,17 @@ static int ct_handle_response(struct
>>>> intel_guc_ct *ct, struct ct_incoming_msg *r
>>>>               FIELD_GET(GUC_HXG_MSG_0_TYPE, hxg[0]) !=
>>>> GUC_HXG_TYPE_NO_RESPONSE_RETRY &&
>>>>               FIELD_GET(GUC_HXG_MSG_0_TYPE, hxg[0]) !=
>>>> GUC_HXG_TYPE_RESPONSE_FAILURE);
>>>>    -    CT_DEBUG(ct, "response fence %u status %#x\n", fence, hxg[0]);
>>>> +    ct_dbg(ct, "response fence %u status %#x\n", fence, hxg[0]);
>>>>          spin_lock_irqsave(&ct->requests.lock, flags);
>>>>        list_for_each_entry(req, &ct->requests.pending, link) {
>>>>            if (unlikely(fence != req->fence)) {
>>>> -            CT_DEBUG(ct, "request %u awaits response\n",
>>>> -                 req->fence);
>>>> +            ct_dbg(ct, "request %u awaits response\n", req->fence);
>>>>                continue;
>>>>            }
>>>>            if (unlikely(datalen > req->response_len)) {
>>>> -            CT_ERROR(ct, "Response %u too long (datalen %u > %u)\n",
>>>> -                 req->fence, datalen, req->response_len);
>>>> +            ct_err(ct, "response %u too long (datalen %u > %u)\n",
>>>> +                   req->fence, datalen, req->response_len);
>>>>                datalen = min(datalen, req->response_len);
>>>>                err = -EMSGSIZE;
>>>>            }
>>>> @@ -967,12 +976,11 @@ static int ct_handle_response(struct
>>>> intel_guc_ct *ct, struct ct_incoming_msg *r
>>>>            break;
>>>>        }
>>>>        if (!found) {
>>>> -        CT_ERROR(ct, "Unsolicited response (fence %u)\n", fence);
>>>> -        CT_ERROR(ct, "Could not find fence=%u, last_fence=%u\n", fence,
>>>> -             ct->requests.last_fence);
>>>> +        ct_err(ct, "Unsolicited response (fence %u)\n", fence);
>>>> +        ct_err(ct, "Could not find fence=%u, last_fence=%u\n", fence,
>>>> +               ct->requests.last_fence);
>>>>            list_for_each_entry(req, &ct->requests.pending, link)
>>>> -            CT_ERROR(ct, "request %u awaits response\n",
>>>> -                 req->fence);
>>>> +            ct_err(ct, "request %u awaits response\n", req->fence);
>>>>            err = -ENOKEY;
>>>>        }
>>>>        spin_unlock_irqrestore(&ct->requests.lock, flags);
>>>> @@ -998,7 +1006,7 @@ static int ct_process_request(struct
>>>> intel_guc_ct *ct, struct ct_incoming_msg *r
>>>>        action = FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, hxg[0]);
>>>>        len = hxg_len - GUC_HXG_MSG_MIN_LEN;
>>>>    -    CT_DEBUG(ct, "request %x %*ph\n", action, 4 * len, payload);
>>>> +    ct_dbg(ct, "request %x %*ph\n", action, 4 * len, payload);
>>>>          switch (action) {
>>>>        case INTEL_GUC_ACTION_DEFAULT:
>>>> @@ -1016,9 +1024,6 @@ static int ct_process_request(struct
>>>> intel_guc_ct *ct, struct ct_incoming_msg *r
>>>>            break;
>>>>        case INTEL_GUC_ACTION_STATE_CAPTURE_NOTIFICATION:
>>>>            ret = intel_guc_error_capture_process_msg(guc, payload, len);
>>>> -        if (unlikely(ret))
>>>> -            CT_ERROR(ct, "error capture notification failed %x %*ph\n",
>>>> -                 action, 4 * len, payload);
>>>>            break;
>>>>        case INTEL_GUC_ACTION_ENGINE_FAILURE_NOTIFICATION:
>>>>            ret = intel_guc_engine_failure_process_msg(guc, payload, len);
>>>> @@ -1028,11 +1033,11 @@ static int ct_process_request(struct
>>>> intel_guc_ct *ct, struct ct_incoming_msg *r
>>>>            ret = 0;
>>>>            break;
>>>>        case INTEL_GUC_ACTION_NOTIFY_CRASH_DUMP_POSTED:
>>>> -        CT_ERROR(ct, "Received GuC crash dump notification!\n");
>>>> +        guc_err(guc, "notification: Crash dump!\n");
>>>>            ret = 0;
>>>>            break;
>>>>        case INTEL_GUC_ACTION_NOTIFY_EXCEPTION:
>>>> -        CT_ERROR(ct, "Received GuC exception notification!\n");
>>>> +        guc_err(guc, "notification: Exception!\n");
>>>>            ret = 0;
>>>>            break;
>>>>        default:
>>>> @@ -1041,8 +1046,7 @@ static int ct_process_request(struct
>>>> intel_guc_ct *ct, struct ct_incoming_msg *r
>>>>        }
>>>>          if (unlikely(ret)) {
>>>> -        CT_ERROR(ct, "Failed to process request %04x (%pe)\n",
>>>> -             action, ERR_PTR(ret));
>>>> +        ct_err(ct, "Failed to process request %04x (%pe)\n", action,
>>>> ERR_PTR(ret));
>>>>            return ret;
>>>>        }
>>>>    @@ -1070,8 +1074,8 @@ static bool
>>>> ct_process_incoming_requests(struct intel_guc_ct *ct)
>>>>          err = ct_process_request(ct, request);
>>>>        if (unlikely(err)) {
>>>> -        CT_ERROR(ct, "Failed to process CT message (%pe) %*ph\n",
>>>> -             ERR_PTR(err), 4 * request->size, request->msg);
>>>> +        ct_err(ct, "Failed to process message (%pe) %*ph\n",
>>>> +               ERR_PTR(err), 4 * request->size, request->msg);
>>>>            ct_free_msg(request);
>>>>        }
>>>>    @@ -1149,8 +1153,8 @@ static int ct_handle_hxg(struct intel_guc_ct
>>>> *ct, struct ct_incoming_msg *msg)
>>>>          if (unlikely(err)) {
>>>>    failed:
>>>> -        CT_ERROR(ct, "Failed to handle HXG message (%pe) %*ph\n",
>>>> -             ERR_PTR(err), 4 * GUC_HXG_MSG_MIN_LEN, hxg);
>>>> +        ct_err(ct, "Failed to handle HXG message (%pe) %*ph\n",
>>>> +               ERR_PTR(err), 4 * GUC_HXG_MSG_MIN_LEN, hxg);
>>>>        }
>>>>        return err;
>>>>    }
>>>> @@ -1166,8 +1170,8 @@ static void ct_handle_msg(struct intel_guc_ct
>>>> *ct, struct ct_incoming_msg *msg)
>>>>            err = -EOPNOTSUPP;
>>>>          if (unlikely(err)) {
>>>> -        CT_ERROR(ct, "Failed to process CT message (%pe) %*ph\n",
>>>> -             ERR_PTR(err), 4 * msg->size, msg->msg);
>>>> +        ct_err(ct, "Failed to handle message (%pe) %*ph\n",
>>>> +               ERR_PTR(err), 4 * msg->size, msg->msg);
>>>>            ct_free_msg(msg);
>>>>        }
>>>>    }
>>>> @@ -1198,7 +1202,7 @@ static void ct_try_receive_message(struct
>>>> intel_guc_ct *ct)
>>>>    {
>>>>        int ret;
>>>>    -    if (GEM_WARN_ON(!ct->enabled))
>>>> +    if (ct_WARN_ON(ct, !ct->enabled))
>>>>            return;
>>>>          ret = ct_receive(ct);
>>>> @@ -1220,7 +1224,7 @@ static void ct_receive_tasklet_func(struct
>>>> tasklet_struct *t)
>>>>    void intel_guc_ct_event_handler(struct intel_guc_ct *ct)
>>>>    {
>>>>        if (unlikely(!ct->enabled)) {
>>>> -        WARN(1, "Unexpected GuC event received while CT disabled!\n");
>>>> +        ct_warn(ct, "Unexpected event received while disabled!\n");
>>>>            return;
>>>>        }
>>>>
Michal Wajdeczko Dec. 1, 2022, 11:56 a.m. UTC | #5
On 01.12.2022 01:41, John Harrison wrote:
> On 11/23/2022 12:45, Michal Wajdeczko wrote:
>> On 23.11.2022 02:25, John Harrison wrote:
>>> On 11/22/2022 09:54, Michal Wajdeczko wrote:
>>>> On 18.11.2022 02:58, John.C.Harrison@Intel.com wrote:
>>>>> From: John Harrison <John.C.Harrison@Intel.com>
>>>>>
>>>>> Re-work the existing GuC CT printers and extend as required to match
>>>>> the new wrapping scheme.
>>>>>
>>>>> Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
>>>>> ---
>>>>>    drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c | 222
>>>>> +++++++++++-----------
>>>>>    1 file changed, 113 insertions(+), 109 deletions(-)
>>>>>
>>>>> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
>>>>> b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
>>>>> index 2b22065e87bf9..9d404fb377637 100644
>>>>> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
>>>>> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
>>>>> @@ -18,31 +18,49 @@ static inline struct intel_guc *ct_to_guc(struct
>>>>> intel_guc_ct *ct)
>>>>>        return container_of(ct, struct intel_guc, ct);
>>>>>    }
>>>>>    -static inline struct intel_gt *ct_to_gt(struct intel_guc_ct *ct)
>>>>> -{
>>>>> -    return guc_to_gt(ct_to_guc(ct));
>>>>> -}
>>>>> -
>>>>>    static inline struct drm_i915_private *ct_to_i915(struct
>>>>> intel_guc_ct *ct)
>>>>>    {
>>>>> -    return ct_to_gt(ct)->i915;
>>>>> -}
>>>>> +    struct intel_guc *guc = ct_to_guc(ct);
>>>>> +    struct intel_gt *gt = guc_to_gt(guc);
>>>>>    -static inline struct drm_device *ct_to_drm(struct intel_guc_ct
>>>>> *ct)
>>>>> -{
>>>>> -    return &ct_to_i915(ct)->drm;
>>>>> +    return gt->i915;
>>>>>    }
>>>>>    -#define CT_ERROR(_ct, _fmt, ...) \
>>>>> -    drm_err(ct_to_drm(_ct), "CT: " _fmt, ##__VA_ARGS__)
>>>>> +#define ct_err(_ct, _fmt, ...) \
>>>>> +    guc_err(ct_to_guc(_ct), "CT " _fmt, ##__VA_ARGS__)
>>>>> +
>>>>> +#define ct_warn(_ct, _fmt, ...) \
>>>>> +    guc_warn(ct_to_guc(_ct), "CT " _fmt, ##__VA_ARGS__)
>>>>> +
>>>>> +#define ct_notice(_ct, _fmt, ...) \
>>>>> +    guc_notice(ct_to_guc(_ct), "CT " _fmt, ##__VA_ARGS__)
>>>>> +
>>>>> +#define ct_info(_ct, _fmt, ...) \
>>>>> +    guc_info(ct_to_guc(_ct), "CT " _fmt, ##__VA_ARGS__)
>>>>> +
>>>>>    #ifdef CONFIG_DRM_I915_DEBUG_GUC
>>>>> -#define CT_DEBUG(_ct, _fmt, ...) \
>>>>> -    drm_dbg(ct_to_drm(_ct), "CT: " _fmt, ##__VA_ARGS__)
>>>>> +#define ct_dbg(_ct, _fmt, ...) \
>>>>> +    guc_dbg(ct_to_guc(_ct), "CT " _fmt, ##__VA_ARGS__)
>>>>>    #else
>>>>> -#define CT_DEBUG(...)    do { } while (0)
>>>>> +#define ct_dbg(...)    do { } while (0)
>>>>>    #endif
>>>>> -#define CT_PROBE_ERROR(_ct, _fmt, ...) \
>>>>> -    i915_probe_error(ct_to_i915(ct), "CT: " _fmt, ##__VA_ARGS__)
>>>>> +
>>>>> +#define ct_probe_error(_ct, _fmt, ...) \
>>>>> +    do { \
>>>>> +        if (i915_error_injected()) \
>>>>> +            ct_dbg(_ct, _fmt, ##__VA_ARGS__); \
>>>>> +        else \
>>>>> +            ct_err(_ct, _fmt, ##__VA_ARGS__); \
>>>>> +    } while (0)
>>>> guc_probe_error ?
>>>>
>>>>> +
>>>>> +#define ct_WARN_ON(_ct, _condition) \
>>>>> +    ct_WARN(_ct, _condition, "%s", "ct_WARN_ON("
>>>>> __stringify(_condition) ")")
>>>>> +
>>>>> +#define ct_WARN(_ct, _condition, _fmt, ...) \
>>>>> +    guc_WARN(ct_to_guc(_ct), _condition, "CT " _fmt, ##__VA_ARGS__)
>>>>> +
>>>>> +#define ct_WARN_ONCE(_ct, _condition, _fmt, ...) \
>>>>> +    guc_WARN_ONCE(ct_to_guc(_ct), _condition, "CT " _fmt,
>>>>> ##__VA_ARGS__)
>>>>>      /**
>>>>>     * DOC: CTB Blob
>>>>> @@ -170,7 +188,7 @@ static int ct_control_enable(struct intel_guc_ct
>>>>> *ct, bool enable)
>>>>>        err = guc_action_control_ctb(ct_to_guc(ct), enable ?
>>>>>                         GUC_CTB_CONTROL_ENABLE :
>>>>> GUC_CTB_CONTROL_DISABLE);
>>>>>        if (unlikely(err))
>>>>> -        CT_PROBE_ERROR(ct, "Failed to control/%s CTB (%pe)\n",
>>>>> +        ct_probe_error(ct, "Failed to control/%s CTB (%pe)\n",
>>>>>                       str_enable_disable(enable), ERR_PTR(err));
>>>> btw, shouldn't we change all messages to start with lowercase ?
>>>>
>>>> was:
>>>>      "CT0: Failed to control/%s CTB (%pe)"
>>>> is:
>>>>      "GT0: GuC CT Failed to control/%s CTB (%pe)"
>>>>
>>>> unless we keep colon (as suggested by Tvrtko) as then:
>>>>
>>>>      "GT0: GuC CT: Failed to control/%s CTB (%pe)"
>>> Blanket added the colon makes it messy when a string actually wants to
>>> start with the prefix. The rule I've been using is lower case word when
>>> the prefix was part of the string, upper case word when the prefix is
>> Hmm, I'm not sure that we should attempt to have such a flexible rule as
>> we shouldn't rely too much on actual format of the prefix as it could be
>> changed any time.  All we should know about final log message is that it
>> _will_ properly identify the "GT" or "GuC" that this log is related to.
>>
>> So I would suggest to be just consistent and probably always start with
>> upper case, as that seems to be mostly used in kernel error logs, and
>> just make sure that any prefix will honor that (by including colon, or
>> braces), so this will always work like:
>>
>> "[drm] *ERROR* GT0: Failed to foo (-EIO)"
>> "[drm] *ERROR* GT0: GUC: Failed to foo (-EIO)"
>> "[drm] *ERROR* GT0: GUC: CT: Failed to foo (-EIO)"
>>
>> or
>>
>> "[drm] *ERROR* GT0: Failed to foo (-EIO)"
>> "[drm] *ERROR* GT0: [GUC] Failed to foo (-EIO)"
>> "[drm] *ERROR* GT0: [GUC] CT: Failed to foo (-EIO)"
>>
>> and even for:
>>
>> "[drm] *ERROR* GT(root) Failed to foo (-EIO)"
>> "[drm] *ERROR* GuC(media) Failed to foo (-EIO)"
>> "[drm] *ERROR* GT0 [GuC:CT] Failed to foo (-EIO)"
> All of which are hideous/complex/verbose/inconsistent. 'GT0: GUC: CT:'?
> Really? Or 'GT0: [GUC] CT:'? Why the random mix of separators? And how
> would you implement '[GUC:CT]' without having a CT definition that is
> entirely self contained and does chain on to the GuC level version?

you missed the point, as those were just examples of different possible
prefixes that one could define, to show that actual message shall not
make any assumption how such prefix will look like or how it will end
(like with or w/o colon, with "GuC" or "GT" tag or whatever)

> 
> This is pointless bikeshedding. If you want to re-write every single
> debug print (yet again) and invent much more complicated macro

the opposite, I want clear understanding how messages should be written
to *avoid* rewriting them if (for some reason) we decide to change or
update the prefix in the future

> definitions then feel free to take over the patch set. If not can we
> just approve the v3 version and move on to doing some actual work?

if everyone is happy that there is inconsistency in use between gt_xxx
messages where we shall be using messages starting with upper case
(since prefix ends with colon) and guc/ct_xxx messages where we shall be
using lower case message (since there is a known prefix without colon,
either "GuC" or "CT") then I'll be also fine, but for now that bothers
me a little, hence asking for clarifications/agreement

and while for dbg level messages it doesn't matter, I assume we should
be consistent for err/warn/info messages (as those will eventually show
up to the end user) so let maintainers decide here what is expectation here

> 
> John.
> 
> 
>>
>>
>>> just being added as a prefix. I originally just had the prefix as raw
>>> with no trailing space, so the individual print could decide to add a
>>> colon, a space, or whatever as appropriate. But that just makes for
>>> messy code with some files having every string look like ": Stuff
>>> happened" and other files have every string look like " failed to ...".
>>> The current version seems to be the most readable from the point of view
>>> of writing the code and of reading the dmesg results.
>>>
>>> And to be clear, the 'CT0' you have in your 'was' example only exists in
>>> the internal tree. It never made it to upstream. It is also just plain
>>> wrong. Each GT has two CTs - send and receive. So having 'CT1' meaning
>>> some random CT on GT1 (as opposed to the read channel on GT0, for
>>> example) was very confusing.
>> I don't think I was wrong, it might be you confusing CT with CTB.
>>
>> Note that we only support _single_ CT (Command Transport) and this
>> single CT includes _pair_ of CTBs (Command Transport Buffers).
>>
>> And logs/macros are CT oriented, not CTB.
> The GuC spec does not make any distinction. In fact, there is no CT in

but "CT" concept is already there, we do have intel_guc_ct, right?

https://elixir.bootlin.com/linux/latest/source/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h#L22

https://elixir.bootlin.com/linux/latest/source/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h#L56

> the spec. There is just CTB. So at best it is ambiguous as to what CT0 /
> CT1 refers to.
> 
>>
>> Regarding those my internal changes that were printing "CT%u": they were
>> added exactly for the same reason as your series: to know which GT
>> traffic we are tracing.  And were done locally on CT level only as there
>> was no helpers that would automatically append "GT%u" prefix - helpers
>> that you're defining right now ;)
>>
>> Michal
>>
>>> John.
>>>
>>>
>>>> Michal
>>>>
>>>>>          return err;
>>>>> @@ -201,7 +219,7 @@ static int ct_register_buffer(struct intel_guc_ct
>>>>> *ct, bool send,
>>>>>                       size);
>>>>>        if (unlikely(err))
>>>>>    failed:
>>>>> -        CT_PROBE_ERROR(ct, "Failed to register %s buffer (%pe)\n",
>>>>> +        ct_probe_error(ct, "Failed to register %s buffer (%pe)\n",
>>>>>                       send ? "SEND" : "RECV", ERR_PTR(err));
>>>>>          return err;
>>>>> @@ -235,21 +253,21 @@ int intel_guc_ct_init(struct intel_guc_ct *ct)
>>>>>        blob_size = 2 * CTB_DESC_SIZE + CTB_H2G_BUFFER_SIZE +
>>>>> CTB_G2H_BUFFER_SIZE;
>>>>>        err = intel_guc_allocate_and_map_vma(guc, blob_size, &ct->vma,
>>>>> &blob);
>>>>>        if (unlikely(err)) {
>>>>> -        CT_PROBE_ERROR(ct, "Failed to allocate %u for CTB data
>>>>> (%pe)\n",
>>>>> +        ct_probe_error(ct, "Failed to allocate %u for CTB data
>>>>> (%pe)\n",
>>>>>                       blob_size, ERR_PTR(err));
>>>>>            return err;
>>>>>        }
>>>>>    -    CT_DEBUG(ct, "base=%#x size=%u\n", intel_guc_ggtt_offset(guc,
>>>>> ct->vma), blob_size);
>>>>> +    ct_dbg(ct, "base=%#x size=%u\n", intel_guc_ggtt_offset(guc,
>>>>> ct->vma), blob_size);
>>>>>          /* store pointers to desc and cmds for send ctb */
>>>>>        desc = blob;
>>>>>        cmds = blob + 2 * CTB_DESC_SIZE;
>>>>>        cmds_size = CTB_H2G_BUFFER_SIZE;
>>>>>        resv_space = 0;
>>>>> -    CT_DEBUG(ct, "%s desc %#tx cmds %#tx size %u/%u\n", "send",
>>>>> -         ptrdiff(desc, blob), ptrdiff(cmds, blob), cmds_size,
>>>>> -         resv_space);
>>>>> +    ct_dbg(ct, "%s desc %#tx cmds %#tx size %u/%u\n", "send",
>>>>> +           ptrdiff(desc, blob), ptrdiff(cmds, blob), cmds_size,
>>>>> +           resv_space);
>>>>>          guc_ct_buffer_init(&ct->ctbs.send, desc, cmds, cmds_size,
>>>>> resv_space);
>>>>>    @@ -258,9 +276,9 @@ int intel_guc_ct_init(struct intel_guc_ct *ct)
>>>>>        cmds = blob + 2 * CTB_DESC_SIZE + CTB_H2G_BUFFER_SIZE;
>>>>>        cmds_size = CTB_G2H_BUFFER_SIZE;
>>>>>        resv_space = G2H_ROOM_BUFFER_SIZE;
>>>>> -    CT_DEBUG(ct, "%s desc %#tx cmds %#tx size %u/%u\n", "recv",
>>>>> -         ptrdiff(desc, blob), ptrdiff(cmds, blob), cmds_size,
>>>>> -         resv_space);
>>>>> +    ct_dbg(ct, "%s desc %#tx cmds %#tx size %u/%u\n", "recv",
>>>>> +           ptrdiff(desc, blob), ptrdiff(cmds, blob), cmds_size,
>>>>> +           resv_space);
>>>>>          guc_ct_buffer_init(&ct->ctbs.recv, desc, cmds, cmds_size,
>>>>> resv_space);
>>>>>    @@ -338,7 +356,7 @@ int intel_guc_ct_enable(struct intel_guc_ct
>>>>> *ct)
>>>>>        return 0;
>>>>>      err_out:
>>>>> -    CT_PROBE_ERROR(ct, "Failed to enable CTB (%pe)\n", ERR_PTR(err));
>>>>> +    ct_probe_error(ct, "Failed to enable CTB (%pe)\n", ERR_PTR(err));
>>>>>        return err;
>>>>>    }
>>>>>    @@ -387,14 +405,12 @@ static int ct_write(struct intel_guc_ct *ct,
>>>>>      #ifdef CONFIG_DRM_I915_DEBUG_GUC
>>>>>        if (unlikely(tail != READ_ONCE(desc->tail))) {
>>>>> -        CT_ERROR(ct, "Tail was modified %u != %u\n",
>>>>> -             desc->tail, tail);
>>>>> +        ct_err(ct, "Tail was modified %u != %u\n", desc->tail, tail);
>>>>>            desc->status |= GUC_CTB_STATUS_MISMATCH;
>>>>>            goto corrupted;
>>>>>        }
>>>>>        if (unlikely(READ_ONCE(desc->head) >= size)) {
>>>>> -        CT_ERROR(ct, "Invalid head offset %u >= %u)\n",
>>>>> -             desc->head, size);
>>>>> +        ct_err(ct, "Invalid head offset %u >= %u)\n", desc->head,
>>>>> size);
>>>>>            desc->status |= GUC_CTB_STATUS_OVERFLOW;
>>>>>            goto corrupted;
>>>>>        }
>>>>> @@ -415,8 +431,8 @@ static int ct_write(struct intel_guc_ct *ct,
>>>>>            FIELD_PREP(GUC_HXG_EVENT_MSG_0_ACTION |
>>>>>                   GUC_HXG_EVENT_MSG_0_DATA0, action[0]);
>>>>>    -    CT_DEBUG(ct, "writing (tail %u) %*ph %*ph %*ph\n",
>>>>> -         tail, 4, &header, 4, &hxg, 4 * (len - 1), &action[1]);
>>>>> +    ct_dbg(ct, "writing (tail %u) %*ph %*ph %*ph\n",
>>>>> +           tail, 4, &header, 4, &hxg, 4 * (len - 1), &action[1]);
>>>>>          cmds[tail] = header;
>>>>>        tail = (tail + 1) % size;
>>>>> @@ -447,8 +463,8 @@ static int ct_write(struct intel_guc_ct *ct,
>>>>>        return 0;
>>>>>      corrupted:
>>>>> -    CT_ERROR(ct, "Corrupted descriptor head=%u tail=%u status=%#x\n",
>>>>> -         desc->head, desc->tail, desc->status);
>>>>> +    ct_err(ct, "Corrupted descriptor on write head=%u tail=%u
>>>>> status=%#x\n",
>>>>> +           desc->head, desc->tail, desc->status);
>>>>>        ctb->broken = true;
>>>>>        return -EPIPE;
>>>>>    }
>>>>> @@ -507,17 +523,14 @@ static inline bool ct_deadlocked(struct
>>>>> intel_guc_ct *ct)
>>>>>            struct guc_ct_buffer_desc *send = ct->ctbs.send.desc;
>>>>>            struct guc_ct_buffer_desc *recv = ct->ctbs.send.desc;
>>>>>    -        CT_ERROR(ct, "Communication stalled for %lld ms, desc
>>>>> status=%#x,%#x\n",
>>>>> -             ktime_ms_delta(ktime_get(), ct->stall_time),
>>>>> -             send->status, recv->status);
>>>>> -        CT_ERROR(ct, "H2G Space: %u (Bytes)\n",
>>>>> -             atomic_read(&ct->ctbs.send.space) * 4);
>>>>> -        CT_ERROR(ct, "Head: %u (Dwords)\n",
>>>>> ct->ctbs.send.desc->head);
>>>>> -        CT_ERROR(ct, "Tail: %u (Dwords)\n",
>>>>> ct->ctbs.send.desc->tail);
>>>>> -        CT_ERROR(ct, "G2H Space: %u (Bytes)\n",
>>>>> -             atomic_read(&ct->ctbs.recv.space) * 4);
>>>>> -        CT_ERROR(ct, "Head: %u\n (Dwords)",
>>>>> ct->ctbs.recv.desc->head);
>>>>> -        CT_ERROR(ct, "Tail: %u\n (Dwords)",
>>>>> ct->ctbs.recv.desc->tail);
>>>>> +        ct_err(ct, "Communication stalled for %lld ms, desc
>>>>> status=%#x,%#x\n",
>>>>> +               ktime_ms_delta(ktime_get(), ct->stall_time),
>>>>> send->status, recv->status);
>>>>> +        ct_err(ct, "H2G Space: %u (Bytes)\n",
>>>>> atomic_read(&ct->ctbs.send.space) * 4);
>>>>> +        ct_err(ct, "Head: %u (Dwords)\n", ct->ctbs.send.desc->head);
>>>>> +        ct_err(ct, "Tail: %u (Dwords)\n", ct->ctbs.send.desc->tail);
>>>>> +        ct_err(ct, "G2H Space: %u (Bytes)\n",
>>>>> atomic_read(&ct->ctbs.recv.space) * 4);
>>>>> +        ct_err(ct, "Head: %u\n (Dwords)", ct->ctbs.recv.desc->head);
>>>>> +        ct_err(ct, "Tail: %u\n (Dwords)", ct->ctbs.recv.desc->tail);
>>>>>              ct->ctbs.send.broken = true;
>>>>>        }
>>>>> @@ -563,8 +576,7 @@ static inline bool h2g_has_room(struct
>>>>> intel_guc_ct *ct, u32 len_dw)
>>>>>          head = READ_ONCE(desc->head);
>>>>>        if (unlikely(head > ctb->size)) {
>>>>> -        CT_ERROR(ct, "Invalid head offset %u >= %u)\n",
>>>>> -             head, ctb->size);
>>>>> +        ct_err(ct, "Invalid head offset %u >= %u)\n", head,
>>>>> ctb->size);
>>>>>            desc->status |= GUC_CTB_STATUS_OVERFLOW;
>>>>>            ctb->broken = true;
>>>>>            return false;
>>>>> @@ -715,17 +727,17 @@ static int ct_send(struct intel_guc_ct *ct,
>>>>>                /* wait_for_ct_request_update returns -ENODEV on
>>>>> reset/suspend in progress.
>>>>>                 * In this case, output is debug rather than error info
>>>>>                 */
>>>>> -            CT_DEBUG(ct, "Request %#x (fence %u) cancelled as CTB is
>>>>> disabled\n",
>>>>> -                 action[0], request.fence);
>>>>> +            ct_dbg(ct, "Request %#x (fence %u) cancelled as CTB is
>>>>> disabled\n",
>>>>> +                   action[0], request.fence);
>>>>>            else
>>>>> -            CT_ERROR(ct, "No response for request %#x (fence %u)\n",
>>>>> -                 action[0], request.fence);
>>>>> +            ct_err(ct, "No response for request %#x (fence %u)\n",
>>>>> +                   action[0], request.fence);
>>>>>            goto unlink;
>>>>>        }
>>>>>          if (FIELD_GET(GUC_HXG_MSG_0_TYPE, *status) ==
>>>>> GUC_HXG_TYPE_NO_RESPONSE_RETRY) {
>>>>> -        CT_DEBUG(ct, "retrying request %#x (%u)\n", *action,
>>>>> -             FIELD_GET(GUC_HXG_RETRY_MSG_0_REASON, *status));
>>>>> +        ct_dbg(ct, "retrying request %#x (%u)\n", *action,
>>>>> +               FIELD_GET(GUC_HXG_RETRY_MSG_0_REASON, *status));
>>>>>            send_again = true;
>>>>>            goto unlink;
>>>>>        }
>>>>> @@ -737,12 +749,12 @@ static int ct_send(struct intel_guc_ct *ct,
>>>>>          if (response_buf) {
>>>>>            /* There shall be no data in the status */
>>>>> -        WARN_ON(FIELD_GET(GUC_HXG_RESPONSE_MSG_0_DATA0,
>>>>> request.status));
>>>>> +        ct_WARN_ON(ct, FIELD_GET(GUC_HXG_RESPONSE_MSG_0_DATA0,
>>>>> request.status));
>>>>>            /* Return actual response len */
>>>>>            err = request.response_len;
>>>>>        } else {
>>>>>            /* There shall be no response payload */
>>>>> -        WARN_ON(request.response_len);
>>>>> +        ct_WARN_ON(ct, request.response_len);
>>>>>            /* Return data decoded from the status dword */
>>>>>            err = FIELD_GET(GUC_HXG_RESPONSE_MSG_0_DATA0, *status);
>>>>>        }
>>>>> @@ -771,7 +783,7 @@ int intel_guc_ct_send(struct intel_guc_ct *ct,
>>>>> const u32 *action, u32 len,
>>>>>            struct intel_guc *guc = ct_to_guc(ct);
>>>>>            struct intel_uc *uc = container_of(guc, struct intel_uc,
>>>>> guc);
>>>>>    -        WARN(!uc->reset_in_progress, "Unexpected send:
>>>>> action=%#x\n", *action);
>>>>> +        ct_WARN(ct, !uc->reset_in_progress, "Unexpected send:
>>>>> action=%#x\n", *action);
>>>>>            return -ENODEV;
>>>>>        }
>>>>>    @@ -784,11 +796,11 @@ int intel_guc_ct_send(struct intel_guc_ct
>>>>> *ct, const u32 *action, u32 len,
>>>>>        ret = ct_send(ct, action, len, response_buf, response_buf_size,
>>>>> &status);
>>>>>        if (unlikely(ret < 0)) {
>>>>>            if (ret != -ENODEV)
>>>>> -            CT_ERROR(ct, "Sending action %#x failed (%pe)
>>>>> status=%#X\n",
>>>>> -                 action[0], ERR_PTR(ret), status);
>>>>> +            ct_err(ct, "sending action %#x failed (%pe)
>>>>> status=%#X\n",
>>>>> +                   action[0], ERR_PTR(ret), status);
>>>>>        } else if (unlikely(ret)) {
>>>>> -        CT_DEBUG(ct, "send action %#x returned %d (%#x)\n",
>>>>> -             action[0], ret, ret);
>>>>> +        ct_dbg(ct, "send action %#x returned %d (%#x)\n",
>>>>> +               action[0], ret, ret);
>>>>>        }
>>>>>          return ret;
>>>>> @@ -838,7 +850,7 @@ static int ct_read(struct intel_guc_ct *ct,
>>>>> struct ct_incoming_msg **msg)
>>>>>                 * contexts/engines being reset. But should never
>>>>> happen as
>>>>>                 * no contexts should be active when CLIENT_RESET is
>>>>> sent.
>>>>>                 */
>>>>> -            CT_ERROR(ct, "Unexpected G2H after GuC has stopped!\n");
>>>>> +            ct_err(ct, "Unexpected G2H after GuC has stopped!\n");
>>>>>                status &= ~GUC_CTB_STATUS_UNUSED;
>>>>>            }
>>>>>    @@ -850,15 +862,13 @@ static int ct_read(struct intel_guc_ct *ct,
>>>>> struct ct_incoming_msg **msg)
>>>>>      #ifdef CONFIG_DRM_I915_DEBUG_GUC
>>>>>        if (unlikely(head != READ_ONCE(desc->head))) {
>>>>> -        CT_ERROR(ct, "Head was modified %u != %u\n",
>>>>> -             desc->head, head);
>>>>> +        ct_err(ct, "Head was modified %u != %u\n", desc->head, head);
>>>>>            desc->status |= GUC_CTB_STATUS_MISMATCH;
>>>>>            goto corrupted;
>>>>>        }
>>>>>    #endif
>>>>>        if (unlikely(tail >= size)) {
>>>>> -        CT_ERROR(ct, "Invalid tail offset %u >= %u)\n",
>>>>> -             tail, size);
>>>>> +        ct_err(ct, "Invalid tail offset %u >= %u)\n", tail, size);
>>>>>            desc->status |= GUC_CTB_STATUS_OVERFLOW;
>>>>>            goto corrupted;
>>>>>        }
>>>>> @@ -873,7 +883,7 @@ static int ct_read(struct intel_guc_ct *ct,
>>>>> struct ct_incoming_msg **msg)
>>>>>        /* beware of buffer wrap case */
>>>>>        if (unlikely(available < 0))
>>>>>            available += size;
>>>>> -    CT_DEBUG(ct, "available %d (%u:%u:%u)\n", available, head, tail,
>>>>> size);
>>>>> +    ct_dbg(ct, "read available %d (%u:%u:%u)\n", available, head,
>>>>> tail, size);
>>>>>        GEM_BUG_ON(available < 0);
>>>>>          header = cmds[head];
>>>>> @@ -882,24 +892,24 @@ static int ct_read(struct intel_guc_ct *ct,
>>>>> struct ct_incoming_msg **msg)
>>>>>        /* message len with header */
>>>>>        len = FIELD_GET(GUC_CTB_MSG_0_NUM_DWORDS, header) +
>>>>> GUC_CTB_MSG_MIN_LEN;
>>>>>        if (unlikely(len > (u32)available)) {
>>>>> -        CT_ERROR(ct, "Incomplete message %*ph %*ph %*ph\n",
>>>>> -             4, &header,
>>>>> -             4 * (head + available - 1 > size ?
>>>>> -                  size - head : available - 1), &cmds[head],
>>>>> -             4 * (head + available - 1 > size ?
>>>>> -                  available - 1 - size + head : 0), &cmds[0]);
>>>>> +        ct_err(ct, "Incomplete message %*ph %*ph %*ph\n",
>>>>> +               4, &header,
>>>>> +               4 * (head + available - 1 > size ?
>>>>> +                size - head : available - 1), &cmds[head],
>>>>> +               4 * (head + available - 1 > size ?
>>>>> +                available - 1 - size + head : 0), &cmds[0]);
>>>>>            desc->status |= GUC_CTB_STATUS_UNDERFLOW;
>>>>>            goto corrupted;
>>>>>        }
>>>>>          *msg = ct_alloc_msg(len);
>>>>>        if (!*msg) {
>>>>> -        CT_ERROR(ct, "No memory for message %*ph %*ph %*ph\n",
>>>>> -             4, &header,
>>>>> -             4 * (head + available - 1 > size ?
>>>>> -                  size - head : available - 1), &cmds[head],
>>>>> -             4 * (head + available - 1 > size ?
>>>>> -                  available - 1 - size + head : 0), &cmds[0]);
>>>>> +        ct_err(ct, "No memory for message %*ph %*ph %*ph\n",
>>>>> +               4, &header,
>>>>> +               4 * (head + available - 1 > size ?
>>>>> +                size - head : available - 1), &cmds[head],
>>>>> +               4 * (head + available - 1 > size ?
>>>>> +                available - 1 - size + head : 0), &cmds[0]);
>>>>>            return available;
>>>>>        }
>>>>>    @@ -909,7 +919,7 @@ static int ct_read(struct intel_guc_ct *ct,
>>>>> struct ct_incoming_msg **msg)
>>>>>            (*msg)->msg[i] = cmds[head];
>>>>>            head = (head + 1) % size;
>>>>>        }
>>>>> -    CT_DEBUG(ct, "received %*ph\n", 4 * len, (*msg)->msg);
>>>>> +    ct_dbg(ct, "received %*ph\n", 4 * len, (*msg)->msg);
>>>>>          /* update local copies */
>>>>>        ctb->head = head;
>>>>> @@ -920,8 +930,8 @@ static int ct_read(struct intel_guc_ct *ct,
>>>>> struct ct_incoming_msg **msg)
>>>>>        return available - len;
>>>>>      corrupted:
>>>>> -    CT_ERROR(ct, "Corrupted descriptor head=%u tail=%u status=%#x\n",
>>>>> -         desc->head, desc->tail, desc->status);
>>>>> +    ct_err(ct, "Corrupted descriptor on read head=%u tail=%u
>>>>> status=%#x\n",
>>>>> +           desc->head, desc->tail, desc->status);
>>>>>        ctb->broken = true;
>>>>>        return -EPIPE;
>>>>>    }
>>>>> @@ -944,18 +954,17 @@ static int ct_handle_response(struct
>>>>> intel_guc_ct *ct, struct ct_incoming_msg *r
>>>>>               FIELD_GET(GUC_HXG_MSG_0_TYPE, hxg[0]) !=
>>>>> GUC_HXG_TYPE_NO_RESPONSE_RETRY &&
>>>>>               FIELD_GET(GUC_HXG_MSG_0_TYPE, hxg[0]) !=
>>>>> GUC_HXG_TYPE_RESPONSE_FAILURE);
>>>>>    -    CT_DEBUG(ct, "response fence %u status %#x\n", fence, hxg[0]);
>>>>> +    ct_dbg(ct, "response fence %u status %#x\n", fence, hxg[0]);
>>>>>          spin_lock_irqsave(&ct->requests.lock, flags);
>>>>>        list_for_each_entry(req, &ct->requests.pending, link) {
>>>>>            if (unlikely(fence != req->fence)) {
>>>>> -            CT_DEBUG(ct, "request %u awaits response\n",
>>>>> -                 req->fence);
>>>>> +            ct_dbg(ct, "request %u awaits response\n", req->fence);
>>>>>                continue;
>>>>>            }
>>>>>            if (unlikely(datalen > req->response_len)) {
>>>>> -            CT_ERROR(ct, "Response %u too long (datalen %u > %u)\n",
>>>>> -                 req->fence, datalen, req->response_len);
>>>>> +            ct_err(ct, "response %u too long (datalen %u > %u)\n",
>>>>> +                   req->fence, datalen, req->response_len);
>>>>>                datalen = min(datalen, req->response_len);
>>>>>                err = -EMSGSIZE;
>>>>>            }
>>>>> @@ -967,12 +976,11 @@ static int ct_handle_response(struct
>>>>> intel_guc_ct *ct, struct ct_incoming_msg *r
>>>>>            break;
>>>>>        }
>>>>>        if (!found) {
>>>>> -        CT_ERROR(ct, "Unsolicited response (fence %u)\n", fence);
>>>>> -        CT_ERROR(ct, "Could not find fence=%u, last_fence=%u\n",
>>>>> fence,
>>>>> -             ct->requests.last_fence);
>>>>> +        ct_err(ct, "Unsolicited response (fence %u)\n", fence);
>>>>> +        ct_err(ct, "Could not find fence=%u, last_fence=%u\n", fence,
>>>>> +               ct->requests.last_fence);
>>>>>            list_for_each_entry(req, &ct->requests.pending, link)
>>>>> -            CT_ERROR(ct, "request %u awaits response\n",
>>>>> -                 req->fence);
>>>>> +            ct_err(ct, "request %u awaits response\n", req->fence);
>>>>>            err = -ENOKEY;
>>>>>        }
>>>>>        spin_unlock_irqrestore(&ct->requests.lock, flags);
>>>>> @@ -998,7 +1006,7 @@ static int ct_process_request(struct
>>>>> intel_guc_ct *ct, struct ct_incoming_msg *r
>>>>>        action = FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, hxg[0]);
>>>>>        len = hxg_len - GUC_HXG_MSG_MIN_LEN;
>>>>>    -    CT_DEBUG(ct, "request %x %*ph\n", action, 4 * len, payload);
>>>>> +    ct_dbg(ct, "request %x %*ph\n", action, 4 * len, payload);
>>>>>          switch (action) {
>>>>>        case INTEL_GUC_ACTION_DEFAULT:
>>>>> @@ -1016,9 +1024,6 @@ static int ct_process_request(struct
>>>>> intel_guc_ct *ct, struct ct_incoming_msg *r
>>>>>            break;
>>>>>        case INTEL_GUC_ACTION_STATE_CAPTURE_NOTIFICATION:
>>>>>            ret = intel_guc_error_capture_process_msg(guc, payload,
>>>>> len);
>>>>> -        if (unlikely(ret))
>>>>> -            CT_ERROR(ct, "error capture notification failed %x
>>>>> %*ph\n",
>>>>> -                 action, 4 * len, payload);
>>>>>            break;
>>>>>        case INTEL_GUC_ACTION_ENGINE_FAILURE_NOTIFICATION:
>>>>>            ret = intel_guc_engine_failure_process_msg(guc, payload,
>>>>> len);
>>>>> @@ -1028,11 +1033,11 @@ static int ct_process_request(struct
>>>>> intel_guc_ct *ct, struct ct_incoming_msg *r
>>>>>            ret = 0;
>>>>>            break;
>>>>>        case INTEL_GUC_ACTION_NOTIFY_CRASH_DUMP_POSTED:
>>>>> -        CT_ERROR(ct, "Received GuC crash dump notification!\n");
>>>>> +        guc_err(guc, "notification: Crash dump!\n");
>>>>>            ret = 0;
>>>>>            break;
>>>>>        case INTEL_GUC_ACTION_NOTIFY_EXCEPTION:
>>>>> -        CT_ERROR(ct, "Received GuC exception notification!\n");
>>>>> +        guc_err(guc, "notification: Exception!\n");
>>>>>            ret = 0;
>>>>>            break;
>>>>>        default:
>>>>> @@ -1041,8 +1046,7 @@ static int ct_process_request(struct
>>>>> intel_guc_ct *ct, struct ct_incoming_msg *r
>>>>>        }
>>>>>          if (unlikely(ret)) {
>>>>> -        CT_ERROR(ct, "Failed to process request %04x (%pe)\n",
>>>>> -             action, ERR_PTR(ret));
>>>>> +        ct_err(ct, "Failed to process request %04x (%pe)\n", action,
>>>>> ERR_PTR(ret));
>>>>>            return ret;
>>>>>        }
>>>>>    @@ -1070,8 +1074,8 @@ static bool
>>>>> ct_process_incoming_requests(struct intel_guc_ct *ct)
>>>>>          err = ct_process_request(ct, request);
>>>>>        if (unlikely(err)) {
>>>>> -        CT_ERROR(ct, "Failed to process CT message (%pe) %*ph\n",
>>>>> -             ERR_PTR(err), 4 * request->size, request->msg);
>>>>> +        ct_err(ct, "Failed to process message (%pe) %*ph\n",
>>>>> +               ERR_PTR(err), 4 * request->size, request->msg);
>>>>>            ct_free_msg(request);
>>>>>        }
>>>>>    @@ -1149,8 +1153,8 @@ static int ct_handle_hxg(struct intel_guc_ct
>>>>> *ct, struct ct_incoming_msg *msg)
>>>>>          if (unlikely(err)) {
>>>>>    failed:
>>>>> -        CT_ERROR(ct, "Failed to handle HXG message (%pe) %*ph\n",
>>>>> -             ERR_PTR(err), 4 * GUC_HXG_MSG_MIN_LEN, hxg);
>>>>> +        ct_err(ct, "Failed to handle HXG message (%pe) %*ph\n",
>>>>> +               ERR_PTR(err), 4 * GUC_HXG_MSG_MIN_LEN, hxg);
>>>>>        }
>>>>>        return err;
>>>>>    }
>>>>> @@ -1166,8 +1170,8 @@ static void ct_handle_msg(struct intel_guc_ct
>>>>> *ct, struct ct_incoming_msg *msg)
>>>>>            err = -EOPNOTSUPP;
>>>>>          if (unlikely(err)) {
>>>>> -        CT_ERROR(ct, "Failed to process CT message (%pe) %*ph\n",
>>>>> -             ERR_PTR(err), 4 * msg->size, msg->msg);
>>>>> +        ct_err(ct, "Failed to handle message (%pe) %*ph\n",
>>>>> +               ERR_PTR(err), 4 * msg->size, msg->msg);
>>>>>            ct_free_msg(msg);
>>>>>        }
>>>>>    }
>>>>> @@ -1198,7 +1202,7 @@ static void ct_try_receive_message(struct
>>>>> intel_guc_ct *ct)
>>>>>    {
>>>>>        int ret;
>>>>>    -    if (GEM_WARN_ON(!ct->enabled))
>>>>> +    if (ct_WARN_ON(ct, !ct->enabled))
>>>>>            return;
>>>>>          ret = ct_receive(ct);
>>>>> @@ -1220,7 +1224,7 @@ static void ct_receive_tasklet_func(struct
>>>>> tasklet_struct *t)
>>>>>    void intel_guc_ct_event_handler(struct intel_guc_ct *ct)
>>>>>    {
>>>>>        if (unlikely(!ct->enabled)) {
>>>>> -        WARN(1, "Unexpected GuC event received while CT
>>>>> disabled!\n");
>>>>> +        ct_warn(ct, "Unexpected event received while disabled!\n");
>>>>>            return;
>>>>>        }
>>>>>    
>
Tvrtko Ursulin Dec. 1, 2022, 12:01 p.m. UTC | #6
On 01/12/2022 11:56, Michal Wajdeczko wrote:
> On 01.12.2022 01:41, John Harrison wrote:
>> On 11/23/2022 12:45, Michal Wajdeczko wrote:
>>> On 23.11.2022 02:25, John Harrison wrote:
>>>> On 11/22/2022 09:54, Michal Wajdeczko wrote:
>>>>> On 18.11.2022 02:58, John.C.Harrison@Intel.com wrote:
>>>>>> From: John Harrison <John.C.Harrison@Intel.com>
>>>>>>
>>>>>> Re-work the existing GuC CT printers and extend as required to match
>>>>>> the new wrapping scheme.
>>>>>>
>>>>>> Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
>>>>>> ---
>>>>>>     drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c | 222
>>>>>> +++++++++++-----------
>>>>>>     1 file changed, 113 insertions(+), 109 deletions(-)
>>>>>>
>>>>>> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
>>>>>> b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
>>>>>> index 2b22065e87bf9..9d404fb377637 100644
>>>>>> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
>>>>>> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
>>>>>> @@ -18,31 +18,49 @@ static inline struct intel_guc *ct_to_guc(struct
>>>>>> intel_guc_ct *ct)
>>>>>>         return container_of(ct, struct intel_guc, ct);
>>>>>>     }
>>>>>>     -static inline struct intel_gt *ct_to_gt(struct intel_guc_ct *ct)
>>>>>> -{
>>>>>> -    return guc_to_gt(ct_to_guc(ct));
>>>>>> -}
>>>>>> -
>>>>>>     static inline struct drm_i915_private *ct_to_i915(struct
>>>>>> intel_guc_ct *ct)
>>>>>>     {
>>>>>> -    return ct_to_gt(ct)->i915;
>>>>>> -}
>>>>>> +    struct intel_guc *guc = ct_to_guc(ct);
>>>>>> +    struct intel_gt *gt = guc_to_gt(guc);
>>>>>>     -static inline struct drm_device *ct_to_drm(struct intel_guc_ct
>>>>>> *ct)
>>>>>> -{
>>>>>> -    return &ct_to_i915(ct)->drm;
>>>>>> +    return gt->i915;
>>>>>>     }
>>>>>>     -#define CT_ERROR(_ct, _fmt, ...) \
>>>>>> -    drm_err(ct_to_drm(_ct), "CT: " _fmt, ##__VA_ARGS__)
>>>>>> +#define ct_err(_ct, _fmt, ...) \
>>>>>> +    guc_err(ct_to_guc(_ct), "CT " _fmt, ##__VA_ARGS__)
>>>>>> +
>>>>>> +#define ct_warn(_ct, _fmt, ...) \
>>>>>> +    guc_warn(ct_to_guc(_ct), "CT " _fmt, ##__VA_ARGS__)
>>>>>> +
>>>>>> +#define ct_notice(_ct, _fmt, ...) \
>>>>>> +    guc_notice(ct_to_guc(_ct), "CT " _fmt, ##__VA_ARGS__)
>>>>>> +
>>>>>> +#define ct_info(_ct, _fmt, ...) \
>>>>>> +    guc_info(ct_to_guc(_ct), "CT " _fmt, ##__VA_ARGS__)
>>>>>> +
>>>>>>     #ifdef CONFIG_DRM_I915_DEBUG_GUC
>>>>>> -#define CT_DEBUG(_ct, _fmt, ...) \
>>>>>> -    drm_dbg(ct_to_drm(_ct), "CT: " _fmt, ##__VA_ARGS__)
>>>>>> +#define ct_dbg(_ct, _fmt, ...) \
>>>>>> +    guc_dbg(ct_to_guc(_ct), "CT " _fmt, ##__VA_ARGS__)
>>>>>>     #else
>>>>>> -#define CT_DEBUG(...)    do { } while (0)
>>>>>> +#define ct_dbg(...)    do { } while (0)
>>>>>>     #endif
>>>>>> -#define CT_PROBE_ERROR(_ct, _fmt, ...) \
>>>>>> -    i915_probe_error(ct_to_i915(ct), "CT: " _fmt, ##__VA_ARGS__)
>>>>>> +
>>>>>> +#define ct_probe_error(_ct, _fmt, ...) \
>>>>>> +    do { \
>>>>>> +        if (i915_error_injected()) \
>>>>>> +            ct_dbg(_ct, _fmt, ##__VA_ARGS__); \
>>>>>> +        else \
>>>>>> +            ct_err(_ct, _fmt, ##__VA_ARGS__); \
>>>>>> +    } while (0)
>>>>> guc_probe_error ?
>>>>>
>>>>>> +
>>>>>> +#define ct_WARN_ON(_ct, _condition) \
>>>>>> +    ct_WARN(_ct, _condition, "%s", "ct_WARN_ON("
>>>>>> __stringify(_condition) ")")
>>>>>> +
>>>>>> +#define ct_WARN(_ct, _condition, _fmt, ...) \
>>>>>> +    guc_WARN(ct_to_guc(_ct), _condition, "CT " _fmt, ##__VA_ARGS__)
>>>>>> +
>>>>>> +#define ct_WARN_ONCE(_ct, _condition, _fmt, ...) \
>>>>>> +    guc_WARN_ONCE(ct_to_guc(_ct), _condition, "CT " _fmt,
>>>>>> ##__VA_ARGS__)
>>>>>>       /**
>>>>>>      * DOC: CTB Blob
>>>>>> @@ -170,7 +188,7 @@ static int ct_control_enable(struct intel_guc_ct
>>>>>> *ct, bool enable)
>>>>>>         err = guc_action_control_ctb(ct_to_guc(ct), enable ?
>>>>>>                          GUC_CTB_CONTROL_ENABLE :
>>>>>> GUC_CTB_CONTROL_DISABLE);
>>>>>>         if (unlikely(err))
>>>>>> -        CT_PROBE_ERROR(ct, "Failed to control/%s CTB (%pe)\n",
>>>>>> +        ct_probe_error(ct, "Failed to control/%s CTB (%pe)\n",
>>>>>>                        str_enable_disable(enable), ERR_PTR(err));
>>>>> btw, shouldn't we change all messages to start with lowercase ?
>>>>>
>>>>> was:
>>>>>       "CT0: Failed to control/%s CTB (%pe)"
>>>>> is:
>>>>>       "GT0: GuC CT Failed to control/%s CTB (%pe)"
>>>>>
>>>>> unless we keep colon (as suggested by Tvrtko) as then:
>>>>>
>>>>>       "GT0: GuC CT: Failed to control/%s CTB (%pe)"
>>>> Blanket added the colon makes it messy when a string actually wants to
>>>> start with the prefix. The rule I've been using is lower case word when
>>>> the prefix was part of the string, upper case word when the prefix is
>>> Hmm, I'm not sure that we should attempt to have such a flexible rule as
>>> we shouldn't rely too much on actual format of the prefix as it could be
>>> changed any time.  All we should know about final log message is that it
>>> _will_ properly identify the "GT" or "GuC" that this log is related to.
>>>
>>> So I would suggest to be just consistent and probably always start with
>>> upper case, as that seems to be mostly used in kernel error logs, and
>>> just make sure that any prefix will honor that (by including colon, or
>>> braces), so this will always work like:
>>>
>>> "[drm] *ERROR* GT0: Failed to foo (-EIO)"
>>> "[drm] *ERROR* GT0: GUC: Failed to foo (-EIO)"
>>> "[drm] *ERROR* GT0: GUC: CT: Failed to foo (-EIO)"
>>>
>>> or
>>>
>>> "[drm] *ERROR* GT0: Failed to foo (-EIO)"
>>> "[drm] *ERROR* GT0: [GUC] Failed to foo (-EIO)"
>>> "[drm] *ERROR* GT0: [GUC] CT: Failed to foo (-EIO)"
>>>
>>> and even for:
>>>
>>> "[drm] *ERROR* GT(root) Failed to foo (-EIO)"
>>> "[drm] *ERROR* GuC(media) Failed to foo (-EIO)"
>>> "[drm] *ERROR* GT0 [GuC:CT] Failed to foo (-EIO)"
>> All of which are hideous/complex/verbose/inconsistent. 'GT0: GUC: CT:'?
>> Really? Or 'GT0: [GUC] CT:'? Why the random mix of separators? And how
>> would you implement '[GUC:CT]' without having a CT definition that is
>> entirely self contained and does chain on to the GuC level version?
> 
> you missed the point, as those were just examples of different possible
> prefixes that one could define, to show that actual message shall not
> make any assumption how such prefix will look like or how it will end
> (like with or w/o colon, with "GuC" or "GT" tag or whatever)
> 
>>
>> This is pointless bikeshedding. If you want to re-write every single
>> debug print (yet again) and invent much more complicated macro
> 
> the opposite, I want clear understanding how messages should be written
> to *avoid* rewriting them if (for some reason) we decide to change or
> update the prefix in the future
> 
>> definitions then feel free to take over the patch set. If not can we
>> just approve the v3 version and move on to doing some actual work?
> 
> if everyone is happy that there is inconsistency in use between gt_xxx
> messages where we shall be using messages starting with upper case
> (since prefix ends with colon) and guc/ct_xxx messages where we shall be
> using lower case message (since there is a known prefix without colon,
> either "GuC" or "CT") then I'll be also fine, but for now that bothers
> me a little, hence asking for clarifications/agreement
> 
> and while for dbg level messages it doesn't matter, I assume we should
> be consistent for err/warn/info messages (as those will eventually show
> up to the end user) so let maintainers decide here what is expectation here

Could we have some examples pasted here, of the end result of this 
series, for all message "categories" (origins, macros, whatever)?

Regards,

Tvrtko
John Harrison Dec. 2, 2022, 8:14 p.m. UTC | #7
On 12/1/2022 04:01, Tvrtko Ursulin wrote:
> On 01/12/2022 11:56, Michal Wajdeczko wrote:
>> On 01.12.2022 01:41, John Harrison wrote:
>>> On 11/23/2022 12:45, Michal Wajdeczko wrote:
>>>> On 23.11.2022 02:25, John Harrison wrote:
>>>>> On 11/22/2022 09:54, Michal Wajdeczko wrote:
>>>>>> On 18.11.2022 02:58, John.C.Harrison@Intel.com wrote:
>>>>>>> From: John Harrison <John.C.Harrison@Intel.com>
>>>>>>>
>>>>>>> Re-work the existing GuC CT printers and extend as required to 
>>>>>>> match
>>>>>>> the new wrapping scheme.
>>>>>>>
>>>>>>>
>>>>>>> [snip]...
>>>>>>>
>>>>>>>       /**
>>>>>>>      * DOC: CTB Blob
>>>>>>> @@ -170,7 +188,7 @@ static int ct_control_enable(struct 
>>>>>>> intel_guc_ct
>>>>>>> *ct, bool enable)
>>>>>>>         err = guc_action_control_ctb(ct_to_guc(ct), enable ?
>>>>>>>                          GUC_CTB_CONTROL_ENABLE :
>>>>>>> GUC_CTB_CONTROL_DISABLE);
>>>>>>>         if (unlikely(err))
>>>>>>> -        CT_PROBE_ERROR(ct, "Failed to control/%s CTB (%pe)\n",
>>>>>>> +        ct_probe_error(ct, "Failed to control/%s CTB (%pe)\n",
>>>>>>>                        str_enable_disable(enable), ERR_PTR(err));
>>>>>> btw, shouldn't we change all messages to start with lowercase ?
>>>>>>
>>>>>> was:
>>>>>>       "CT0: Failed to control/%s CTB (%pe)"
>>>>>> is:
>>>>>>       "GT0: GuC CT Failed to control/%s CTB (%pe)"
>>>>>>
>>>>>> unless we keep colon (as suggested by Tvrtko) as then:
>>>>>>
>>>>>>       "GT0: GuC CT: Failed to control/%s CTB (%pe)"
>>>>> Blanket added the colon makes it messy when a string actually 
>>>>> wants to
>>>>> start with the prefix. The rule I've been using is lower case word 
>>>>> when
>>>>> the prefix was part of the string, upper case word when the prefix is
>>>> Hmm, I'm not sure that we should attempt to have such a flexible 
>>>> rule as
>>>> we shouldn't rely too much on actual format of the prefix as it 
>>>> could be
>>>> changed any time.  All we should know about final log message is 
>>>> that it
>>>> _will_ properly identify the "GT" or "GuC" that this log is related 
>>>> to.
>>>>
>>>> So I would suggest to be just consistent and probably always start 
>>>> with
>>>> upper case, as that seems to be mostly used in kernel error logs, and
>>>> just make sure that any prefix will honor that (by including colon, or
>>>> braces), so this will always work like:
>>>>
>>>> "[drm] *ERROR* GT0: Failed to foo (-EIO)"
>>>> "[drm] *ERROR* GT0: GUC: Failed to foo (-EIO)"
>>>> "[drm] *ERROR* GT0: GUC: CT: Failed to foo (-EIO)"
>>>>
>>>> or
>>>>
>>>> "[drm] *ERROR* GT0: Failed to foo (-EIO)"
>>>> "[drm] *ERROR* GT0: [GUC] Failed to foo (-EIO)"
>>>> "[drm] *ERROR* GT0: [GUC] CT: Failed to foo (-EIO)"
>>>>
>>>> and even for:
>>>>
>>>> "[drm] *ERROR* GT(root) Failed to foo (-EIO)"
>>>> "[drm] *ERROR* GuC(media) Failed to foo (-EIO)"
>>>> "[drm] *ERROR* GT0 [GuC:CT] Failed to foo (-EIO)"
>>> All of which are hideous/complex/verbose/inconsistent. 'GT0: GUC: CT:'?
>>> Really? Or 'GT0: [GUC] CT:'? Why the random mix of separators? And how
>>> would you implement '[GUC:CT]' without having a CT definition that is
>>> entirely self contained and does chain on to the GuC level version?
>>
>> you missed the point, as those were just examples of different possible
>> prefixes that one could define, to show that actual message shall not
>> make any assumption how such prefix will look like or how it will end
>> (like with or w/o colon, with "GuC" or "GT" tag or whatever)
The point is that none of those are ever likely to happen so are 
meaningless to prepare for.

>>
>>>
>>> This is pointless bikeshedding. If you want to re-write every single
>>> debug print (yet again) and invent much more complicated macro
>>
>> the opposite, I want clear understanding how messages should be written
>> to *avoid* rewriting them if (for some reason) we decide to change or
>> update the prefix in the future
You say that like there is any consistency or regulation at all on how 
messages are currently written.

>>
>>> definitions then feel free to take over the patch set. If not can we
>>> just approve the v3 version and move on to doing some actual work?
>>
>> if everyone is happy that there is inconsistency in use between gt_xxx
>> messages where we shall be using messages starting with upper case
>> (since prefix ends with colon) and guc/ct_xxx messages where we shall be
>> using lower case message (since there is a known prefix without colon,
>> either "GuC" or "CT") then I'll be also fine, but for now that bothers
>> me a little, hence asking for clarifications/agreement
>>
I don't think anyone is happy with anything.

Personally, I don't like the idea of adding fixed prefixes for every 
subsystem just for the sake of doing so. Having a wrapper that abstracts 
out the messy structure mangling of getting from a 'ct' object back to a 
'gt' object maybe has use. But with regards to fixed prefixes, all I 
ever wanted was to add GT# (because that adds useful information that is 
other completely lacking) and, ideally, a display prefix for all of the 
display prints (because they completely swamp all other output in CI 
dmesg logs, so being able to trivially filter them out would be 
incredibly useful). Beyond that, it seems like adding work and forced 
formatting just for the sake of it. If a print wants to say 'Error 
received on CT read channel' then why should it be forced to be 'GuC CT 
Error received on read channel' which is coded as 'Error received on 
read channel' ? To me, that seems less clear both in dmesg and in code.

And for the record, I didn't want to add the GT thing as message 
'prefix' either. I would rather have just extended the PCI address that 
is printed by the dev level printer. But that would have been a much 
more invasive change at many levels outside of i915. As in, "i915 
0000:00:02.0: [drm]" -> "i915 0000:00:02.0:GT0 [drm]".

And here we are a month down the road and arguing over whether a three 
level deep prefix should have a colon or braces around each level's 
forced message prefix!? Meanwhile, it is still impossible to know which 
GT produced a given error in a multi-GT system and the ability to filter 
out display messages has been completely nacked for reasons I don't 
understand at all before I even got to suggest it.

>> and while for dbg level messages it doesn't matter, I assume we should
>> be consistent for err/warn/info messages (as those will eventually show
>> up to the end user) so let maintainers decide here what is 
>> expectation here
>
> Could we have some examples pasted here, of the end result of this 
> series, for all message "categories" (origins, macros, whatever)?

GT initialisation:
gt_err(gt, "Failed to allocate scratch page\n");
i915 0000:00:02.0: [drm] GT0: Failed to allocate scratch page

G2H notification handler:
guc_err(guc, "notification: Invalid length %u for deregister done\n", len);
i915 0000:00:02.0: [drm] GT0: GuC notification: Invalid length 0 for 
deregister done

CTB initialisation:
ct_probe_error(ct, "Failed to control/%s CTB (%pe)\n", 
str_enable_disable(enable), ERR_PTR(err));
i915 0000:00:02.0: [drm] GT0: GuC CT Failed to control/enable CTB (EINVAL)

Random meaningless (to me) message that is apparently a display thing:
drm_dbg_kms(&dev_priv->drm, "disabling %s\n", pll->info->name);
i915 0000:00:02.0: [drm:intel_disable_shared_dpll [i915]] disabling PORT 
PLL B

I'm sure you can extrapolate to all other forms of dbg, notice, info, 
etc. without me having to manually type each one out, given that they 
are all identical.

Personally, I think the above should be just:
gt_err(gt, "Failed to allocate scratch page\n");
i915 0000:00:02.0: [drm] GT0: Failed to allocate scratch page

gt_err(guc_to_gt(guc), "G2H: Invalid length for deregister done: %u\n", 
len);
i915 0000:00:02.0: [drm] GT0: G2H: Invalid length for deregister done: 0

gt_probe_error(ct_to_gt(ct), "Failed to %s CT %d buffer (%pe)\n", 
str_enable_disable(enable), send ? "SEND" : "RECV", ERR_PTR(err));
i915 0000:00:02.0: [drm] GT0: Failed to enable CT SEND buffer (EINVAL)

drm_dbg_kms(&dev_priv->drm, "disabling %s\n", pll->info->name);
i915 0000:00:02.0: [drm:intel_disable_shared_dpll [i915-KMS]] disabling 
PORT PLL B
But presumably that requires finishing the plan of splitting out the 
display code into a separate driver. So for now, something like this 
would still be a massive improvement:
kms_dbg(dev_priv, "disabling %s\n", pll->info->name);
i915 0000:00:02.0: [drm:intel_disable_shared_dpll [i915]] KMS: disabling 
PORT PLL B

John.

>
> Regards,
>
> Tvrtko
Tvrtko Ursulin Dec. 5, 2022, 1:16 p.m. UTC | #8
On 02/12/2022 20:14, John Harrison wrote:

>>> and while for dbg level messages it doesn't matter, I assume we should
>>> be consistent for err/warn/info messages (as those will eventually show
>>> up to the end user) so let maintainers decide here what is 
>>> expectation here
>>
>> Could we have some examples pasted here, of the end result of this 
>> series, for all message "categories" (origins, macros, whatever)?
> 
> GT initialisation:
> gt_err(gt, "Failed to allocate scratch page\n");
> i915 0000:00:02.0: [drm] GT0: Failed to allocate scratch page
> 
> G2H notification handler:
> guc_err(guc, "notification: Invalid length %u for deregister done\n", len);
> i915 0000:00:02.0: [drm] GT0: GuC notification: Invalid length 0 for 
> deregister done

I'm not liking the inconsistency between gt_err and guc_err where with latter callers either need to start the message with lower case because of the unstructured "GuC " prefix added. Which then reads bad if callers do guc_err(guc, "Error X happend").

Looks like Michal was pointing out the same thing, AFAIU at least when re-reading the thread now.

Why wouldn't this work:

guc_err(guc, "Invalid length %u for deregister done notification\n", len);
i915 0000:00:02.0: [drm] GT0: GuC: Invalid length 0 for deregister done notification

Or if the use case for adding custom prefixes is strong then maybe consider:

guc_err(guc, "notification", "Invalid length 0 for deregister done");
i915 0000:00:02.0: [drm] GT0: GuC notification: Invalid length 0 for deregister done

guc_err(guc, "", "Error X");
i915 0000:00:02.0: [drm] GT0: GuC: Error X

> CTB initialisation:
> ct_probe_error(ct, "Failed to control/%s CTB (%pe)\n", 
> str_enable_disable(enable), ERR_PTR(err));
> i915 0000:00:02.0: [drm] GT0: GuC CT Failed to control/enable CTB (EINVAL)

Okay same as above.

> Random meaningless (to me) message that is apparently a display thing:
> drm_dbg_kms(&dev_priv->drm, "disabling %s\n", pll->info->name);
> i915 0000:00:02.0: [drm:intel_disable_shared_dpll [i915]] disabling PORT 
> PLL B

Plan is to not touch outside gt/.

> I'm sure you can extrapolate to all other forms of dbg, notice, info, 
> etc. without me having to manually type each one out, given that they 
> are all identical.
> 
> Personally, I think the above should be just:
> gt_err(gt, "Failed to allocate scratch page\n");
> i915 0000:00:02.0: [drm] GT0: Failed to allocate scratch page
> 
> gt_err(guc_to_gt(guc), "G2H: Invalid length for deregister done: %u\n", 
> len);
> i915 0000:00:02.0: [drm] GT0: G2H: Invalid length for deregister done: 0
> 
> gt_probe_error(ct_to_gt(ct), "Failed to %s CT %d buffer (%pe)\n", 
> str_enable_disable(enable), send ? "SEND" : "RECV", ERR_PTR(err));
> i915 0000:00:02.0: [drm] GT0: Failed to enable CT SEND buffer (EINVAL)

We could but it seems we agreed some weeks ago to consolidate the existing CT_ERROR macros and such in this exercise. At least no objections were raised to that plan.

If now we want to go back on that, and if you want to have guc_to_gt(guc) in all gt/uc/ call sites that's fine by me, but please get some acks and consensus from people who work in that area. And under that option someone would also need to convert the CT code to new macros.

Regards,

Tvrtko

> drm_dbg_kms(&dev_priv->drm, "disabling %s\n", pll->info->name);
> i915 0000:00:02.0: [drm:intel_disable_shared_dpll [i915-KMS]] disabling 
> PORT PLL B
> But presumably that requires finishing the plan of splitting out the 
> display code into a separate driver. So for now, something like this 
> would still be a massive improvement:
> kms_dbg(dev_priv, "disabling %s\n", pll->info->name);
> i915 0000:00:02.0: [drm:intel_disable_shared_dpll [i915]] KMS: disabling 
> PORT PLL B
> 
> John.
> 
>>
>> Regards,
>>
>> Tvrtko
>
Michal Wajdeczko Dec. 5, 2022, 6:44 p.m. UTC | #9
On 05.12.2022 14:16, Tvrtko Ursulin wrote:
> 
> On 02/12/2022 20:14, John Harrison wrote:
> 
>>>> and while for dbg level messages it doesn't matter, I assume we should
>>>> be consistent for err/warn/info messages (as those will eventually show
>>>> up to the end user) so let maintainers decide here what is
>>>> expectation here
>>>
>>> Could we have some examples pasted here, of the end result of this
>>> series, for all message "categories" (origins, macros, whatever)?
>>
>> GT initialisation:
>> gt_err(gt, "Failed to allocate scratch page\n");
>> i915 0000:00:02.0: [drm] GT0: Failed to allocate scratch page
>>
>> G2H notification handler:
>> guc_err(guc, "notification: Invalid length %u for deregister done\n",
>> len);
>> i915 0000:00:02.0: [drm] GT0: GuC notification: Invalid length 0 for
>> deregister done

please note that today this message is coded as:

drm_err(&guc_to_gt(guc)->i915->drm, "Invalid length %u\n", len);
-> i915 0000:00:02.0: [drm] Invalid length %u

which makes this rather an example of meaningless log

> 
> I'm not liking the inconsistency between gt_err and guc_err where with
> latter callers either need to start the message with lower case because
> of the unstructured "GuC " prefix added. Which then reads bad if callers
> do guc_err(guc, "Error X happend").
> 
> Looks like Michal was pointing out the same thing, AFAIU at least when
> re-reading the thread now.
> 
> Why wouldn't this work:
> 
> guc_err(guc, "Invalid length %u for deregister done notification\n", len);
> i915 0000:00:02.0: [drm] GT0: GuC: Invalid length 0 for deregister done
> notification

+1

> 
> Or if the use case for adding custom prefixes is strong then maybe
> consider:
> 
> guc_err(guc, "notification", "Invalid length 0 for deregister done");
> i915 0000:00:02.0: [drm] GT0: GuC notification: Invalid length 0 for
> deregister done
> 
> guc_err(guc, "", "Error X");
> i915 0000:00:02.0: [drm] GT0: GuC: Error X

-1

this will make logging macros too different from others (unless we
hide/use prefixes inside macros only, but I'm not sure there is any ROI)

> 
>> CTB initialisation:
>> ct_probe_error(ct, "Failed to control/%s CTB (%pe)\n",
>> str_enable_disable(enable), ERR_PTR(err));
>> i915 0000:00:02.0: [drm] GT0: GuC CT Failed to control/enable CTB
>> (EINVAL)
> 
> Okay same as above.
> 
>> Random meaningless (to me) message that is apparently a display thing:
>> drm_dbg_kms(&dev_priv->drm, "disabling %s\n", pll->info->name);
>> i915 0000:00:02.0: [drm:intel_disable_shared_dpll [i915]] disabling
>> PORT PLL B
> 
> Plan is to not touch outside gt/.
> 
>> I'm sure you can extrapolate to all other forms of dbg, notice, info,
>> etc. without me having to manually type each one out, given that they
>> are all identical.
>>
>> Personally, I think the above should be just:
>> gt_err(gt, "Failed to allocate scratch page\n");
>> i915 0000:00:02.0: [drm] GT0: Failed to allocate scratch page
>>
>> gt_err(guc_to_gt(guc), "G2H: Invalid length for deregister done:
>> %u\n", len);
>> i915 0000:00:02.0: [drm] GT0: G2H: Invalid length for deregister done: 0

that's probably should be:

	"Invalid length for G2H deregister done: %u\n

and it will still just look fine if we auto append the 'GuC' prefix:

i915 0000:00:02.0: [drm] GT0: GuC: Invalid length for G2H deregister

>>
>> gt_probe_error(ct_to_gt(ct), "Failed to %s CT %d buffer (%pe)\n",
>> str_enable_disable(enable), send ? "SEND" : "RECV", ERR_PTR(err));
>> i915 0000:00:02.0: [drm] GT0: Failed to enable CT SEND buffer (EINVAL)

having "GuC/CT" prefix here will also look fine:

i915 0000:00:02.0: [drm] GT0: GuC: Failed to enable CT SEND buffer
i915 0000:00:02.0: [drm] GT0: GuC: CT: Failed to enable SEND buffer
i915 0000:00:02.0: [drm] GT0: CT: Failed to enable SEND buffer

> 
> We could but it seems we agreed some weeks ago to consolidate the
> existing CT_ERROR macros and such in this exercise. At least no
> objections were raised to that plan.
> 
> If now we want to go back on that, and if you want to have
> guc_to_gt(guc) in all gt/uc/ call sites that's fine by me, but please
> get some acks and consensus from people who work in that area. And under
> that option someone would also need to convert the CT code to new macros.

while the main goal of this series was to have GT# appended to the log
messages but we also wanted to simplify the use of the logging macros by
passing the component pointer directly (with extra *bonus* that allows
to auto append component specific prefix, if any, like CT macros do)

IMHO adding guc_xxx() macros with "GuC:" prefix will do the trick and
since many of the existing GuC related logs are already broken or
incomplete, we might fix them accordingly.

In other words in addition to gt_xxx() I still want additional guc_xxx()
macros (as it will allow us to fix related messages) and ct_xxx() macros
(as we already have CT_xxx so no need to change anything)

Michal

> 
> Regards,
> 
> Tvrtko
> 
>> drm_dbg_kms(&dev_priv->drm, "disabling %s\n", pll->info->name);
>> i915 0000:00:02.0: [drm:intel_disable_shared_dpll [i915-KMS]]
>> disabling PORT PLL B
>> But presumably that requires finishing the plan of splitting out the
>> display code into a separate driver. So for now, something like this
>> would still be a massive improvement:
>> kms_dbg(dev_priv, "disabling %s\n", pll->info->name);
>> i915 0000:00:02.0: [drm:intel_disable_shared_dpll [i915]] KMS:
>> disabling PORT PLL B
>>
>> John.
>>
>>>
>>> Regards,
>>>
>>> Tvrtko
>>
Tvrtko Ursulin Dec. 6, 2022, 11:06 a.m. UTC | #10
On 05/12/2022 18:44, Michal Wajdeczko wrote:
> On 05.12.2022 14:16, Tvrtko Ursulin wrote:
>>
>> On 02/12/2022 20:14, John Harrison wrote:
>>
>>>>> and while for dbg level messages it doesn't matter, I assume we should
>>>>> be consistent for err/warn/info messages (as those will eventually show
>>>>> up to the end user) so let maintainers decide here what is
>>>>> expectation here
>>>>
>>>> Could we have some examples pasted here, of the end result of this
>>>> series, for all message "categories" (origins, macros, whatever)?
>>>
>>> GT initialisation:
>>> gt_err(gt, "Failed to allocate scratch page\n");
>>> i915 0000:00:02.0: [drm] GT0: Failed to allocate scratch page
>>>
>>> G2H notification handler:
>>> guc_err(guc, "notification: Invalid length %u for deregister done\n",
>>> len);
>>> i915 0000:00:02.0: [drm] GT0: GuC notification: Invalid length 0 for
>>> deregister done
> 
> please note that today this message is coded as:
> 
> drm_err(&guc_to_gt(guc)->i915->drm, "Invalid length %u\n", len);
> -> i915 0000:00:02.0: [drm] Invalid length %u
> 
> which makes this rather an example of meaningless log

Okay, so log text needs improving anyway which is orthogonal.

>> I'm not liking the inconsistency between gt_err and guc_err where with
>> latter callers either need to start the message with lower case because
>> of the unstructured "GuC " prefix added. Which then reads bad if callers
>> do guc_err(guc, "Error X happend").
>>
>> Looks like Michal was pointing out the same thing, AFAIU at least when
>> re-reading the thread now.
>>
>> Why wouldn't this work:
>>
>> guc_err(guc, "Invalid length %u for deregister done notification\n", len);
>> i915 0000:00:02.0: [drm] GT0: GuC: Invalid length 0 for deregister done
>> notification
> 
> +1
> 
>>
>> Or if the use case for adding custom prefixes is strong then maybe
>> consider:
>>
>> guc_err(guc, "notification", "Invalid length 0 for deregister done");
>> i915 0000:00:02.0: [drm] GT0: GuC notification: Invalid length 0 for
>> deregister done
>>
>> guc_err(guc, "", "Error X");
>> i915 0000:00:02.0: [drm] GT0: GuC: Error X
> 
> -1
> 
> this will make logging macros too different from others (unless we
> hide/use prefixes inside macros only, but I'm not sure there is any ROI)

Yeah I said if the use case is strong, no strong opinion either way.

>>> CTB initialisation:
>>> ct_probe_error(ct, "Failed to control/%s CTB (%pe)\n",
>>> str_enable_disable(enable), ERR_PTR(err));
>>> i915 0000:00:02.0: [drm] GT0: GuC CT Failed to control/enable CTB
>>> (EINVAL)
>>
>> Okay same as above.
>>
>>> Random meaningless (to me) message that is apparently a display thing:
>>> drm_dbg_kms(&dev_priv->drm, "disabling %s\n", pll->info->name);
>>> i915 0000:00:02.0: [drm:intel_disable_shared_dpll [i915]] disabling
>>> PORT PLL B
>>
>> Plan is to not touch outside gt/.
>>
>>> I'm sure you can extrapolate to all other forms of dbg, notice, info,
>>> etc. without me having to manually type each one out, given that they
>>> are all identical.
>>>
>>> Personally, I think the above should be just:
>>> gt_err(gt, "Failed to allocate scratch page\n");
>>> i915 0000:00:02.0: [drm] GT0: Failed to allocate scratch page
>>>
>>> gt_err(guc_to_gt(guc), "G2H: Invalid length for deregister done:
>>> %u\n", len);
>>> i915 0000:00:02.0: [drm] GT0: G2H: Invalid length for deregister done: 0
> 
> that's probably should be:
> 
> 	"Invalid length for G2H deregister done: %u\n
> 
> and it will still just look fine if we auto append the 'GuC' prefix:
> 
> i915 0000:00:02.0: [drm] GT0: GuC: Invalid length for G2H deregister
> 
>>>
>>> gt_probe_error(ct_to_gt(ct), "Failed to %s CT %d buffer (%pe)\n",
>>> str_enable_disable(enable), send ? "SEND" : "RECV", ERR_PTR(err));
>>> i915 0000:00:02.0: [drm] GT0: Failed to enable CT SEND buffer (EINVAL)
> 
> having "GuC/CT" prefix here will also look fine:
> 
> i915 0000:00:02.0: [drm] GT0: GuC: Failed to enable CT SEND buffer
> i915 0000:00:02.0: [drm] GT0: GuC: CT: Failed to enable SEND buffer
> i915 0000:00:02.0: [drm] GT0: CT: Failed to enable SEND buffer

Works for me.

>> We could but it seems we agreed some weeks ago to consolidate the
>> existing CT_ERROR macros and such in this exercise. At least no
>> objections were raised to that plan.
>>
>> If now we want to go back on that, and if you want to have
>> guc_to_gt(guc) in all gt/uc/ call sites that's fine by me, but please
>> get some acks and consensus from people who work in that area. And under
>> that option someone would also need to convert the CT code to new macros.
> 
> while the main goal of this series was to have GT# appended to the log
> messages but we also wanted to simplify the use of the logging macros by
> passing the component pointer directly (with extra *bonus* that allows
> to auto append component specific prefix, if any, like CT macros do)
> 
> IMHO adding guc_xxx() macros with "GuC:" prefix will do the trick and
> since many of the existing GuC related logs are already broken or
> incomplete, we might fix them accordingly.
> 
> In other words in addition to gt_xxx() I still want additional guc_xxx()
> macros (as it will allow us to fix related messages) and ct_xxx() macros
> (as we already have CT_xxx so no need to change anything)

Both approaches are fine by me as long as it's logical and consistent 
and we manage not to leave the conversion half-done for too long.

Maybe as a way forward work could be split? If John wants to deal with 
gt_xxx macros, avoid touching GuC (putting his original motivation 
aside) and you want to convert the gt/uc folder? Assuming John you are 
okay with "GuC:" and "CT:" prefixes.

Regards,

Tvrtko
John Harrison Jan. 6, 2023, 6:57 p.m. UTC | #11
On 12/6/2022 03:06, Tvrtko Ursulin wrote:
> On 05/12/2022 18:44, Michal Wajdeczko wrote:
>> On 05.12.2022 14:16, Tvrtko Ursulin wrote:
>>> On 02/12/2022 20:14, John Harrison wrote:
>>> [snip]
>>>
>>>> Random meaningless (to me) message that is apparently a display thing:
>>>> drm_dbg_kms(&dev_priv->drm, "disabling %s\n", pll->info->name);
>>>> i915 0000:00:02.0: [drm:intel_disable_shared_dpll [i915]] disabling
>>>> PORT PLL B
>>>
>>> Plan is to not touch outside gt/.
For some unexplicable reason that means it is almost impossible to see 
the actual problems in most CI dmesg logs because they are swamped with 
irrelevant display messages that cannot be filtered out. For example, I 
recently manually grep'd out all the display spam from a bug report log. 
The dmesg file went from 12MB to 700KB. That is a significant problem 
that makes bug triage way harder than it needs to be.

>
> Maybe as a way forward work could be split? If John wants to deal with 
> gt_xxx macros, avoid touching GuC (putting his original motivation 
> aside) and you want to convert the gt/uc folder? Assuming John you are 
> okay with "GuC:" and "CT:" prefixes.
Meaning just repost patch #1 only and expand to more intel_gt_* files? 
Sure, if someone will actually reply to that patch with some kind of r-b 
first so I know I'm not still wasting my time on a huge re-write that 
will to be redone multiple times when someone objects to the use of a 
colon or the lack of spaces, braces or whatever.

John.
Jani Nikula Jan. 9, 2023, 9:38 a.m. UTC | #12
On Fri, 06 Jan 2023, John Harrison <john.c.harrison@intel.com> wrote:
> On 12/6/2022 03:06, Tvrtko Ursulin wrote:
>> On 05/12/2022 18:44, Michal Wajdeczko wrote:
>>> On 05.12.2022 14:16, Tvrtko Ursulin wrote:
>>>> On 02/12/2022 20:14, John Harrison wrote:
>>>> [snip]
>>>>
>>>>> Random meaningless (to me) message that is apparently a display thing:
>>>>> drm_dbg_kms(&dev_priv->drm, "disabling %s\n", pll->info->name);
>>>>> i915 0000:00:02.0: [drm:intel_disable_shared_dpll [i915]] disabling
>>>>> PORT PLL B
>>>>
>>>> Plan is to not touch outside gt/.
> For some unexplicable reason that means it is almost impossible to see 
> the actual problems in most CI dmesg logs because they are swamped with 
> irrelevant display messages that cannot be filtered out. For example, I 
> recently manually grep'd out all the display spam from a bug report log. 
> The dmesg file went from 12MB to 700KB. That is a significant problem 
> that makes bug triage way harder than it needs to be.

You can adjust drm.debug module parameter to get rid of almost all
display debugs. They're logged using the appropriate debug categories.


BR,
Jani.
Tvrtko Ursulin Jan. 9, 2023, 9:39 a.m. UTC | #13
On 06/01/2023 18:57, John Harrison wrote:
> On 12/6/2022 03:06, Tvrtko Ursulin wrote:
>> On 05/12/2022 18:44, Michal Wajdeczko wrote:
>>> On 05.12.2022 14:16, Tvrtko Ursulin wrote:
>>>> On 02/12/2022 20:14, John Harrison wrote:
>>>> [snip]
>>>>
>>>>> Random meaningless (to me) message that is apparently a display thing:
>>>>> drm_dbg_kms(&dev_priv->drm, "disabling %s\n", pll->info->name);
>>>>> i915 0000:00:02.0: [drm:intel_disable_shared_dpll [i915]] disabling
>>>>> PORT PLL B
>>>>
>>>> Plan is to not touch outside gt/.
> For some unexplicable reason that means it is almost impossible to see 
> the actual problems in most CI dmesg logs because they are swamped with 
> irrelevant display messages that cannot be filtered out. For example, I 
> recently manually grep'd out all the display spam from a bug report log. 
> The dmesg file went from 12MB to 700KB. That is a significant problem 
> that makes bug triage way harder than it needs to be.

I didn't get this part, how it would reduce the amount of spam by adding 
new macros? Anyway, that's something to split out and discuss with 
display folks.

>> Maybe as a way forward work could be split? If John wants to deal with 
>> gt_xxx macros, avoid touching GuC (putting his original motivation 
>> aside) and you want to convert the gt/uc folder? Assuming John you are 
>> okay with "GuC:" and "CT:" prefixes.
> Meaning just repost patch #1 only and expand to more intel_gt_* files? 
> Sure, if someone will actually reply to that patch with some kind of r-b 
> first so I know I'm not still wasting my time on a huge re-write that 
> will to be redone multiple times when someone objects to the use of a 
> colon or the lack of spaces, braces or whatever.

First patch looks good to me (ack in principle) apart that Michal found 
one potential null pointer dereference if I understood it right. That 
other comment about the ratelimited call is maybe okay to leave for 
later, *if* it will be a single instance, otherwise needs a gt logger as 
well. I can r-b once you re-send with the first issue fixed.

Regards,

Tvrtko
John Harrison Jan. 9, 2023, 8:33 p.m. UTC | #14
On 1/9/2023 01:38, Jani Nikula wrote:
> On Fri, 06 Jan 2023, John Harrison <john.c.harrison@intel.com> wrote:
>> On 12/6/2022 03:06, Tvrtko Ursulin wrote:
>>> On 05/12/2022 18:44, Michal Wajdeczko wrote:
>>>> On 05.12.2022 14:16, Tvrtko Ursulin wrote:
>>>>> On 02/12/2022 20:14, John Harrison wrote:
>>>>> [snip]
>>>>>
>>>>>> Random meaningless (to me) message that is apparently a display thing:
>>>>>> drm_dbg_kms(&dev_priv->drm, "disabling %s\n", pll->info->name);
>>>>>> i915 0000:00:02.0: [drm:intel_disable_shared_dpll [i915]] disabling
>>>>>> PORT PLL B
>>>>> Plan is to not touch outside gt/.
>> For some unexplicable reason that means it is almost impossible to see
>> the actual problems in most CI dmesg logs because they are swamped with
>> irrelevant display messages that cannot be filtered out. For example, I
>> recently manually grep'd out all the display spam from a bug report log.
>> The dmesg file went from 12MB to 700KB. That is a significant problem
>> that makes bug triage way harder than it needs to be.
> You can adjust drm.debug module parameter to get rid of almost all
> display debugs. They're logged using the appropriate debug categories.
No, you can't. See above comment about 'most CI dmesg logs'. This is 
when trying to triage bugs created by the CI systems. In that case, the 
log already exists and it was generated at full debug and it is tens if 
not hundreds of MBs in size. And there is no single tag attached to the 
display messages to run 'grep -v' on. They are just a random collection 
of disparate function names.

John.

>
>
> BR,
> Jani.
>
>
John Harrison Jan. 9, 2023, 8:36 p.m. UTC | #15
On 1/9/2023 01:39, Tvrtko Ursulin wrote:
> On 06/01/2023 18:57, John Harrison wrote:
>> On 12/6/2022 03:06, Tvrtko Ursulin wrote:
>>> On 05/12/2022 18:44, Michal Wajdeczko wrote:
>>>> On 05.12.2022 14:16, Tvrtko Ursulin wrote:
>>>>> On 02/12/2022 20:14, John Harrison wrote:
>>>>> [snip]
>>>>>
>>>>>> Random meaningless (to me) message that is apparently a display 
>>>>>> thing:
>>>>>> drm_dbg_kms(&dev_priv->drm, "disabling %s\n", pll->info->name);
>>>>>> i915 0000:00:02.0: [drm:intel_disable_shared_dpll [i915]] disabling
>>>>>> PORT PLL B
>>>>>
>>>>> Plan is to not touch outside gt/.
>> For some unexplicable reason that means it is almost impossible to 
>> see the actual problems in most CI dmesg logs because they are 
>> swamped with irrelevant display messages that cannot be filtered out. 
>> For example, I recently manually grep'd out all the display spam from 
>> a bug report log. The dmesg file went from 12MB to 700KB. That is a 
>> significant problem that makes bug triage way harder than it needs to 
>> be.
>
> I didn't get this part, how it would reduce the amount of spam by 
> adding new macros? Anyway, that's something to split out and discuss 
> with display folks.
It will allow someone to trivially filter out everything with that tag. 
Which then makes it orders of magnitude easy to scan through the log to 
see what happened.

>
>>> Maybe as a way forward work could be split? If John wants to deal 
>>> with gt_xxx macros, avoid touching GuC (putting his original 
>>> motivation aside) and you want to convert the gt/uc folder? Assuming 
>>> John you are okay with "GuC:" and "CT:" prefixes.
>> Meaning just repost patch #1 only and expand to more intel_gt_* 
>> files? Sure, if someone will actually reply to that patch with some 
>> kind of r-b first so I know I'm not still wasting my time on a huge 
>> re-write that will to be redone multiple times when someone objects 
>> to the use of a colon or the lack of spaces, braces or whatever.
>
> First patch looks good to me (ack in principle) apart that Michal 
> found one potential null pointer dereference if I understood it right. 
> That other comment about the ratelimited call is maybe okay to leave 
> for later, *if* it will be a single instance, otherwise needs a gt 
> logger as well. I can r-b once you re-send with the first issue fixed.
I've already fixed those two issues locally. I'm not going to touch the 
TRACE macros.

Okay. I'll extend it further and repost.

John.


>
> Regards,
>
> Tvrtko
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
index 2b22065e87bf9..9d404fb377637 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
@@ -18,31 +18,49 @@  static inline struct intel_guc *ct_to_guc(struct intel_guc_ct *ct)
 	return container_of(ct, struct intel_guc, ct);
 }
 
-static inline struct intel_gt *ct_to_gt(struct intel_guc_ct *ct)
-{
-	return guc_to_gt(ct_to_guc(ct));
-}
-
 static inline struct drm_i915_private *ct_to_i915(struct intel_guc_ct *ct)
 {
-	return ct_to_gt(ct)->i915;
-}
+	struct intel_guc *guc = ct_to_guc(ct);
+	struct intel_gt *gt = guc_to_gt(guc);
 
-static inline struct drm_device *ct_to_drm(struct intel_guc_ct *ct)
-{
-	return &ct_to_i915(ct)->drm;
+	return gt->i915;
 }
 
-#define CT_ERROR(_ct, _fmt, ...) \
-	drm_err(ct_to_drm(_ct), "CT: " _fmt, ##__VA_ARGS__)
+#define ct_err(_ct, _fmt, ...) \
+	guc_err(ct_to_guc(_ct), "CT " _fmt, ##__VA_ARGS__)
+
+#define ct_warn(_ct, _fmt, ...) \
+	guc_warn(ct_to_guc(_ct), "CT " _fmt, ##__VA_ARGS__)
+
+#define ct_notice(_ct, _fmt, ...) \
+	guc_notice(ct_to_guc(_ct), "CT " _fmt, ##__VA_ARGS__)
+
+#define ct_info(_ct, _fmt, ...) \
+	guc_info(ct_to_guc(_ct), "CT " _fmt, ##__VA_ARGS__)
+
 #ifdef CONFIG_DRM_I915_DEBUG_GUC
-#define CT_DEBUG(_ct, _fmt, ...) \
-	drm_dbg(ct_to_drm(_ct), "CT: " _fmt, ##__VA_ARGS__)
+#define ct_dbg(_ct, _fmt, ...) \
+	guc_dbg(ct_to_guc(_ct), "CT " _fmt, ##__VA_ARGS__)
 #else
-#define CT_DEBUG(...)	do { } while (0)
+#define ct_dbg(...)	do { } while (0)
 #endif
-#define CT_PROBE_ERROR(_ct, _fmt, ...) \
-	i915_probe_error(ct_to_i915(ct), "CT: " _fmt, ##__VA_ARGS__)
+
+#define ct_probe_error(_ct, _fmt, ...) \
+	do { \
+		if (i915_error_injected()) \
+			ct_dbg(_ct, _fmt, ##__VA_ARGS__); \
+		else \
+			ct_err(_ct, _fmt, ##__VA_ARGS__); \
+	} while (0)
+
+#define ct_WARN_ON(_ct, _condition) \
+	ct_WARN(_ct, _condition, "%s", "ct_WARN_ON(" __stringify(_condition) ")")
+
+#define ct_WARN(_ct, _condition, _fmt, ...) \
+	guc_WARN(ct_to_guc(_ct), _condition, "CT " _fmt, ##__VA_ARGS__)
+
+#define ct_WARN_ONCE(_ct, _condition, _fmt, ...) \
+	guc_WARN_ONCE(ct_to_guc(_ct), _condition, "CT " _fmt, ##__VA_ARGS__)
 
 /**
  * DOC: CTB Blob
@@ -170,7 +188,7 @@  static int ct_control_enable(struct intel_guc_ct *ct, bool enable)
 	err = guc_action_control_ctb(ct_to_guc(ct), enable ?
 				     GUC_CTB_CONTROL_ENABLE : GUC_CTB_CONTROL_DISABLE);
 	if (unlikely(err))
-		CT_PROBE_ERROR(ct, "Failed to control/%s CTB (%pe)\n",
+		ct_probe_error(ct, "Failed to control/%s CTB (%pe)\n",
 			       str_enable_disable(enable), ERR_PTR(err));
 
 	return err;
@@ -201,7 +219,7 @@  static int ct_register_buffer(struct intel_guc_ct *ct, bool send,
 				   size);
 	if (unlikely(err))
 failed:
-		CT_PROBE_ERROR(ct, "Failed to register %s buffer (%pe)\n",
+		ct_probe_error(ct, "Failed to register %s buffer (%pe)\n",
 			       send ? "SEND" : "RECV", ERR_PTR(err));
 
 	return err;
@@ -235,21 +253,21 @@  int intel_guc_ct_init(struct intel_guc_ct *ct)
 	blob_size = 2 * CTB_DESC_SIZE + CTB_H2G_BUFFER_SIZE + CTB_G2H_BUFFER_SIZE;
 	err = intel_guc_allocate_and_map_vma(guc, blob_size, &ct->vma, &blob);
 	if (unlikely(err)) {
-		CT_PROBE_ERROR(ct, "Failed to allocate %u for CTB data (%pe)\n",
+		ct_probe_error(ct, "Failed to allocate %u for CTB data (%pe)\n",
 			       blob_size, ERR_PTR(err));
 		return err;
 	}
 
-	CT_DEBUG(ct, "base=%#x size=%u\n", intel_guc_ggtt_offset(guc, ct->vma), blob_size);
+	ct_dbg(ct, "base=%#x size=%u\n", intel_guc_ggtt_offset(guc, ct->vma), blob_size);
 
 	/* store pointers to desc and cmds for send ctb */
 	desc = blob;
 	cmds = blob + 2 * CTB_DESC_SIZE;
 	cmds_size = CTB_H2G_BUFFER_SIZE;
 	resv_space = 0;
-	CT_DEBUG(ct, "%s desc %#tx cmds %#tx size %u/%u\n", "send",
-		 ptrdiff(desc, blob), ptrdiff(cmds, blob), cmds_size,
-		 resv_space);
+	ct_dbg(ct, "%s desc %#tx cmds %#tx size %u/%u\n", "send",
+	       ptrdiff(desc, blob), ptrdiff(cmds, blob), cmds_size,
+	       resv_space);
 
 	guc_ct_buffer_init(&ct->ctbs.send, desc, cmds, cmds_size, resv_space);
 
@@ -258,9 +276,9 @@  int intel_guc_ct_init(struct intel_guc_ct *ct)
 	cmds = blob + 2 * CTB_DESC_SIZE + CTB_H2G_BUFFER_SIZE;
 	cmds_size = CTB_G2H_BUFFER_SIZE;
 	resv_space = G2H_ROOM_BUFFER_SIZE;
-	CT_DEBUG(ct, "%s desc %#tx cmds %#tx size %u/%u\n", "recv",
-		 ptrdiff(desc, blob), ptrdiff(cmds, blob), cmds_size,
-		 resv_space);
+	ct_dbg(ct, "%s desc %#tx cmds %#tx size %u/%u\n", "recv",
+	       ptrdiff(desc, blob), ptrdiff(cmds, blob), cmds_size,
+	       resv_space);
 
 	guc_ct_buffer_init(&ct->ctbs.recv, desc, cmds, cmds_size, resv_space);
 
@@ -338,7 +356,7 @@  int intel_guc_ct_enable(struct intel_guc_ct *ct)
 	return 0;
 
 err_out:
-	CT_PROBE_ERROR(ct, "Failed to enable CTB (%pe)\n", ERR_PTR(err));
+	ct_probe_error(ct, "Failed to enable CTB (%pe)\n", ERR_PTR(err));
 	return err;
 }
 
@@ -387,14 +405,12 @@  static int ct_write(struct intel_guc_ct *ct,
 
 #ifdef CONFIG_DRM_I915_DEBUG_GUC
 	if (unlikely(tail != READ_ONCE(desc->tail))) {
-		CT_ERROR(ct, "Tail was modified %u != %u\n",
-			 desc->tail, tail);
+		ct_err(ct, "Tail was modified %u != %u\n", desc->tail, tail);
 		desc->status |= GUC_CTB_STATUS_MISMATCH;
 		goto corrupted;
 	}
 	if (unlikely(READ_ONCE(desc->head) >= size)) {
-		CT_ERROR(ct, "Invalid head offset %u >= %u)\n",
-			 desc->head, size);
+		ct_err(ct, "Invalid head offset %u >= %u)\n", desc->head, size);
 		desc->status |= GUC_CTB_STATUS_OVERFLOW;
 		goto corrupted;
 	}
@@ -415,8 +431,8 @@  static int ct_write(struct intel_guc_ct *ct,
 		FIELD_PREP(GUC_HXG_EVENT_MSG_0_ACTION |
 			   GUC_HXG_EVENT_MSG_0_DATA0, action[0]);
 
-	CT_DEBUG(ct, "writing (tail %u) %*ph %*ph %*ph\n",
-		 tail, 4, &header, 4, &hxg, 4 * (len - 1), &action[1]);
+	ct_dbg(ct, "writing (tail %u) %*ph %*ph %*ph\n",
+	       tail, 4, &header, 4, &hxg, 4 * (len - 1), &action[1]);
 
 	cmds[tail] = header;
 	tail = (tail + 1) % size;
@@ -447,8 +463,8 @@  static int ct_write(struct intel_guc_ct *ct,
 	return 0;
 
 corrupted:
-	CT_ERROR(ct, "Corrupted descriptor head=%u tail=%u status=%#x\n",
-		 desc->head, desc->tail, desc->status);
+	ct_err(ct, "Corrupted descriptor on write head=%u tail=%u status=%#x\n",
+	       desc->head, desc->tail, desc->status);
 	ctb->broken = true;
 	return -EPIPE;
 }
@@ -507,17 +523,14 @@  static inline bool ct_deadlocked(struct intel_guc_ct *ct)
 		struct guc_ct_buffer_desc *send = ct->ctbs.send.desc;
 		struct guc_ct_buffer_desc *recv = ct->ctbs.send.desc;
 
-		CT_ERROR(ct, "Communication stalled for %lld ms, desc status=%#x,%#x\n",
-			 ktime_ms_delta(ktime_get(), ct->stall_time),
-			 send->status, recv->status);
-		CT_ERROR(ct, "H2G Space: %u (Bytes)\n",
-			 atomic_read(&ct->ctbs.send.space) * 4);
-		CT_ERROR(ct, "Head: %u (Dwords)\n", ct->ctbs.send.desc->head);
-		CT_ERROR(ct, "Tail: %u (Dwords)\n", ct->ctbs.send.desc->tail);
-		CT_ERROR(ct, "G2H Space: %u (Bytes)\n",
-			 atomic_read(&ct->ctbs.recv.space) * 4);
-		CT_ERROR(ct, "Head: %u\n (Dwords)", ct->ctbs.recv.desc->head);
-		CT_ERROR(ct, "Tail: %u\n (Dwords)", ct->ctbs.recv.desc->tail);
+		ct_err(ct, "Communication stalled for %lld ms, desc status=%#x,%#x\n",
+		       ktime_ms_delta(ktime_get(), ct->stall_time), send->status, recv->status);
+		ct_err(ct, "H2G Space: %u (Bytes)\n", atomic_read(&ct->ctbs.send.space) * 4);
+		ct_err(ct, "Head: %u (Dwords)\n", ct->ctbs.send.desc->head);
+		ct_err(ct, "Tail: %u (Dwords)\n", ct->ctbs.send.desc->tail);
+		ct_err(ct, "G2H Space: %u (Bytes)\n", atomic_read(&ct->ctbs.recv.space) * 4);
+		ct_err(ct, "Head: %u\n (Dwords)", ct->ctbs.recv.desc->head);
+		ct_err(ct, "Tail: %u\n (Dwords)", ct->ctbs.recv.desc->tail);
 
 		ct->ctbs.send.broken = true;
 	}
@@ -563,8 +576,7 @@  static inline bool h2g_has_room(struct intel_guc_ct *ct, u32 len_dw)
 
 	head = READ_ONCE(desc->head);
 	if (unlikely(head > ctb->size)) {
-		CT_ERROR(ct, "Invalid head offset %u >= %u)\n",
-			 head, ctb->size);
+		ct_err(ct, "Invalid head offset %u >= %u)\n", head, ctb->size);
 		desc->status |= GUC_CTB_STATUS_OVERFLOW;
 		ctb->broken = true;
 		return false;
@@ -715,17 +727,17 @@  static int ct_send(struct intel_guc_ct *ct,
 			/* wait_for_ct_request_update returns -ENODEV on reset/suspend in progress.
 			 * In this case, output is debug rather than error info
 			 */
-			CT_DEBUG(ct, "Request %#x (fence %u) cancelled as CTB is disabled\n",
-				 action[0], request.fence);
+			ct_dbg(ct, "Request %#x (fence %u) cancelled as CTB is disabled\n",
+			       action[0], request.fence);
 		else
-			CT_ERROR(ct, "No response for request %#x (fence %u)\n",
-				 action[0], request.fence);
+			ct_err(ct, "No response for request %#x (fence %u)\n",
+			       action[0], request.fence);
 		goto unlink;
 	}
 
 	if (FIELD_GET(GUC_HXG_MSG_0_TYPE, *status) == GUC_HXG_TYPE_NO_RESPONSE_RETRY) {
-		CT_DEBUG(ct, "retrying request %#x (%u)\n", *action,
-			 FIELD_GET(GUC_HXG_RETRY_MSG_0_REASON, *status));
+		ct_dbg(ct, "retrying request %#x (%u)\n", *action,
+		       FIELD_GET(GUC_HXG_RETRY_MSG_0_REASON, *status));
 		send_again = true;
 		goto unlink;
 	}
@@ -737,12 +749,12 @@  static int ct_send(struct intel_guc_ct *ct,
 
 	if (response_buf) {
 		/* There shall be no data in the status */
-		WARN_ON(FIELD_GET(GUC_HXG_RESPONSE_MSG_0_DATA0, request.status));
+		ct_WARN_ON(ct, FIELD_GET(GUC_HXG_RESPONSE_MSG_0_DATA0, request.status));
 		/* Return actual response len */
 		err = request.response_len;
 	} else {
 		/* There shall be no response payload */
-		WARN_ON(request.response_len);
+		ct_WARN_ON(ct, request.response_len);
 		/* Return data decoded from the status dword */
 		err = FIELD_GET(GUC_HXG_RESPONSE_MSG_0_DATA0, *status);
 	}
@@ -771,7 +783,7 @@  int intel_guc_ct_send(struct intel_guc_ct *ct, const u32 *action, u32 len,
 		struct intel_guc *guc = ct_to_guc(ct);
 		struct intel_uc *uc = container_of(guc, struct intel_uc, guc);
 
-		WARN(!uc->reset_in_progress, "Unexpected send: action=%#x\n", *action);
+		ct_WARN(ct, !uc->reset_in_progress, "Unexpected send: action=%#x\n", *action);
 		return -ENODEV;
 	}
 
@@ -784,11 +796,11 @@  int intel_guc_ct_send(struct intel_guc_ct *ct, const u32 *action, u32 len,
 	ret = ct_send(ct, action, len, response_buf, response_buf_size, &status);
 	if (unlikely(ret < 0)) {
 		if (ret != -ENODEV)
-			CT_ERROR(ct, "Sending action %#x failed (%pe) status=%#X\n",
-				 action[0], ERR_PTR(ret), status);
+			ct_err(ct, "sending action %#x failed (%pe) status=%#X\n",
+			       action[0], ERR_PTR(ret), status);
 	} else if (unlikely(ret)) {
-		CT_DEBUG(ct, "send action %#x returned %d (%#x)\n",
-			 action[0], ret, ret);
+		ct_dbg(ct, "send action %#x returned %d (%#x)\n",
+		       action[0], ret, ret);
 	}
 
 	return ret;
@@ -838,7 +850,7 @@  static int ct_read(struct intel_guc_ct *ct, struct ct_incoming_msg **msg)
 			 * contexts/engines being reset. But should never happen as
 			 * no contexts should be active when CLIENT_RESET is sent.
 			 */
-			CT_ERROR(ct, "Unexpected G2H after GuC has stopped!\n");
+			ct_err(ct, "Unexpected G2H after GuC has stopped!\n");
 			status &= ~GUC_CTB_STATUS_UNUSED;
 		}
 
@@ -850,15 +862,13 @@  static int ct_read(struct intel_guc_ct *ct, struct ct_incoming_msg **msg)
 
 #ifdef CONFIG_DRM_I915_DEBUG_GUC
 	if (unlikely(head != READ_ONCE(desc->head))) {
-		CT_ERROR(ct, "Head was modified %u != %u\n",
-			 desc->head, head);
+		ct_err(ct, "Head was modified %u != %u\n", desc->head, head);
 		desc->status |= GUC_CTB_STATUS_MISMATCH;
 		goto corrupted;
 	}
 #endif
 	if (unlikely(tail >= size)) {
-		CT_ERROR(ct, "Invalid tail offset %u >= %u)\n",
-			 tail, size);
+		ct_err(ct, "Invalid tail offset %u >= %u)\n", tail, size);
 		desc->status |= GUC_CTB_STATUS_OVERFLOW;
 		goto corrupted;
 	}
@@ -873,7 +883,7 @@  static int ct_read(struct intel_guc_ct *ct, struct ct_incoming_msg **msg)
 	/* beware of buffer wrap case */
 	if (unlikely(available < 0))
 		available += size;
-	CT_DEBUG(ct, "available %d (%u:%u:%u)\n", available, head, tail, size);
+	ct_dbg(ct, "read available %d (%u:%u:%u)\n", available, head, tail, size);
 	GEM_BUG_ON(available < 0);
 
 	header = cmds[head];
@@ -882,24 +892,24 @@  static int ct_read(struct intel_guc_ct *ct, struct ct_incoming_msg **msg)
 	/* message len with header */
 	len = FIELD_GET(GUC_CTB_MSG_0_NUM_DWORDS, header) + GUC_CTB_MSG_MIN_LEN;
 	if (unlikely(len > (u32)available)) {
-		CT_ERROR(ct, "Incomplete message %*ph %*ph %*ph\n",
-			 4, &header,
-			 4 * (head + available - 1 > size ?
-			      size - head : available - 1), &cmds[head],
-			 4 * (head + available - 1 > size ?
-			      available - 1 - size + head : 0), &cmds[0]);
+		ct_err(ct, "Incomplete message %*ph %*ph %*ph\n",
+		       4, &header,
+		       4 * (head + available - 1 > size ?
+			    size - head : available - 1), &cmds[head],
+		       4 * (head + available - 1 > size ?
+			    available - 1 - size + head : 0), &cmds[0]);
 		desc->status |= GUC_CTB_STATUS_UNDERFLOW;
 		goto corrupted;
 	}
 
 	*msg = ct_alloc_msg(len);
 	if (!*msg) {
-		CT_ERROR(ct, "No memory for message %*ph %*ph %*ph\n",
-			 4, &header,
-			 4 * (head + available - 1 > size ?
-			      size - head : available - 1), &cmds[head],
-			 4 * (head + available - 1 > size ?
-			      available - 1 - size + head : 0), &cmds[0]);
+		ct_err(ct, "No memory for message %*ph %*ph %*ph\n",
+		       4, &header,
+		       4 * (head + available - 1 > size ?
+			    size - head : available - 1), &cmds[head],
+		       4 * (head + available - 1 > size ?
+			    available - 1 - size + head : 0), &cmds[0]);
 		return available;
 	}
 
@@ -909,7 +919,7 @@  static int ct_read(struct intel_guc_ct *ct, struct ct_incoming_msg **msg)
 		(*msg)->msg[i] = cmds[head];
 		head = (head + 1) % size;
 	}
-	CT_DEBUG(ct, "received %*ph\n", 4 * len, (*msg)->msg);
+	ct_dbg(ct, "received %*ph\n", 4 * len, (*msg)->msg);
 
 	/* update local copies */
 	ctb->head = head;
@@ -920,8 +930,8 @@  static int ct_read(struct intel_guc_ct *ct, struct ct_incoming_msg **msg)
 	return available - len;
 
 corrupted:
-	CT_ERROR(ct, "Corrupted descriptor head=%u tail=%u status=%#x\n",
-		 desc->head, desc->tail, desc->status);
+	ct_err(ct, "Corrupted descriptor on read head=%u tail=%u status=%#x\n",
+	       desc->head, desc->tail, desc->status);
 	ctb->broken = true;
 	return -EPIPE;
 }
@@ -944,18 +954,17 @@  static int ct_handle_response(struct intel_guc_ct *ct, struct ct_incoming_msg *r
 		   FIELD_GET(GUC_HXG_MSG_0_TYPE, hxg[0]) != GUC_HXG_TYPE_NO_RESPONSE_RETRY &&
 		   FIELD_GET(GUC_HXG_MSG_0_TYPE, hxg[0]) != GUC_HXG_TYPE_RESPONSE_FAILURE);
 
-	CT_DEBUG(ct, "response fence %u status %#x\n", fence, hxg[0]);
+	ct_dbg(ct, "response fence %u status %#x\n", fence, hxg[0]);
 
 	spin_lock_irqsave(&ct->requests.lock, flags);
 	list_for_each_entry(req, &ct->requests.pending, link) {
 		if (unlikely(fence != req->fence)) {
-			CT_DEBUG(ct, "request %u awaits response\n",
-				 req->fence);
+			ct_dbg(ct, "request %u awaits response\n", req->fence);
 			continue;
 		}
 		if (unlikely(datalen > req->response_len)) {
-			CT_ERROR(ct, "Response %u too long (datalen %u > %u)\n",
-				 req->fence, datalen, req->response_len);
+			ct_err(ct, "response %u too long (datalen %u > %u)\n",
+			       req->fence, datalen, req->response_len);
 			datalen = min(datalen, req->response_len);
 			err = -EMSGSIZE;
 		}
@@ -967,12 +976,11 @@  static int ct_handle_response(struct intel_guc_ct *ct, struct ct_incoming_msg *r
 		break;
 	}
 	if (!found) {
-		CT_ERROR(ct, "Unsolicited response (fence %u)\n", fence);
-		CT_ERROR(ct, "Could not find fence=%u, last_fence=%u\n", fence,
-			 ct->requests.last_fence);
+		ct_err(ct, "Unsolicited response (fence %u)\n", fence);
+		ct_err(ct, "Could not find fence=%u, last_fence=%u\n", fence,
+		       ct->requests.last_fence);
 		list_for_each_entry(req, &ct->requests.pending, link)
-			CT_ERROR(ct, "request %u awaits response\n",
-				 req->fence);
+			ct_err(ct, "request %u awaits response\n", req->fence);
 		err = -ENOKEY;
 	}
 	spin_unlock_irqrestore(&ct->requests.lock, flags);
@@ -998,7 +1006,7 @@  static int ct_process_request(struct intel_guc_ct *ct, struct ct_incoming_msg *r
 	action = FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, hxg[0]);
 	len = hxg_len - GUC_HXG_MSG_MIN_LEN;
 
-	CT_DEBUG(ct, "request %x %*ph\n", action, 4 * len, payload);
+	ct_dbg(ct, "request %x %*ph\n", action, 4 * len, payload);
 
 	switch (action) {
 	case INTEL_GUC_ACTION_DEFAULT:
@@ -1016,9 +1024,6 @@  static int ct_process_request(struct intel_guc_ct *ct, struct ct_incoming_msg *r
 		break;
 	case INTEL_GUC_ACTION_STATE_CAPTURE_NOTIFICATION:
 		ret = intel_guc_error_capture_process_msg(guc, payload, len);
-		if (unlikely(ret))
-			CT_ERROR(ct, "error capture notification failed %x %*ph\n",
-				 action, 4 * len, payload);
 		break;
 	case INTEL_GUC_ACTION_ENGINE_FAILURE_NOTIFICATION:
 		ret = intel_guc_engine_failure_process_msg(guc, payload, len);
@@ -1028,11 +1033,11 @@  static int ct_process_request(struct intel_guc_ct *ct, struct ct_incoming_msg *r
 		ret = 0;
 		break;
 	case INTEL_GUC_ACTION_NOTIFY_CRASH_DUMP_POSTED:
-		CT_ERROR(ct, "Received GuC crash dump notification!\n");
+		guc_err(guc, "notification: Crash dump!\n");
 		ret = 0;
 		break;
 	case INTEL_GUC_ACTION_NOTIFY_EXCEPTION:
-		CT_ERROR(ct, "Received GuC exception notification!\n");
+		guc_err(guc, "notification: Exception!\n");
 		ret = 0;
 		break;
 	default:
@@ -1041,8 +1046,7 @@  static int ct_process_request(struct intel_guc_ct *ct, struct ct_incoming_msg *r
 	}
 
 	if (unlikely(ret)) {
-		CT_ERROR(ct, "Failed to process request %04x (%pe)\n",
-			 action, ERR_PTR(ret));
+		ct_err(ct, "Failed to process request %04x (%pe)\n", action, ERR_PTR(ret));
 		return ret;
 	}
 
@@ -1070,8 +1074,8 @@  static bool ct_process_incoming_requests(struct intel_guc_ct *ct)
 
 	err = ct_process_request(ct, request);
 	if (unlikely(err)) {
-		CT_ERROR(ct, "Failed to process CT message (%pe) %*ph\n",
-			 ERR_PTR(err), 4 * request->size, request->msg);
+		ct_err(ct, "Failed to process message (%pe) %*ph\n",
+		       ERR_PTR(err), 4 * request->size, request->msg);
 		ct_free_msg(request);
 	}
 
@@ -1149,8 +1153,8 @@  static int ct_handle_hxg(struct intel_guc_ct *ct, struct ct_incoming_msg *msg)
 
 	if (unlikely(err)) {
 failed:
-		CT_ERROR(ct, "Failed to handle HXG message (%pe) %*ph\n",
-			 ERR_PTR(err), 4 * GUC_HXG_MSG_MIN_LEN, hxg);
+		ct_err(ct, "Failed to handle HXG message (%pe) %*ph\n",
+		       ERR_PTR(err), 4 * GUC_HXG_MSG_MIN_LEN, hxg);
 	}
 	return err;
 }
@@ -1166,8 +1170,8 @@  static void ct_handle_msg(struct intel_guc_ct *ct, struct ct_incoming_msg *msg)
 		err = -EOPNOTSUPP;
 
 	if (unlikely(err)) {
-		CT_ERROR(ct, "Failed to process CT message (%pe) %*ph\n",
-			 ERR_PTR(err), 4 * msg->size, msg->msg);
+		ct_err(ct, "Failed to handle message (%pe) %*ph\n",
+		       ERR_PTR(err), 4 * msg->size, msg->msg);
 		ct_free_msg(msg);
 	}
 }
@@ -1198,7 +1202,7 @@  static void ct_try_receive_message(struct intel_guc_ct *ct)
 {
 	int ret;
 
-	if (GEM_WARN_ON(!ct->enabled))
+	if (ct_WARN_ON(ct, !ct->enabled))
 		return;
 
 	ret = ct_receive(ct);
@@ -1220,7 +1224,7 @@  static void ct_receive_tasklet_func(struct tasklet_struct *t)
 void intel_guc_ct_event_handler(struct intel_guc_ct *ct)
 {
 	if (unlikely(!ct->enabled)) {
-		WARN(1, "Unexpected GuC event received while CT disabled!\n");
+		ct_warn(ct, "Unexpected event received while disabled!\n");
 		return;
 	}