@@ -1587,6 +1587,32 @@ guc_capture_get_manual_snapshot(struct xe_guc *guc, struct xe_hw_engine *hwe)
return new;
}
+/**
+ * xe_guc_capture_snapshot_manual_hwe - Generate and get manual engine register dump
+ * @guc: Target GuC for manual capture
+ * @hwe: The engine instance to capture from
+ *
+ * Generate a manual GuC-Error-Capture snapshot of engine instance + engine class registers
+ * without any queue association. This capture node is not stored in outlist or cachelist.
+ * Returns: New capture node that the caller must "put", or NULL if none could be generated
+ */
+struct xe_guc_capture_snapshot *
+xe_guc_capture_snapshot_manual_hwe(struct xe_guc *guc, struct xe_hw_engine *hwe)
+{
+ struct xe_guc_capture_snapshot *new;
+
+ new = guc_capture_get_manual_snapshot(guc, hwe);
+ if (!new)
+ return NULL;
+
+ new->guc_id = 0;
+ new->lrca = 0;
+ new->is_partial = 0;
+ new->source = XE_ENGINE_CAPTURE_SOURCE_MANUAL_RAW;
+
+ return new;
+}
+
/**
* xe_guc_capture_snapshot_store_manual_job - Generate and store a manual engine register dump
* @guc: Target GuC for manual capture
@@ -1634,7 +1660,7 @@ xe_guc_capture_snapshot_store_manual_job(struct xe_guc *guc, struct xe_exec_queu
new->lrca = xe_lrc_ggtt_addr(q->lrc[0]);
new->is_partial = 0;
new->locked = 1;
- new->source = XE_ENGINE_CAPTURE_SOURCE_MANUAL;
+ new->source = XE_ENGINE_CAPTURE_SOURCE_MANUAL_JOB;
guc_capture_add_node_to_outlist(guc->capture, new);
@@ -1775,6 +1801,11 @@ void xe_guc_capture_snapshot_print(struct xe_guc *guc, struct xe_guc_capture_sna
"full-capture",
"partial-capture"
};
+ const char *srctype[XE_ENGINE_CAPTURE_SOURCE_GUC + 1] = {
+ "Manual-Job",
+ "Manual-Raw",
+ "GuC"
+ };
int type;
const struct __guc_mmio_reg_descr_group *list;
struct xe_gt *gt;
@@ -1791,9 +1822,7 @@ void xe_guc_capture_snapshot_print(struct xe_guc *guc, struct xe_guc_capture_sna
return;
}
- drm_printf(p, "\tCapture_source: %s\n",
- node->source == XE_ENGINE_CAPTURE_SOURCE_GUC ?
- "GuC" : "Manual");
+ drm_printf(p, "\tCapture_source: %s\n", srctype[node->source]);
drm_printf(p, "\tCoverage: %s\n", grptype[node->is_partial]);
for (type = GUC_STATE_CAPTURE_TYPE_GLOBAL; type < GUC_STATE_CAPTURE_TYPE_MAX; type++) {
@@ -1825,7 +1854,7 @@ void xe_guc_capture_snapshot_print(struct xe_guc *guc, struct xe_guc_capture_sna
*/
struct xe_guc_capture_snapshot *
xe_guc_capture_get_matching_and_lock(struct xe_exec_queue *q,
- enum xe_guc_capture_snapshot_source srctype)
+ enum xe_engine_capture_source srctype)
{
struct xe_hw_engine *hwe;
enum xe_hw_engine_id id;
@@ -52,8 +52,10 @@ xe_guc_capture_get_reg_desc_list(struct xe_gt *gt, u32 owner, u32 type,
enum guc_capture_list_class_type capture_class, bool is_ext);
struct xe_guc_capture_snapshot *
xe_guc_capture_get_matching_and_lock(struct xe_exec_queue *q,
- enum xe_guc_capture_snapshot_source srctype);
+ enum xe_engine_capture_source srctype);
void xe_guc_capture_snapshot_store_manual_job(struct xe_guc *guc, struct xe_exec_queue *q);
+struct xe_guc_capture_snapshot *
+xe_guc_capture_snapshot_manual_hwe(struct xe_guc *guc, struct xe_hw_engine *hwe);
void xe_guc_capture_snapshot_print(struct xe_guc *guc, struct xe_guc_capture_snapshot *node,
struct drm_printer *p);
void xe_guc_capture_steered_list_init(struct xe_guc *guc);
@@ -11,8 +11,12 @@
struct guc_mmio_reg;
-enum xe_guc_capture_snapshot_source {
- XE_ENGINE_CAPTURE_SOURCE_MANUAL,
+enum xe_engine_capture_source {
+ /* KMD captured engine registers when job timeout is detected */
+ XE_ENGINE_CAPTURE_SOURCE_MANUAL_JOB,
+ /* KMD captured raw engine registers without any job association */
+ XE_ENGINE_CAPTURE_SOURCE_MANUAL_RAW,
+ /* GuC-FW captured engine registers before workload was killed (last: sizes srctype[]) */
XE_ENGINE_CAPTURE_SOURCE_GUC
};
@@ -40,7 +44,7 @@ struct xe_guc_capture_snapshot {
u32 lrca;
u32 type;
bool locked;
- enum xe_guc_capture_snapshot_source source;
+ enum xe_engine_capture_source source;
struct gcap_reg_list_info {
u32 vfid;
u32 num_regs;
@@ -1079,7 +1079,7 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job)
/*
* Generate a manual capture. Below function will store it
* in GuC Error Capture's internal link-list as if it came from GuC
- * but with a source-type == XE_ENGINE_CAPTURE_SOURCE_MANUAL
+ * but with a source-type == XE_ENGINE_CAPTURE_SOURCE_MANUAL_JOB
*/
xe_guc_capture_snapshot_store_manual_job(guc, q);
xe_force_wake_put(gt_to_fw(q->gt), fw_ref);
@@ -832,7 +832,7 @@ void xe_hw_engine_handle_irq(struct xe_hw_engine *hwe, u16 intr_vec)
/**
* hw_engine_snapshot_capture - Take a quick snapshot of the HW Engine.
* @hwe: Xe HW Engine.
- * @q: The exec queue object.
+ * @q: The exec queue object. (can be NULL for debugfs engine-register dump)
*
* This can be printed out in a later stage like during dev_coredump
* analysis.
@@ -845,9 +845,11 @@ hw_engine_snapshot_capture(struct xe_hw_engine *hwe, struct xe_exec_queue *q)
{
struct xe_hw_engine_snapshot *snapshot;
struct xe_guc_capture_snapshot *node;
+ struct xe_guc *guc;
if (!xe_hw_engine_is_valid(hwe))
return NULL;
+ guc = &hwe->gt->uc.guc;
snapshot = kzalloc(sizeof(*snapshot), GFP_ATOMIC);
@@ -869,7 +871,7 @@ hw_engine_snapshot_capture(struct xe_hw_engine *hwe, struct xe_exec_queue *q)
if (q) {
/* First, retrieve the manual GuC-Error-Capture node if it exists */
- node = xe_guc_capture_get_matching_and_lock(q, XE_ENGINE_CAPTURE_SOURCE_MANUAL);
+ node = xe_guc_capture_get_matching_and_lock(q, XE_ENGINE_CAPTURE_SOURCE_MANUAL_JOB);
/* Find preferred node type sourced from firmware if available */
snapshot->matched_node = xe_guc_capture_get_matching_and_lock(q, XE_ENGINE_CAPTURE_SOURCE_GUC);
if (!snapshot->matched_node) {
@@ -877,13 +879,22 @@ hw_engine_snapshot_capture(struct xe_hw_engine *hwe, struct xe_exec_queue *q)
snapshot->matched_node = node;
} else if (node) {
xe_gt_dbg(hwe->gt, "Found manual GuC-Err-Capture for queue %s", q->name);
- xe_guc_capture_put_matched_nodes(&hwe->gt->uc.guc, node);
+ xe_guc_capture_put_matched_nodes(guc, node);
}
if (!snapshot->matched_node)
xe_gt_dbg(hwe->gt, "Can't retrieve any GuC-Err-Capture node for queue %s",
q->name);
}
+ if (!snapshot->matched_node) {
+ /*
+ * Fallback path - do an immediate jobless manual engine capture.
+ * This will happen when debugfs is triggered to force an engine dump.
+ */
+ snapshot->matched_node = xe_guc_capture_snapshot_manual_hwe(guc, hwe);
+ xe_gt_dbg(hwe->gt, "Fallback to jobless-manual-err-capture node");
+ }
+
return snapshot;
}