From patchwork Mon Nov 22 23:03:56 2021 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Alan Previn X-Patchwork-Id: 12633087 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.lore.kernel.org (Postfix) with ESMTPS id 3F3DFC433EF for ; Mon, 22 Nov 2021 23:02:54 +0000 (UTC) Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id 958B389EFF; Mon, 22 Nov 2021 23:02:53 +0000 (UTC) Received: from mga17.intel.com (mga17.intel.com [192.55.52.151]) by gabe.freedesktop.org (Postfix) with ESMTPS id 746F889ECD for ; Mon, 22 Nov 2021 23:02:52 +0000 (UTC) X-IronPort-AV: E=McAfee;i="6200,9189,10176"; a="215612617" X-IronPort-AV: E=Sophos;i="5.87,255,1631602800"; d="scan'208";a="215612617" Received: from orsmga005.jf.intel.com ([10.7.209.41]) by fmsmga107.fm.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 22 Nov 2021 15:02:51 -0800 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.87,255,1631602800"; d="scan'208";a="674237059" Received: from aalteres-desk.fm.intel.com ([10.80.57.53]) by orsmga005.jf.intel.com with ESMTP; 22 Nov 2021 15:02:51 -0800 From: Alan Previn To: intel-gfx@lists.freedesktop.org Date: Mon, 22 Nov 2021 15:03:56 -0800 Message-Id: <20211122230402.2023576-2-alan.previn.teres.alexis@intel.com> X-Mailer: git-send-email 2.25.1 In-Reply-To: <20211122230402.2023576-1-alan.previn.teres.alexis@intel.com> References: <20211122230402.2023576-1-alan.previn.teres.alexis@intel.com> MIME-Version: 1.0 Subject: [Intel-gfx] [RFC 1/7] drm/i915/guc: Add basic support for error capture lists X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Intel graphics driver 
community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Alan Previn Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" From: John Harrison Add not-quite-support for GuC based error capture. GuC will add error capture capability amongst other things. In order to load the firmware, a minimum amount of support is required on the driver side. This adds that bare minimum. Signed-off-by: John Harrison Reviewed-by: Alan Previn --- .../gpu/drm/i915/gt/uc/abi/guc_actions_abi.h | 1 + drivers/gpu/drm/i915/gt/uc/intel_guc.c | 42 +++++++++++------ drivers/gpu/drm/i915/gt/uc/intel_guc.h | 2 + drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c | 45 ++++++++++++++++++- drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c | 3 ++ drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h | 21 ++++++++- drivers/gpu/drm/i915/gt/uc/intel_guc_log.c | 9 +++- drivers/gpu/drm/i915/gt/uc/intel_guc_log.h | 2 + .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 18 ++++++++ 9 files changed, 126 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h index fe5d7d261797..5af03a486a13 100644 --- a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h +++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h @@ -145,6 +145,7 @@ enum intel_guc_action { INTEL_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC = 0x4601, INTEL_GUC_ACTION_RESET_CLIENT = 0x5507, INTEL_GUC_ACTION_SET_ENG_UTIL_BUFF = 0x550A, + INTEL_GUC_ACTION_STATE_CAPTURE_NOTIFICATION = 0x8002, INTEL_GUC_ACTION_LIMIT }; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c index 6e228343e8cb..5cf9ebd2ee55 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c @@ -222,32 +222,48 @@ static u32 guc_ctl_log_params_flags(struct intel_guc *guc) u32 flags; #if (((CRASH_BUFFER_SIZE) % SZ_1M) == 0) - #define UNIT SZ_1M - #define FLAG GUC_LOG_ALLOC_IN_MEGABYTE + 
#define LOG_UNIT SZ_1M + #define LOG_FLAG GUC_LOG_LOG_ALLOC_UNITS #else - #define UNIT SZ_4K - #define FLAG 0 + #define LOG_UNIT SZ_4K + #define LOG_FLAG 0 + #endif + + #if (((CAPTURE_BUFFER_SIZE) % SZ_1M) == 0) + #define CAPTURE_UNIT SZ_1M + #define CAPTURE_FLAG GUC_LOG_CAPTURE_ALLOC_UNITS + #else + #define CAPTURE_UNIT SZ_4K + #define CAPTURE_FLAG 0 #endif BUILD_BUG_ON(!CRASH_BUFFER_SIZE); - BUILD_BUG_ON(!IS_ALIGNED(CRASH_BUFFER_SIZE, UNIT)); + BUILD_BUG_ON(!IS_ALIGNED(CRASH_BUFFER_SIZE, LOG_UNIT)); BUILD_BUG_ON(!DEBUG_BUFFER_SIZE); - BUILD_BUG_ON(!IS_ALIGNED(DEBUG_BUFFER_SIZE, UNIT)); + BUILD_BUG_ON(!IS_ALIGNED(DEBUG_BUFFER_SIZE, LOG_UNIT)); + BUILD_BUG_ON(!CAPTURE_BUFFER_SIZE); + BUILD_BUG_ON(!IS_ALIGNED(CAPTURE_BUFFER_SIZE, CAPTURE_UNIT)); - BUILD_BUG_ON((CRASH_BUFFER_SIZE / UNIT - 1) > + BUILD_BUG_ON((CRASH_BUFFER_SIZE / LOG_UNIT - 1) > (GUC_LOG_CRASH_MASK >> GUC_LOG_CRASH_SHIFT)); - BUILD_BUG_ON((DEBUG_BUFFER_SIZE / UNIT - 1) > + BUILD_BUG_ON((DEBUG_BUFFER_SIZE / LOG_UNIT - 1) > (GUC_LOG_DEBUG_MASK >> GUC_LOG_DEBUG_SHIFT)); + BUILD_BUG_ON((CAPTURE_BUFFER_SIZE / CAPTURE_UNIT - 1) > + (GUC_LOG_CAPTURE_MASK >> GUC_LOG_CAPTURE_SHIFT)); flags = GUC_LOG_VALID | GUC_LOG_NOTIFY_ON_HALF_FULL | - FLAG | - ((CRASH_BUFFER_SIZE / UNIT - 1) << GUC_LOG_CRASH_SHIFT) | - ((DEBUG_BUFFER_SIZE / UNIT - 1) << GUC_LOG_DEBUG_SHIFT) | + CAPTURE_FLAG | + LOG_FLAG | + ((CRASH_BUFFER_SIZE / LOG_UNIT - 1) << GUC_LOG_CRASH_SHIFT) | + ((DEBUG_BUFFER_SIZE / LOG_UNIT - 1) << GUC_LOG_DEBUG_SHIFT) | + ((CAPTURE_BUFFER_SIZE / CAPTURE_UNIT - 1) << GUC_LOG_CAPTURE_SHIFT) | (offset << GUC_LOG_BUF_ADDR_SHIFT); - #undef UNIT - #undef FLAG + #undef LOG_UNIT + #undef LOG_FLAG + #undef CAPTURE_UNIT + #undef CAPTURE_FLAG return flags; } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h index 1cb46098030d..9de99772f916 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h @@ -392,6 +392,8 @@ int 
intel_guc_context_reset_process_msg(struct intel_guc *guc, const u32 *msg, u32 len); int intel_guc_engine_failure_process_msg(struct intel_guc *guc, const u32 *msg, u32 len); +int intel_guc_error_capture_process_msg(struct intel_guc *guc, + const u32 *msg, u32 len); void intel_guc_find_hung_context(struct intel_engine_cs *engine); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c index 1a1edae67e4e..6c81ddd303d3 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c @@ -40,6 +40,10 @@ * +---------------------------------------+ * | padding | * +---------------------------------------+ <== 4K aligned + * | capture lists | + * +---------------------------------------+ + * | padding | + * +---------------------------------------+ <== 4K aligned * | private data | * +---------------------------------------+ * | padding | @@ -65,6 +69,12 @@ static u32 guc_ads_golden_ctxt_size(struct intel_guc *guc) return PAGE_ALIGN(guc->ads_golden_ctxt_size); } +static u32 guc_ads_capture_size(struct intel_guc *guc) +{ + /* Basic support to init ADS without a proper GuC error capture list */ + return PAGE_ALIGN(PAGE_SIZE); +} + static u32 guc_ads_private_data_size(struct intel_guc *guc) { return PAGE_ALIGN(guc->fw.private_data_size); @@ -85,7 +95,7 @@ static u32 guc_ads_golden_ctxt_offset(struct intel_guc *guc) return PAGE_ALIGN(offset); } -static u32 guc_ads_private_data_offset(struct intel_guc *guc) +static u32 guc_ads_capture_offset(struct intel_guc *guc) { u32 offset; @@ -95,6 +105,16 @@ static u32 guc_ads_private_data_offset(struct intel_guc *guc) return PAGE_ALIGN(offset); } +static u32 guc_ads_private_data_offset(struct intel_guc *guc) +{ + u32 offset; + + offset = guc_ads_capture_offset(guc) + + guc_ads_capture_size(guc); + + return PAGE_ALIGN(offset); +} + static u32 guc_ads_blob_size(struct intel_guc *guc) { return guc_ads_private_data_offset(guc) + @@ -499,6 +519,26 @@ static 
void guc_init_golden_context(struct intel_guc *guc) GEM_BUG_ON(guc->ads_golden_ctxt_size != total_size); } +static void guc_capture_prep_lists(struct intel_guc *guc, struct __guc_ads_blob *blob) +{ + int i, j; + u32 addr_ggtt, offset; + + offset = guc_ads_capture_offset(guc); + addr_ggtt = intel_guc_ggtt_offset(guc, guc->ads_vma) + offset; + + /* FIXME: Populate a proper capture list */ + + for (i = 0; i < GUC_CAPTURE_LIST_INDEX_MAX; i++) { + for (j = 0; j < GUC_MAX_ENGINE_CLASSES; j++) { + blob->ads.capture_instance[i][j] = addr_ggtt; + blob->ads.capture_class[i][j] = addr_ggtt; + } + + blob->ads.capture_global[i] = addr_ggtt; + } +} + static void __guc_ads_init(struct intel_guc *guc) { struct intel_gt *gt = guc_to_gt(guc); @@ -532,6 +572,9 @@ static void __guc_ads_init(struct intel_guc *guc) base = intel_guc_ggtt_offset(guc, guc->ads_vma); + /* Lists for error capture debug */ + guc_capture_prep_lists(guc, blob); + /* ADS */ blob->ads.scheduler_policies = base + ptr_offset(blob, policies); blob->ads.gt_system_info = base + ptr_offset(blob, system_info); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c index a0cc34be7b56..c20c0bcb83f9 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c @@ -974,6 +974,9 @@ static int ct_process_request(struct intel_guc_ct *ct, struct ct_incoming_msg *r case INTEL_GUC_ACTION_CONTEXT_RESET_NOTIFICATION: ret = intel_guc_context_reset_process_msg(guc, payload, len); break; + case INTEL_GUC_ACTION_STATE_CAPTURE_NOTIFICATION: + ret = intel_guc_error_capture_process_msg(guc, payload, len); + break; case INTEL_GUC_ACTION_ENGINE_FAILURE_NOTIFICATION: ret = intel_guc_engine_failure_process_msg(guc, payload, len); break; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h index 7072e30e99f4..767684b6af67 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h +++ 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h @@ -86,11 +86,14 @@ #define GUC_CTL_LOG_PARAMS 0 #define GUC_LOG_VALID (1 << 0) #define GUC_LOG_NOTIFY_ON_HALF_FULL (1 << 1) -#define GUC_LOG_ALLOC_IN_MEGABYTE (1 << 3) +#define GUC_LOG_CAPTURE_ALLOC_UNITS (1 << 2) +#define GUC_LOG_LOG_ALLOC_UNITS (1 << 3) #define GUC_LOG_CRASH_SHIFT 4 #define GUC_LOG_CRASH_MASK (0x3 << GUC_LOG_CRASH_SHIFT) #define GUC_LOG_DEBUG_SHIFT 6 #define GUC_LOG_DEBUG_MASK (0xF << GUC_LOG_DEBUG_SHIFT) +#define GUC_LOG_CAPTURE_SHIFT 10 +#define GUC_LOG_CAPTURE_MASK (0x3 << GUC_LOG_CAPTURE_SHIFT) #define GUC_LOG_BUF_ADDR_SHIFT 12 #define GUC_CTL_WA 1 @@ -264,6 +267,7 @@ struct guc_mmio_reg { u32 value; u32 flags; #define GUC_REGSET_MASKED (1 << 0) + u32 mask; } __packed; /* GuC register sets */ @@ -280,6 +284,14 @@ struct guc_gt_system_info { u32 generic_gt_sysinfo[GUC_GENERIC_GT_SYSINFO_MAX]; } __packed; +/* Capture-types of GuC capture register lists */ +enum +{ + GUC_CAPTURE_LIST_INDEX_PF = 0, + GUC_CAPTURE_LIST_INDEX_VF = 1, + GUC_CAPTURE_LIST_INDEX_MAX = 2, +}; + /* GuC Additional Data Struct */ struct guc_ads { struct guc_mmio_reg_set reg_state_list[GUC_MAX_ENGINE_CLASSES][GUC_MAX_INSTANCES_PER_CLASS]; @@ -291,7 +303,11 @@ struct guc_ads { u32 golden_context_lrca[GUC_MAX_ENGINE_CLASSES]; u32 eng_state_size[GUC_MAX_ENGINE_CLASSES]; u32 private_data; - u32 reserved[15]; + u32 reserved2; + u32 capture_instance[GUC_CAPTURE_LIST_INDEX_MAX][GUC_MAX_ENGINE_CLASSES]; + u32 capture_class[GUC_CAPTURE_LIST_INDEX_MAX][GUC_MAX_ENGINE_CLASSES]; + u32 capture_global[GUC_CAPTURE_LIST_INDEX_MAX]; + u32 reserved[4]; } __packed; /* Engine usage stats */ @@ -312,6 +328,7 @@ struct guc_engine_usage { enum guc_log_buffer_type { GUC_DEBUG_LOG_BUFFER, GUC_CRASH_DUMP_LOG_BUFFER, + GUC_CAPTURE_LOG_BUFFER, GUC_MAX_LOG_BUFFER }; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c index ac0931f0374b..1962a43302a8 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c +++ 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c @@ -201,6 +201,8 @@ static unsigned int guc_get_log_buffer_size(enum guc_log_buffer_type type) return DEBUG_BUFFER_SIZE; case GUC_CRASH_DUMP_LOG_BUFFER: return CRASH_BUFFER_SIZE; + case GUC_CAPTURE_LOG_BUFFER: + return CAPTURE_BUFFER_SIZE; default: MISSING_CASE(type); } @@ -463,6 +465,8 @@ int intel_guc_log_create(struct intel_guc_log *log) * +-------------------------------+ 32B * | Debug state header | * +-------------------------------+ 64B + * | Capture state header | + * +-------------------------------+ 96B * | | * +===============================+ PAGE_SIZE (4KB) * | Crash Dump logs | @@ -470,7 +474,8 @@ int intel_guc_log_create(struct intel_guc_log *log) * | Debug logs | * +===============================+ + DEBUG_SIZE */ - guc_log_size = PAGE_SIZE + CRASH_BUFFER_SIZE + DEBUG_BUFFER_SIZE; + guc_log_size = PAGE_SIZE + CRASH_BUFFER_SIZE + DEBUG_BUFFER_SIZE + + CAPTURE_BUFFER_SIZE; vma = intel_guc_allocate_vma(guc, guc_log_size); if (IS_ERR(vma)) { @@ -672,6 +677,8 @@ stringify_guc_log_type(enum guc_log_buffer_type type) return "DEBUG"; case GUC_CRASH_DUMP_LOG_BUFFER: return "CRASH"; + case GUC_CAPTURE_LOG_BUFFER: + return "CAPTURE"; default: MISSING_CASE(type); } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h index ac1ee1d5ce10..9d9004dc58f1 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h @@ -18,9 +18,11 @@ struct intel_guc; #ifdef CONFIG_DRM_I915_DEBUG_GUC #define CRASH_BUFFER_SIZE SZ_2M #define DEBUG_BUFFER_SIZE SZ_16M +#define CAPTURE_BUFFER_SIZE SZ_4M #else #define CRASH_BUFFER_SIZE SZ_8K #define DEBUG_BUFFER_SIZE SZ_64K +#define CAPTURE_BUFFER_SIZE SZ_16K #endif /* diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 77fbcd8730ee..0bfc92b1b982 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -4003,6 +4003,24 @@ int intel_guc_context_reset_process_msg(struct intel_guc *guc, return 0; } +int intel_guc_error_capture_process_msg(struct intel_guc *guc, + const u32 *msg, u32 len) +{ + int status; + + if (unlikely(len != 1)) { + drm_dbg(&guc_to_gt(guc)->i915->drm, "Invalid length %u", len); + return -EPROTO; + } + + status = msg[0]; + drm_info(&guc_to_gt(guc)->i915->drm, "Got error capture: status = %d", status); + + /* Add extraction of error capture dump */ + + return 0; +} + static struct intel_engine_cs * guc_lookup_engine(struct intel_guc *guc, u8 guc_class, u8 instance) { From patchwork Mon Nov 22 23:03:57 2021 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-Patchwork-Submitter: Alan Previn X-Patchwork-Id: 12633101 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.lore.kernel.org (Postfix) with ESMTPS id A975BC433F5 for ; Mon, 22 Nov 2021 23:03:10 +0000 (UTC) Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id B9DDD89F38; Mon, 22 Nov 2021 23:02:57 +0000 (UTC) Received: from mga17.intel.com (mga17.intel.com [192.55.52.151]) by gabe.freedesktop.org (Postfix) with ESMTPS id 0787889ECD for ; Mon, 22 Nov 2021 23:02:52 +0000 (UTC) X-IronPort-AV: E=McAfee;i="6200,9189,10176"; a="215612618" X-IronPort-AV: E=Sophos;i="5.87,255,1631602800"; d="scan'208";a="215612618" Received: from orsmga005.jf.intel.com ([10.7.209.41]) by fmsmga107.fm.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 22 Nov 2021 15:02:51 -0800 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.87,255,1631602800"; d="scan'208";a="674237062" Received: from aalteres-desk.fm.intel.com 
([10.80.57.53]) by orsmga005.jf.intel.com with ESMTP; 22 Nov 2021 15:02:51 -0800 From: Alan Previn To: intel-gfx@lists.freedesktop.org Date: Mon, 22 Nov 2021 15:03:57 -0800 Message-Id: <20211122230402.2023576-3-alan.previn.teres.alexis@intel.com> X-Mailer: git-send-email 2.25.1 In-Reply-To: <20211122230402.2023576-1-alan.previn.teres.alexis@intel.com> References: <20211122230402.2023576-1-alan.previn.teres.alexis@intel.com> MIME-Version: 1.0 Subject: [Intel-gfx] [RFC 2/7] drm/i915/guc: Update GuC ADS size for error capture lists X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Intel graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Alan Previn Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" Update GuC ADS size allocation to include space for the lists of error state capture register descriptors. Also, populate the lists of registers we want GuC to report back to Host on engine reset events. This list should include global, engine-class and engine-instance registers for every engine-class type on the current hardware. NOTE: Start with a fake table of register lists to lay out the framework before adding real registers in a subsequent patch. 
Signed-off-by: Alan Previn --- drivers/gpu/drm/i915/Makefile | 1 + drivers/gpu/drm/i915/gt/uc/intel_guc.c | 10 +- drivers/gpu/drm/i915/gt/uc/intel_guc.h | 5 + drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c | 176 ++++++++++++- .../gpu/drm/i915/gt/uc/intel_guc_capture.c | 232 ++++++++++++++++++ .../gpu/drm/i915/gt/uc/intel_guc_capture.h | 47 ++++ drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h | 19 +- 7 files changed, 476 insertions(+), 14 deletions(-) create mode 100644 drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c create mode 100644 drivers/gpu/drm/i915/gt/uc/intel_guc_capture.h diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 074d6b8edd23..e3c4d5cea4c3 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -190,6 +190,7 @@ i915-y += gt/uc/intel_uc.o \ gt/uc/intel_guc_rc.o \ gt/uc/intel_guc_slpc.o \ gt/uc/intel_guc_submission.o \ + gt/uc/intel_guc_capture.o \ gt/uc/intel_huc.o \ gt/uc/intel_huc_debugfs.o \ gt/uc/intel_huc_fw.o diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c index 5cf9ebd2ee55..458f0d248a5a 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c @@ -335,9 +335,14 @@ int intel_guc_init(struct intel_guc *guc) if (ret) goto err_fw; - ret = intel_guc_ads_create(guc); + ret = intel_guc_capture_init(guc); if (ret) goto err_log; + + ret = intel_guc_ads_create(guc); + if (ret) + goto err_capture; + GEM_BUG_ON(!guc->ads_vma); ret = intel_guc_ct_init(&guc->ct); @@ -376,6 +381,8 @@ int intel_guc_init(struct intel_guc *guc) intel_guc_ct_fini(&guc->ct); err_ads: intel_guc_ads_destroy(guc); +err_capture: + intel_guc_capture_destroy(guc); err_log: intel_guc_log_destroy(&guc->log); err_fw: @@ -403,6 +410,7 @@ void intel_guc_fini(struct intel_guc *guc) intel_guc_ct_fini(&guc->ct); intel_guc_ads_destroy(guc); + intel_guc_capture_destroy(guc); intel_guc_log_destroy(&guc->log); intel_uc_fw_fini(&guc->fw); } diff --git 
a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h index 9de99772f916..d136c69abe12 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h @@ -16,6 +16,7 @@ #include "intel_guc_log.h" #include "intel_guc_reg.h" #include "intel_guc_slpc_types.h" +#include "intel_guc_capture.h" #include "intel_uc_fw.h" #include "i915_utils.h" #include "i915_vma.h" @@ -37,6 +38,8 @@ struct intel_guc { struct intel_guc_ct ct; /** @slpc: sub-structure containing SLPC related data and objects */ struct intel_guc_slpc slpc; + /** @capture: the error-state-capture module's data and objects */ + struct intel_guc_state_capture capture; /** @sched_engine: Global engine used to submit requests to GuC */ struct i915_sched_engine *sched_engine; @@ -138,6 +141,8 @@ struct intel_guc { u32 ads_regset_size; /** @ads_golden_ctxt_size: size of the golden contexts in the ADS */ u32 ads_golden_ctxt_size; + /** @ads_capture_size: size of register lists in the ADS used for error capture */ + u32 ads_capture_size; /** @ads_engine_usage_size: size of engine usage in the ADS */ u32 ads_engine_usage_size; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c index 6c81ddd303d3..2780c0fadd01 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c @@ -10,6 +10,7 @@ #include "gt/shmem_utils.h" #include "intel_guc_ads.h" #include "intel_guc_fwif.h" +#include "intel_guc_capture.h" #include "intel_uc.h" #include "i915_drv.h" @@ -71,8 +72,7 @@ static u32 guc_ads_golden_ctxt_size(struct intel_guc *guc) static u32 guc_ads_capture_size(struct intel_guc *guc) { - /* Basic support to init ADS without a proper GuC error capture list */ - return PAGE_ALIGN(PAGE_SIZE); + return PAGE_ALIGN(guc->ads_capture_size); } static u32 guc_ads_private_data_size(struct intel_guc *guc) @@ -519,24 +519,170 @@ static void guc_init_golden_context(struct intel_guc *guc) 
GEM_BUG_ON(guc->ads_golden_ctxt_size != total_size); } -static void guc_capture_prep_lists(struct intel_guc *guc, struct __guc_ads_blob *blob) +static int +guc_fill_reglist(struct intel_guc *guc, struct __guc_ads_blob *blob, int vf, bool enabled, + int classid, int type, char *typename, u16 *p_numregs, int newnum, u8 **p_virt_ptr, + u32 *p_blobptr_to_ggtt, u32 *p_ggtt, u32 null_ggtt) { - int i, j; - u32 addr_ggtt, offset; + struct drm_i915_private *i915 = guc_to_gt(guc)->i915; + struct guc_debug_capture_list *listnode; + int size = 0; - offset = guc_ads_capture_offset(guc); - addr_ggtt = intel_guc_ggtt_offset(guc, guc->ads_vma) + offset; + if (blob && *p_numregs != newnum) { + if (type == GUC_CAPTURE_LIST_TYPE_GLOBAL) + drm_warn(&i915->drm, "Guc-Cap VF%d-%s num-reg mismatch was=%d now=%d!\n", + vf, typename, *p_numregs, newnum); + else + drm_warn(&i915->drm, "Guc-Cap VF%d-Class-%d-%s num-reg mismatch was=%d now=%d!\n", + vf, classid, typename, *p_numregs, newnum); + } + /* + * For enabled capture lists, we not only need to call capture module to help + * populate the list-descriptor into the correct ads capture structures, but + * we also need to increment the virtual pointers and ggtt offsets so that + * caller has the subsequent gfx memory location. 
+ */ + *p_numregs = newnum; + size = PAGE_ALIGN((sizeof(struct guc_debug_capture_list)) + + (newnum * sizeof(struct guc_mmio_reg))); + /* if caller hasn't allocated ADS blob, return size and counts, we're done */ + if (!blob) + return size; + if (blob) { + /* if caller allocated ADS blob, populate the capture register descriptors */ + if (!newnum) { + *p_blobptr_to_ggtt = null_ggtt; + } else { + /* get ptr and populate header info: */ + *p_blobptr_to_ggtt = *p_ggtt; + listnode = (struct guc_debug_capture_list *)*p_virt_ptr; + *p_ggtt += sizeof(struct guc_debug_capture_list); + *p_virt_ptr += sizeof(struct guc_debug_capture_list); + listnode->header.info = FIELD_PREP(GUC_CAPTURELISTHDR_NUMDESCR, *p_numregs); + + /* get ptr and populate register descriptor list: */ + intel_guc_capture_list_init(guc, vf, type, classid, + (struct guc_mmio_reg *)*p_virt_ptr, + *p_numregs); + + /* increment ptrs for that header: */ + *p_ggtt += size - sizeof(struct guc_debug_capture_list); + *p_virt_ptr += size - sizeof(struct guc_debug_capture_list); + } + } + + return size; +} + +static int guc_capture_prep_lists(struct intel_guc *guc, struct __guc_ads_blob *blob) +{ + struct intel_gt *gt = guc_to_gt(guc); + int i, j, size; + u32 ggtt, null_ggtt, offset, alloc_size = 0; + struct guc_gt_system_info *info, local_info; + struct guc_debug_capture_list *listnode; + struct drm_i915_private *i915 = guc_to_gt(guc)->i915; + struct intel_guc_state_capture *gc = &guc->capture; + u16 tmp = 0; + u8 *ptr = NULL; + + if (blob) { + offset = guc_ads_capture_offset(guc); + ggtt = intel_guc_ggtt_offset(guc, guc->ads_vma) + offset; + ptr = ((u8 *)blob) + offset; + info = &blob->system_info; + } else { + memset(&local_info, 0, sizeof(local_info)); + info = &local_info; + fill_engine_enable_masks(gt, info); + } + + /* first, set aside the first page for a capture_list with zero descriptors */ + alloc_size = PAGE_SIZE; + if (blob) { + listnode = (struct guc_debug_capture_list *)ptr; + listnode->header.info = 
FIELD_PREP(GUC_CAPTURELISTHDR_NUMDESCR, 0); + null_ggtt = ggtt; + ggtt += PAGE_SIZE; + ptr += PAGE_SIZE; + } - /* FIXME: Populate a proper capture list */ +#define COUNT_REGS intel_guc_capture_list_count +#define FILL_REGS guc_fill_reglist +#define TYPE_CLASS GUC_CAPTURE_LIST_TYPE_ENGINE_CLASS +#define TYPE_INSTANCE GUC_CAPTURE_LIST_TYPE_ENGINE_INSTANCE for (i = 0; i < GUC_CAPTURE_LIST_INDEX_MAX; i++) { for (j = 0; j < GUC_MAX_ENGINE_CLASSES; j++) { - blob->ads.capture_instance[i][j] = addr_ggtt; - blob->ads.capture_class[i][j] = addr_ggtt; + if (!info->engine_enabled_masks[j]) { + if (gc->num_class_regs[i][j]) + drm_warn(&i915->drm, "GuC-Cap VF%d-class-%d " + "class regs valid mismatch was=%d now=%d!\n", + i, j, gc->num_class_regs[i][j], tmp); + if (gc->num_instance_regs[i][j]) + drm_warn(&i915->drm, "GuC-Cap VF%d-class-%d " + "inst regs valid mismatch was=%d now=%d!\n", + i, j, gc->num_instance_regs[i][j], tmp); + gc->num_class_regs[i][j] = 0; + gc->num_instance_regs[i][j] = 0; + if (blob) { + blob->ads.capture_class[i][j] = null_ggtt; + blob->ads.capture_instance[i][j] = null_ggtt; + } + } else { + if (!COUNT_REGS(guc, i, TYPE_CLASS, + guc_class_to_engine_class(j), &tmp)) { + size = FILL_REGS(guc, blob, i, true, j, TYPE_CLASS, + "class", &gc->num_class_regs[i][j], + tmp, &ptr, + &blob->ads.capture_class[i][j], + &ggtt, null_ggtt); + gc->class_list_size += size; + alloc_size += size; + } else { + gc->num_class_regs[i][j] = 0; + if (blob) + blob->ads.capture_class[i][j] = null_ggtt; + } + if (!COUNT_REGS(guc, i, TYPE_INSTANCE, + guc_class_to_engine_class(j), &tmp)) { + size = FILL_REGS(guc, blob, i, true, j, TYPE_INSTANCE, + "instance", &gc->num_instance_regs[i][j], + tmp, &ptr, + &blob->ads.capture_instance[i][j], + &ggtt, null_ggtt); + gc->instance_list_size += size; + alloc_size += size; + } else { + gc->num_instance_regs[i][j] = 0; + if (blob) + blob->ads.capture_instance[i][j] = null_ggtt; + } + } + } + if (!COUNT_REGS(guc, i, GUC_CAPTURE_LIST_TYPE_GLOBAL, 0, 
&tmp)) { + size = FILL_REGS(guc, blob, i, true, 0, GUC_CAPTURE_LIST_TYPE_GLOBAL, + "global", &gc->num_global_regs[i], tmp, &ptr, + &blob->ads.capture_global[i], &ggtt, null_ggtt); + gc->global_list_size += size; + alloc_size += size; + } else { + gc->num_global_regs[i] = 0; + if (blob) + blob->ads.capture_global[i] = null_ggtt; } - - blob->ads.capture_global[i] = addr_ggtt; } + +#undef COUNT_REGS +#undef FILL_REGS +#undef TYPE_CLASS +#undef TYPE_INSTANCE + + if (guc->ads_capture_size && guc->ads_capture_size != PAGE_ALIGN(alloc_size)) + drm_warn(&i915->drm, "GuC->ADS->Capture alloc size changed from %d to %d\n", + guc->ads_capture_size, PAGE_ALIGN(alloc_size)); + + return PAGE_ALIGN(alloc_size); } static void __guc_ads_init(struct intel_guc *guc) @@ -614,6 +760,12 @@ int intel_guc_ads_create(struct intel_guc *guc) return ret; guc->ads_golden_ctxt_size = ret; + /* Likewise the capture lists: */ + ret = guc_capture_prep_lists(guc, NULL); + if (ret < 0) + return ret; + guc->ads_capture_size = ret; + /* Now the total size can be determined: */ size = guc_ads_blob_size(guc); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c new file mode 100644 index 000000000000..c741c77b7fc8 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c @@ -0,0 +1,232 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2021-2021 Intel Corporation + */ + +#include + +#include "i915_drv.h" +#include "i915_drv.h" +#include "i915_memcpy.h" +#include "gt/intel_gt.h" + +#include "intel_guc_fwif.h" +#include "intel_guc_capture.h" + +/* Define all device tables of GuC error capture register lists */ + +/********************************* Gen12 LP *********************************/ +/************** GLOBAL *************/ +struct __guc_mmio_reg_descr gen12lp_global_regs[] = { + {SWF_ILK(0), 0, 0, "SWF_ILK0"}, + /* Add additional register list */ +}; + +/********** RENDER/COMPUTE *********/ +/* Per-Class */ +struct 
__guc_mmio_reg_descr gen12lp_rc_class_regs[] = { + {SWF_ILK(0), 0, 0, "SWF_ILK0"}, + /* Add additional register list */ +}; + +/* Per-Engine-Instance */ +struct __guc_mmio_reg_descr gen12lp_rc_inst_regs[] = { + {SWF_ILK(0), 0, 0, "SWF_ILK0"}, + /* Add additional register list */ +}; + +/************* MEDIA-VD ************/ +/* Per-Class */ +struct __guc_mmio_reg_descr gen12lp_vd_class_regs[] = { + {SWF_ILK(0), 0, 0, "SWF_ILK0"}, + /* Add additional register list */ +}; + +/* Per-Engine-Instance */ +struct __guc_mmio_reg_descr gen12lp_vd_inst_regs[] = { + {SWF_ILK(0), 0, 0, "SWF_ILK0"}, + /* Add additional register list */ +}; + +/************* MEDIA-VEC ***********/ +/* Per-Class */ +struct __guc_mmio_reg_descr gen12lp_vec_class_regs[] = { + {SWF_ILK(0), 0, 0, "SWF_ILK0"}, + /* Add additional register list */ +}; + +/* Per-Engine-Instance */ +struct __guc_mmio_reg_descr gen12lp_vec_inst_regs[] = { + {SWF_ILK(0), 0, 0, "SWF_ILK0"}, + /* Add additional register list */ +}; + +/********** List of lists **********/ +struct __guc_mmio_reg_descr_group gen12lp_lists[] = { + { + .list = gen12lp_global_regs, + .num_regs = (sizeof(gen12lp_global_regs) / sizeof(struct __guc_mmio_reg_descr)), + .owner = GUC_CAPTURE_LIST_INDEX_PF, + .type = GUC_CAPTURE_LIST_TYPE_GLOBAL, + .engine = 0 + }, + { + .list = gen12lp_rc_class_regs, + .num_regs = (sizeof(gen12lp_rc_class_regs) / sizeof(struct __guc_mmio_reg_descr)), + .owner = GUC_CAPTURE_LIST_INDEX_PF, + .type = GUC_CAPTURE_LIST_TYPE_ENGINE_CLASS, + .engine = RENDER_CLASS + }, + { + .list = gen12lp_rc_inst_regs, + .num_regs = (sizeof(gen12lp_rc_inst_regs) / sizeof(struct __guc_mmio_reg_descr)), + .owner = GUC_CAPTURE_LIST_INDEX_PF, + .type = GUC_CAPTURE_LIST_TYPE_ENGINE_INSTANCE, + .engine = RENDER_CLASS + }, + { + .list = gen12lp_vd_class_regs, + .num_regs = (sizeof(gen12lp_vd_class_regs) / sizeof(struct __guc_mmio_reg_descr)), + .owner = GUC_CAPTURE_LIST_INDEX_PF, + .type = GUC_CAPTURE_LIST_TYPE_ENGINE_CLASS, + .engine = 
VIDEO_DECODE_CLASS + }, + { + .list = gen12lp_vd_inst_regs, + .num_regs = (sizeof(gen12lp_vd_inst_regs) / sizeof(struct __guc_mmio_reg_descr)), + .owner = GUC_CAPTURE_LIST_INDEX_PF, + .type = GUC_CAPTURE_LIST_TYPE_ENGINE_INSTANCE, + .engine = VIDEO_DECODE_CLASS + }, + { + .list = gen12lp_vec_class_regs, + .num_regs = (sizeof(gen12lp_vec_class_regs) / sizeof(struct __guc_mmio_reg_descr)), + .owner = GUC_CAPTURE_LIST_INDEX_PF, + .type = GUC_CAPTURE_LIST_TYPE_ENGINE_CLASS, + .engine = VIDEO_ENHANCEMENT_CLASS + }, + { + .list = gen12lp_vec_inst_regs, + .num_regs = (sizeof(gen12lp_vec_inst_regs) / sizeof(struct __guc_mmio_reg_descr)), + .owner = GUC_CAPTURE_LIST_INDEX_PF, + .type = GUC_CAPTURE_LIST_TYPE_ENGINE_INSTANCE, + .engine = VIDEO_ENHANCEMENT_CLASS + }, + {NULL, 0, 0, 0, 0} +}; + +/************ FIXME: Populate tables for other devices in subsequent patch ************/ + +static struct __guc_mmio_reg_descr_group * +guc_capture_get_device_reglist(struct drm_i915_private *dev_priv) +{ + if (IS_TIGERLAKE(dev_priv) || IS_ROCKETLAKE(dev_priv) || + IS_ALDERLAKE_S(dev_priv) || IS_ALDERLAKE_P(dev_priv)) { + return gen12lp_lists; + } + + return NULL; +} + +static inline struct __guc_mmio_reg_descr_group * +guc_capture_get_one_list(struct __guc_mmio_reg_descr_group *reglists, u32 owner, u32 type, u32 id) +{ + int i = 0; + + if (!reglists) + return NULL; + while (reglists[i].list) { + if (reglists[i].owner == owner && + reglists[i].type == type) { + if (reglists[i].type == GUC_CAPTURE_LIST_TYPE_GLOBAL || + reglists[i].engine == id) { + return &reglists[i]; + } + } + ++i; + } + return NULL; +} + +static inline void +warn_with_capture_list_identifier(struct drm_i915_private *i915, char *msg, + u32 owner, u32 type, u32 classid) +{ + const char *ownerstr[GUC_CAPTURE_LIST_INDEX_MAX] = {"PF", "VF"}; + const char *typestr[GUC_CAPTURE_LIST_TYPE_MAX - 1] = {"Class", "Instance"}; + const char *classstr[GUC_LAST_ENGINE_CLASS + 1] = {"Render", "Video", "VideoEnhance", + "Blitter", 
"Reserved"}; + static const char unknownstr[] = "unknown"; + + if (type == GUC_CAPTURE_LIST_TYPE_GLOBAL) + drm_warn(&i915->drm, "GuC-capture: %s for %s Global-Registers.\n", msg, + (owner < GUC_CAPTURE_LIST_INDEX_MAX) ? ownerstr[owner] : unknownstr); + else + drm_warn(&i915->drm, "GuC-capture: %s for %s %s-Registers on %s-Engine\n", msg, + (owner < GUC_CAPTURE_LIST_INDEX_MAX) ? ownerstr[owner] : unknownstr, + (type < GUC_CAPTURE_LIST_TYPE_MAX) ? typestr[type - 1] : unknownstr, + (classid < GUC_LAST_ENGINE_CLASS + 1) ? classstr[classid] : unknownstr); +} + +int intel_guc_capture_list_count(struct intel_guc *guc, u32 owner, u32 type, u32 classid, + u16 *num_entries) +{ + struct drm_i915_private *dev_priv = (guc_to_gt(guc))->i915; + struct __guc_mmio_reg_descr_group *reglists = guc->capture.reglists; + struct __guc_mmio_reg_descr_group *match; + + if (!reglists) + return -ENODEV; + + match = guc_capture_get_one_list(reglists, owner, type, classid); + if (match) { + *num_entries = match->num_regs; + return 0; + } + + warn_with_capture_list_identifier(dev_priv, "Missing register list size", owner, type, + classid); + + return -ENODATA; +} + +int intel_guc_capture_list_init(struct intel_guc *guc, u32 owner, u32 type, u32 classid, + struct guc_mmio_reg *ptr, u16 num_entries) +{ + u32 j = 0; + struct drm_i915_private *dev_priv = (guc_to_gt(guc))->i915; + struct __guc_mmio_reg_descr_group *reglists = guc->capture.reglists; + struct __guc_mmio_reg_descr_group *match; + + if (!reglists) + return -ENODEV; + + match = guc_capture_get_one_list(reglists, owner, type, classid); + if (match) { + while (j < num_entries && j < match->num_regs) { + ptr[j].offset = match->list[j].reg.reg; + ptr[j].value = 0xDEADF00D; + ptr[j].flags = match->list[j].flags; + ptr[j].mask = match->list[j].mask; + ++j; + } + return 0; + } + + warn_with_capture_list_identifier(dev_priv, "Missing register list init", owner, type, + classid); + + return -ENODATA; +} + +void intel_guc_capture_destroy(struct 
intel_guc *guc) +{ +} + +int intel_guc_capture_init(struct intel_guc *guc) +{ + struct drm_i915_private *dev_priv = (guc_to_gt(guc))->i915; + + guc->capture.reglists = guc_capture_get_device_reglist(dev_priv); + return 0; +} diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.h new file mode 100644 index 000000000000..352940b8bc87 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.h @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2021-2021 Intel Corporation + */ + +#ifndef _INTEL_GUC_CAPTURE_H +#define _INTEL_GUC_CAPTURE_H + +#include +#include +#include "intel_guc_fwif.h" + +struct intel_guc; + +struct __guc_mmio_reg_descr { + i915_reg_t reg; + u32 flags; + u32 mask; + char *regname; +}; + +struct __guc_mmio_reg_descr_group { + struct __guc_mmio_reg_descr *list; + u32 num_regs; + u32 owner; /* see enum guc_capture_owner */ + u32 type; /* see enum guc_capture_type */ + u32 engine; /* as per MAX_ENGINE_CLASS */ +}; + +struct intel_guc_state_capture { + struct __guc_mmio_reg_descr_group *reglists; + u16 num_instance_regs[GUC_CAPTURE_LIST_INDEX_MAX][GUC_MAX_ENGINE_CLASSES]; + u16 num_class_regs[GUC_CAPTURE_LIST_INDEX_MAX][GUC_MAX_ENGINE_CLASSES]; + u16 num_global_regs[GUC_CAPTURE_LIST_INDEX_MAX]; + int instance_list_size; + int class_list_size; + int global_list_size; +}; + +int intel_guc_capture_list_count(struct intel_guc *guc, u32 owner, u32 type, u32 class, + u16 *num_entries); +int intel_guc_capture_list_init(struct intel_guc *guc, u32 owner, u32 type, u32 class, + struct guc_mmio_reg *ptr, u16 num_entries); +void intel_guc_capture_destroy(struct intel_guc *guc); +int intel_guc_capture_init(struct intel_guc *guc); + +#endif /* _INTEL_GUC_CAPTURE_H */ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h index 767684b6af67..1a1d2271c7e9 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h +++ 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h @@ -285,13 +285,30 @@ struct guc_gt_system_info { } __packed; /* Capture-types of GuC capture register lists */ -enum +enum guc_capture_owner { GUC_CAPTURE_LIST_INDEX_PF = 0, GUC_CAPTURE_LIST_INDEX_VF = 1, GUC_CAPTURE_LIST_INDEX_MAX = 2, }; +/*Register-types of GuC capture register lists */ +enum guc_capture_type { + GUC_CAPTURE_LIST_TYPE_GLOBAL = 0, + GUC_CAPTURE_LIST_TYPE_ENGINE_CLASS, + GUC_CAPTURE_LIST_TYPE_ENGINE_INSTANCE, + GUC_CAPTURE_LIST_TYPE_MAX, +}; + +struct guc_debug_capture_list_header { + u32 info; + #define GUC_CAPTURELISTHDR_NUMDESCR GENMASK(15, 0) +}; + +struct guc_debug_capture_list { + struct guc_debug_capture_list_header header; +}; + /* GuC Additional Data Struct */ struct guc_ads { struct guc_mmio_reg_set reg_state_list[GUC_MAX_ENGINE_CLASSES][GUC_MAX_INSTANCES_PER_CLASS]; From patchwork Mon Nov 22 23:03:58 2021 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Alan Previn X-Patchwork-Id: 12633099 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.lore.kernel.org (Postfix) with ESMTPS id 460C1C433EF for ; Mon, 22 Nov 2021 23:03:09 +0000 (UTC) Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id 9A9E189F31; Mon, 22 Nov 2021 23:02:56 +0000 (UTC) Received: from mga17.intel.com (mga17.intel.com [192.55.52.151]) by gabe.freedesktop.org (Postfix) with ESMTPS id 27C8789ED6 for ; Mon, 22 Nov 2021 23:02:53 +0000 (UTC) X-IronPort-AV: E=McAfee;i="6200,9189,10176"; a="215612621" X-IronPort-AV: E=Sophos;i="5.87,255,1631602800"; d="scan'208";a="215612621" Received: from orsmga005.jf.intel.com ([10.7.209.41]) by fmsmga107.fm.intel.com with 
ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 22 Nov 2021 15:02:52 -0800 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.87,255,1631602800"; d="scan'208";a="674237067" Received: from aalteres-desk.fm.intel.com ([10.80.57.53]) by orsmga005.jf.intel.com with ESMTP; 22 Nov 2021 15:02:51 -0800 From: Alan Previn To: intel-gfx@lists.freedesktop.org Date: Mon, 22 Nov 2021 15:03:58 -0800 Message-Id: <20211122230402.2023576-4-alan.previn.teres.alexis@intel.com> X-Mailer: git-send-email 2.25.1 In-Reply-To: <20211122230402.2023576-1-alan.previn.teres.alexis@intel.com> References: <20211122230402.2023576-1-alan.previn.teres.alexis@intel.com> MIME-Version: 1.0 Subject: [Intel-gfx] [RFC 3/7] drm/i915/guc: Populate XE_LP register lists for GuC error state capture. X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Intel graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Alan Previn Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" Add device specific tables and register lists to cover different engines class types for GuC error state capture. Also, add runtime allocation and freeing of extended register lists for registers that need steering identifiers that depend on the detected HW config. 
Signed-off-by: Alan Previn --- .../gpu/drm/i915/gt/uc/intel_guc_capture.c | 260 +++++++++++++----- .../gpu/drm/i915/gt/uc/intel_guc_capture.h | 2 + drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h | 2 + 3 files changed, 197 insertions(+), 67 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c index c741c77b7fc8..eec1d193ac26 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c @@ -9,120 +9,245 @@ #include "i915_drv.h" #include "i915_memcpy.h" #include "gt/intel_gt.h" +#include "gt/intel_lrc_reg.h" #include "intel_guc_fwif.h" #include "intel_guc_capture.h" -/* Define all device tables of GuC error capture register lists */ +/* + * Define all device tables of GuC error capture register lists + * NOTE: For engine-registers, GuC only needs the register offsets + * from the engine-mmio-base + */ +#define COMMON_GEN12BASE_GLOBAL() \ + {GEN12_FAULT_TLB_DATA0, 0, 0, "GEN12_FAULT_TLB_DATA0"}, \ + {GEN12_FAULT_TLB_DATA1, 0, 0, "GEN12_FAULT_TLB_DATA1"}, \ + {FORCEWAKE_MT, 0, 0, "FORCEWAKE_MT"}, \ + {DERRMR, 0, 0, "DERRMR"}, \ + {GEN12_AUX_ERR_DBG, 0, 0, "GEN12_AUX_ERR_DBG"}, \ + {GEN12_GAM_DONE, 0, 0, "GEN12_GAM_DONE"}, \ + {GEN11_GUC_SG_INTR_ENABLE, 0, 0, "GEN11_GUC_SG_INTR_ENABLE"}, \ + {GEN11_CRYPTO_RSVD_INTR_ENABLE, 0, 0, "GEN11_CRYPTO_RSVD_INTR_ENABLE"}, \ + {GEN11_GUNIT_CSME_INTR_ENABLE, 0, 0, "GEN11_GUNIT_CSME_INTR_ENABLE"}, \ + {GEN12_RING_FAULT_REG, 0, 0, "GEN12_RING_FAULT_REG"} + +#define COMMON_GEN12BASE_ENGINE_INSTANCE() \ + {RING_PSMI_CTL(0), 0, 0, "RING_PSMI_CTL"}, \ + {RING_ESR(0), 0, 0, "RING_ESR"}, \ + {RING_ESR(0), 0, 0, "RING_ESR"}, \ + {RING_DMA_FADD(0), 0, 0, "RING_DMA_FADD_LOW32"}, \ + {RING_DMA_FADD_UDW(0), 0, 0, "RING_DMA_FADD_UP32"}, \ + {RING_IPEIR(0), 0, 0, "RING_IPEIR"}, \ + {RING_IPEHR(0), 0, 0, "RING_IPEHR"}, \ + {RING_INSTPS(0), 0, 0, "RING_INSTPS"}, \ + {RING_BBADDR(0), 0, 0, "RING_BBADDR_LOW32"}, \ + {RING_BBADDR_UDW(0), 
0, 0, "RING_BBADDR_UP32"}, \ + {RING_BBSTATE(0), 0, 0, "RING_BBSTATE"}, \ + {CCID(0), 0, 0, "CCID"}, \ + {RING_ACTHD(0), 0, 0, "RING_ACTHD_LOW32"}, \ + {RING_ACTHD_UDW(0), 0, 0, "RING_ACTHD_UP32"}, \ + {RING_INSTPM(0), 0, 0, "RING_INSTPM"}, \ + {RING_NOPID(0), 0, 0, "RING_NOPID"}, \ + {RING_START(0), 0, 0, "RING_START"}, \ + {RING_HEAD(0), 0, 0, "RING_HEAD"}, \ + {RING_TAIL(0), 0, 0, "RING_TAIL"}, \ + {RING_CTL(0), 0, 0, "RING_CTL"}, \ + {RING_MI_MODE(0), 0, 0, "RING_MI_MODE"}, \ + {RING_CONTEXT_CONTROL(0), 0, 0, "RING_CONTEXT_CONTROL"}, \ + {RING_INSTDONE(0), 0, 0, "RING_INSTDONE"}, \ + {RING_HWS_PGA(0), 0, 0, "RING_HWS_PGA"}, \ + {RING_MODE_GEN7(0), 0, 0, "RING_MODE_GEN7"}, \ + {GEN8_RING_PDP_LDW(0, 0), 0, 0, "GEN8_RING_PDP0_LDW"}, \ + {GEN8_RING_PDP_UDW(0, 0), 0, 0, "GEN8_RING_PDP0_UDW"}, \ + {GEN8_RING_PDP_LDW(0, 1), 0, 0, "GEN8_RING_PDP1_LDW"}, \ + {GEN8_RING_PDP_UDW(0, 1), 0, 0, "GEN8_RING_PDP1_UDW"}, \ + {GEN8_RING_PDP_LDW(0, 2), 0, 0, "GEN8_RING_PDP2_LDW"}, \ + {GEN8_RING_PDP_UDW(0, 2), 0, 0, "GEN8_RING_PDP2_UDW"}, \ + {GEN8_RING_PDP_LDW(0, 3), 0, 0, "GEN8_RING_PDP3_LDW"}, \ + {GEN8_RING_PDP_UDW(0, 3), 0, 0, "GEN8_RING_PDP3_UDW"} + +#define COMMON_GEN12BASE_HAS_EU() \ + {EIR, 0, 0, "EIR"} + +#define COMMON_GEN12BASE_RENDER() \ + {GEN7_SC_INSTDONE, 0, 0, "GEN7_SC_INSTDONE"}, \ + {GEN12_SC_INSTDONE_EXTRA, 0, 0, "GEN12_SC_INSTDONE_EXTRA"}, \ + {GEN12_SC_INSTDONE_EXTRA2, 0, 0, "GEN12_SC_INSTDONE_EXTRA2"} + +#define COMMON_GEN12BASE_VEC() \ + {GEN11_VCS_VECS_INTR_ENABLE, 0, 0, "GEN11_VCS_VECS_INTR_ENABLE"}, \ + {GEN12_SFC_DONE(0), 0, 0, "GEN12_SFC_DONE0"}, \ + {GEN12_SFC_DONE(1), 0, 0, "GEN12_SFC_DONE1"}, \ + {GEN12_SFC_DONE(2), 0, 0, "GEN12_SFC_DONE2"}, \ + {GEN12_SFC_DONE(3), 0, 0, "GEN12_SFC_DONE3"} /********************************* Gen12 LP *********************************/ /************** GLOBAL *************/ struct __guc_mmio_reg_descr gen12lp_global_regs[] = { - {SWF_ILK(0), 0, 0, "SWF_ILK0"}, - /* Add additional register list */ + 
COMMON_GEN12BASE_GLOBAL(), + {GEN7_ROW_INSTDONE, 0, 0, "GEN7_ROW_INSTDONE"}, }; /********** RENDER/COMPUTE *********/ /* Per-Class */ struct __guc_mmio_reg_descr gen12lp_rc_class_regs[] = { - {SWF_ILK(0), 0, 0, "SWF_ILK0"}, - /* Add additional register list */ + COMMON_GEN12BASE_HAS_EU(), + COMMON_GEN12BASE_RENDER(), + {GEN11_RENDER_COPY_INTR_ENABLE, 0, 0, "GEN11_RENDER_COPY_INTR_ENABLE"}, }; /* Per-Engine-Instance */ struct __guc_mmio_reg_descr gen12lp_rc_inst_regs[] = { - {SWF_ILK(0), 0, 0, "SWF_ILK0"}, - /* Add additional register list */ + COMMON_GEN12BASE_ENGINE_INSTANCE(), }; /************* MEDIA-VD ************/ /* Per-Class */ struct __guc_mmio_reg_descr gen12lp_vd_class_regs[] = { - {SWF_ILK(0), 0, 0, "SWF_ILK0"}, - /* Add additional register list */ }; /* Per-Engine-Instance */ struct __guc_mmio_reg_descr gen12lp_vd_inst_regs[] = { - {SWF_ILK(0), 0, 0, "SWF_ILK0"}, - /* Add additional register list */ + COMMON_GEN12BASE_ENGINE_INSTANCE(), }; /************* MEDIA-VEC ***********/ /* Per-Class */ struct __guc_mmio_reg_descr gen12lp_vec_class_regs[] = { - {SWF_ILK(0), 0, 0, "SWF_ILK0"}, - /* Add additional register list */ + COMMON_GEN12BASE_VEC(), }; /* Per-Engine-Instance */ struct __guc_mmio_reg_descr gen12lp_vec_inst_regs[] = { - {SWF_ILK(0), 0, 0, "SWF_ILK0"}, - /* Add additional register list */ + COMMON_GEN12BASE_ENGINE_INSTANCE(), +}; + +/************* BLITTER ***********/ +/* Per-Class */ +struct __guc_mmio_reg_descr gen12lp_blt_class_regs[] = { +}; + +/* Per-Engine-Instance */ +struct __guc_mmio_reg_descr gen12lp_blt_inst_regs[] = { + COMMON_GEN12BASE_ENGINE_INSTANCE(), }; +#define TO_GCAP_DEF(x) (GUC_CAPTURE_LIST_##x) +#define MAKE_GCAP_REGLIST_DESCR(regslist, regsowner, regstype, class) \ + { \ + .list = (regslist), \ + .num_regs = (sizeof(regslist) / sizeof(struct __guc_mmio_reg_descr)), \ + .owner = TO_GCAP_DEF(regsowner), \ + .type = TO_GCAP_DEF(regstype), \ + .engine = class, \ + .num_ext = 0, \ + .ext = NULL, \ + } + + /********** List of 
lists **********/ -struct __guc_mmio_reg_descr_group gen12lp_lists[] = { - { - .list = gen12lp_global_regs, - .num_regs = (sizeof(gen12lp_global_regs) / sizeof(struct __guc_mmio_reg_descr)), - .owner = GUC_CAPTURE_LIST_INDEX_PF, - .type = GUC_CAPTURE_LIST_TYPE_GLOBAL, - .engine = 0 - }, - { - .list = gen12lp_rc_class_regs, - .num_regs = (sizeof(gen12lp_rc_class_regs) / sizeof(struct __guc_mmio_reg_descr)), - .owner = GUC_CAPTURE_LIST_INDEX_PF, - .type = GUC_CAPTURE_LIST_TYPE_ENGINE_CLASS, - .engine = RENDER_CLASS - }, - { - .list = gen12lp_rc_inst_regs, - .num_regs = (sizeof(gen12lp_rc_inst_regs) / sizeof(struct __guc_mmio_reg_descr)), - .owner = GUC_CAPTURE_LIST_INDEX_PF, - .type = GUC_CAPTURE_LIST_TYPE_ENGINE_INSTANCE, - .engine = RENDER_CLASS - }, - { - .list = gen12lp_vd_class_regs, - .num_regs = (sizeof(gen12lp_vd_class_regs) / sizeof(struct __guc_mmio_reg_descr)), - .owner = GUC_CAPTURE_LIST_INDEX_PF, - .type = GUC_CAPTURE_LIST_TYPE_ENGINE_CLASS, - .engine = VIDEO_DECODE_CLASS - }, - { - .list = gen12lp_vd_inst_regs, - .num_regs = (sizeof(gen12lp_vd_inst_regs) / sizeof(struct __guc_mmio_reg_descr)), - .owner = GUC_CAPTURE_LIST_INDEX_PF, - .type = GUC_CAPTURE_LIST_TYPE_ENGINE_INSTANCE, - .engine = VIDEO_DECODE_CLASS - }, - { - .list = gen12lp_vec_class_regs, - .num_regs = (sizeof(gen12lp_vec_class_regs) / sizeof(struct __guc_mmio_reg_descr)), - .owner = GUC_CAPTURE_LIST_INDEX_PF, - .type = GUC_CAPTURE_LIST_TYPE_ENGINE_CLASS, - .engine = VIDEO_ENHANCEMENT_CLASS - }, - { - .list = gen12lp_vec_inst_regs, - .num_regs = (sizeof(gen12lp_vec_inst_regs) / sizeof(struct __guc_mmio_reg_descr)), - .owner = GUC_CAPTURE_LIST_INDEX_PF, - .type = GUC_CAPTURE_LIST_TYPE_ENGINE_INSTANCE, - .engine = VIDEO_ENHANCEMENT_CLASS - }, +struct __guc_mmio_reg_descr_group xe_lpd_lists[] = { + MAKE_GCAP_REGLIST_DESCR(gen12lp_global_regs, INDEX_PF, TYPE_GLOBAL, 0), + MAKE_GCAP_REGLIST_DESCR(gen12lp_rc_class_regs, INDEX_PF, TYPE_ENGINE_CLASS, GUC_RENDER_CLASS), + 
MAKE_GCAP_REGLIST_DESCR(gen12lp_rc_inst_regs, INDEX_PF, TYPE_ENGINE_INSTANCE, GUC_RENDER_CLASS), + MAKE_GCAP_REGLIST_DESCR(gen12lp_vd_class_regs, INDEX_PF, TYPE_ENGINE_CLASS, GUC_VIDEO_CLASS), + MAKE_GCAP_REGLIST_DESCR(gen12lp_vd_inst_regs, INDEX_PF, TYPE_ENGINE_INSTANCE, GUC_VIDEO_CLASS), + MAKE_GCAP_REGLIST_DESCR(gen12lp_vec_class_regs, INDEX_PF, TYPE_ENGINE_CLASS, GUC_VIDEOENHANCE_CLASS), + MAKE_GCAP_REGLIST_DESCR(gen12lp_vec_inst_regs, INDEX_PF, TYPE_ENGINE_INSTANCE, GUC_VIDEOENHANCE_CLASS), + MAKE_GCAP_REGLIST_DESCR(gen12lp_blt_class_regs, INDEX_PF, TYPE_ENGINE_CLASS, GUC_BLITTER_CLASS), + MAKE_GCAP_REGLIST_DESCR(gen12lp_blt_inst_regs, INDEX_PF, TYPE_ENGINE_INSTANCE, GUC_BLITTER_CLASS), {NULL, 0, 0, 0, 0} }; -/************ FIXME: Populate tables for other devices in subsequent patch ************/ +/************* Populate additional registers / device tables *************/ + +static inline struct __guc_mmio_reg_descr ** +guc_capture_get_ext_list_ptr(struct __guc_mmio_reg_descr_group * lists, u32 owner, u32 type, u32 class) +{ + while(lists->list){ + if (lists->owner == owner && lists->type == type && lists->engine == class) + break; + ++lists; + } + if (!lists->list) + return NULL; + + return &(lists->ext); +} + +void guc_capture_clear_ext_regs(struct __guc_mmio_reg_descr_group * lists) +{ + while(lists->list){ + if (lists->ext) { + kfree(lists->ext); + lists->ext = NULL; + } + ++lists; + } + return; +} + +static void +xelpd_alloc_steered_ext_list(struct drm_i915_private *i915, + struct __guc_mmio_reg_descr_group * lists) +{ + struct intel_gt *gt = &i915->gt; + struct sseu_dev_info *sseu; + int slice, subslice, i, num_tot_regs = 0; + struct __guc_mmio_reg_descr **ext; + static char * const strings[] = { + [0] = "GEN7_SAMPLER_INSTDONE", + [1] = "GEN7_ROW_INSTDONE", + }; + + /* In XE_LP we only care about render-class steering registers during error-capture */ + ext = guc_capture_get_ext_list_ptr(lists, GUC_CAPTURE_LIST_INDEX_PF, + 
GUC_CAPTURE_LIST_TYPE_ENGINE_CLASS, GUC_RENDER_CLASS); + if (!ext) + return; + if (*ext) + return; /* already populated */ + + sseu = &gt->info.sseu; + for_each_instdone_slice_subslice(i915, sseu, slice, subslice) { + num_tot_regs += 2; /* two registers of interest for now */ + } + if (!num_tot_regs) + return; + + *ext = kzalloc(2 * num_tot_regs * sizeof(struct __guc_mmio_reg_descr), GFP_KERNEL); + if (!*ext) { + drm_warn(&i915->drm, "GuC-capture: Fail to allocate for extended registers\n"); + return; + } + + for_each_instdone_slice_subslice(i915, sseu, slice, subslice) { + for (i = 0; i < 2; i++) { + if (i == 0) + (*ext)->reg = GEN7_SAMPLER_INSTDONE; + else + (*ext)->reg = GEN7_ROW_INSTDONE; + (*ext)->flags = FIELD_PREP(GUC_REGSET_STEERING_GROUP, slice); + (*ext)->flags |= FIELD_PREP(GUC_REGSET_STEERING_INSTANCE, subslice); + (*ext)->regname = strings[i]; + (*ext)++; + } + } +} static struct __guc_mmio_reg_descr_group * guc_capture_get_device_reglist(struct drm_i915_private *dev_priv) { if (IS_TIGERLAKE(dev_priv) || IS_ROCKETLAKE(dev_priv) || IS_ALDERLAKE_S(dev_priv) || IS_ALDERLAKE_P(dev_priv)) { - return gen12lp_lists; + /* + * For certain engine classes, there are slice and subslice + * level registers requiring steering. We allocate and populate + * these at init time based on hw config add it as an extension + * list at the end of the pre-populated render list.
+ */ + xelpd_alloc_steered_ext_list(dev_priv, xe_lpd_lists); + return xe_lpd_lists; } return NULL; @@ -221,6 +346,7 @@ int intel_guc_capture_list_init(struct intel_guc *guc, u32 owner, u32 type, u32 void intel_guc_capture_destroy(struct intel_guc *guc) { + guc_capture_clear_ext_regs(guc->capture.reglists); } int intel_guc_capture_init(struct intel_guc *guc) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.h index 352940b8bc87..df420f0f49b3 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.h @@ -25,6 +25,8 @@ struct __guc_mmio_reg_descr_group { u32 owner; /* see enum guc_capture_owner */ u32 type; /* see enum guc_capture_type */ u32 engine; /* as per MAX_ENGINE_CLASS */ + int num_ext; + struct __guc_mmio_reg_descr * ext; }; struct intel_guc_state_capture { diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h index 1a1d2271c7e9..c26cfefd916c 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h @@ -267,6 +267,8 @@ struct guc_mmio_reg { u32 value; u32 flags; #define GUC_REGSET_MASKED (1 << 0) +#define GUC_REGSET_STEERING_GROUP GENMASK(15, 12) +#define GUC_REGSET_STEERING_INSTANCE GENMASK(23, 20) u32 mask; } __packed; From patchwork Mon Nov 22 23:03:59 2021 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Alan Previn X-Patchwork-Id: 12633095 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.lore.kernel.org (Postfix) with ESMTPS id 75163C433EF for ; Mon, 22 Nov 2021 23:03:06 +0000 (UTC) Received: from gabe.freedesktop.org (localhost 
[127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id 7FC2489F2E; Mon, 22 Nov 2021 23:02:56 +0000 (UTC) Received: from mga17.intel.com (mga17.intel.com [192.55.52.151]) by gabe.freedesktop.org (Postfix) with ESMTPS id 5949E89ECD for ; Mon, 22 Nov 2021 23:02:53 +0000 (UTC) X-IronPort-AV: E=McAfee;i="6200,9189,10176"; a="215612622" X-IronPort-AV: E=Sophos;i="5.87,255,1631602800"; d="scan'208";a="215612622" Received: from orsmga005.jf.intel.com ([10.7.209.41]) by fmsmga107.fm.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 22 Nov 2021 15:02:52 -0800 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.87,255,1631602800"; d="scan'208";a="674237071" Received: from aalteres-desk.fm.intel.com ([10.80.57.53]) by orsmga005.jf.intel.com with ESMTP; 22 Nov 2021 15:02:51 -0800 From: Alan Previn To: intel-gfx@lists.freedesktop.org Date: Mon, 22 Nov 2021 15:03:59 -0800 Message-Id: <20211122230402.2023576-5-alan.previn.teres.alexis@intel.com> X-Mailer: git-send-email 2.25.1 In-Reply-To: <20211122230402.2023576-1-alan.previn.teres.alexis@intel.com> References: <20211122230402.2023576-1-alan.previn.teres.alexis@intel.com> MIME-Version: 1.0 Subject: [Intel-gfx] [RFC 4/7] drm/i915/guc: Add GuC's error state capture output structures. X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Intel graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Alan Previn Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" Add GuC's error capture output structures and definitions as how they would appear in GuC log buffer's error capture subregion after an error state capture G2H event notification. 
Signed-off-by: Alan Previn Reviewed-by: Matthew Brost --- .../gpu/drm/i915/gt/uc/intel_guc_capture.h | 35 +++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.h index df420f0f49b3..b2454b6cd778 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.h @@ -29,6 +29,41 @@ struct __guc_mmio_reg_descr_group { struct __guc_mmio_reg_descr * ext; }; +struct intel_guc_capture_out_data_header { + u32 reserved1; + u32 info; + #define GUC_CAPTURE_DATAHDR_SRC_TYPE GENMASK(3, 0) /* as per enum guc_capture_type */ + #define GUC_CAPTURE_DATAHDR_SRC_CLASS GENMASK(7, 4) /* as per GUC_MAX_ENGINE_CLASSES */ + #define GUC_CAPTURE_DATAHDR_SRC_INSTANCE GENMASK(11, 8) + u32 lrca; /* if type-instance, LRCA (address) that hung, else set to ~0 */ + u32 guc_ctx_id; /* if type-instance, context index of hung context, else set to ~0 */ + u32 num_mmios; + #define GUC_CAPTURE_DATAHDR_NUM_MMIOS GENMASK(9, 0) +}; + +struct intel_guc_capture_out_data { + struct intel_guc_capture_out_data_header capture_header; + struct guc_mmio_reg capture_list[0]; +}; + +enum guc_capture_group_types { + GUC_STATE_CAPTURE_GROUP_TYPE_FULL, + GUC_STATE_CAPTURE_GROUP_TYPE_PARTIAL, + GUC_STATE_CAPTURE_GROUP_TYPE_MAX, +}; + +struct intel_guc_capture_out_group_header { + u32 reserved1; + u32 info; + #define GUC_CAPTURE_GRPHDR_SRC_NUMCAPTURES GENMASK(7, 0) + #define GUC_CAPTURE_GRPHDR_SRC_CAPTURE_TYPE GENMASK(15, 8) +}; + +struct intel_guc_capture_out_group { + struct intel_guc_capture_out_group_header group_header; + struct intel_guc_capture_out_data group_lists[0]; +}; + struct intel_guc_state_capture { struct __guc_mmio_reg_descr_group *reglists; u16 num_instance_regs[GUC_CAPTURE_LIST_INDEX_MAX][GUC_MAX_ENGINE_CLASSES]; From patchwork Mon Nov 22 23:04:00 2021 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit 
X-Patchwork-Submitter: Alan Previn X-Patchwork-Id: 12633091 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.lore.kernel.org (Postfix) with ESMTPS id D0B85C433FE for ; Mon, 22 Nov 2021 23:03:02 +0000 (UTC) Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id DBCE889F07; Mon, 22 Nov 2021 23:02:55 +0000 (UTC) Received: from mga17.intel.com (mga17.intel.com [192.55.52.151]) by gabe.freedesktop.org (Postfix) with ESMTPS id 75AD889ED6 for ; Mon, 22 Nov 2021 23:02:53 +0000 (UTC) X-IronPort-AV: E=McAfee;i="6200,9189,10176"; a="215612623" X-IronPort-AV: E=Sophos;i="5.87,255,1631602800"; d="scan'208";a="215612623" Received: from orsmga005.jf.intel.com ([10.7.209.41]) by fmsmga107.fm.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 22 Nov 2021 15:02:52 -0800 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.87,255,1631602800"; d="scan'208";a="674237079" Received: from aalteres-desk.fm.intel.com ([10.80.57.53]) by orsmga005.jf.intel.com with ESMTP; 22 Nov 2021 15:02:52 -0800 From: Alan Previn To: intel-gfx@lists.freedesktop.org Date: Mon, 22 Nov 2021 15:04:00 -0800 Message-Id: <20211122230402.2023576-6-alan.previn.teres.alexis@intel.com> X-Mailer: git-send-email 2.25.1 In-Reply-To: <20211122230402.2023576-1-alan.previn.teres.alexis@intel.com> References: <20211122230402.2023576-1-alan.previn.teres.alexis@intel.com> MIME-Version: 1.0 Subject: [Intel-gfx] [RFC 5/7] drm/i915/guc: Update GuC's log-buffer-state access for error capture. 
X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Intel graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Alan Previn Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" GuC log buffer regions for debug-log-events, crash-dumps and error-state-capture are all a single bo allocation that includes the guc_log_buffer_state structures. Since the error-capture region is accessed with high priority at non-deterministic times (as part of gpu coredump) while the debug-log-event region is populated and accessed with different priorities, timings and consumers, let's split out separate locks for buffer-state accesses of each region. Also, ensure a global mapping is made up front for the entire bo throughout GuC operation so that dynamic mapping and unmapping isn't required for error capture log access if relay-logging isn't running. Additionally, while here, make some readability improvements: 1. Change previous function names with "capture_logs" to "copy_debug_logs" to help make the distinction clearer. 2. Update the guc log region mapping comments to order them according to the enum definition as per the GuC interface.
Signed-off-by: Alan Previn --- drivers/gpu/drm/i915/gt/uc/intel_guc.h | 2 + .../gpu/drm/i915/gt/uc/intel_guc_capture.c | 46 +++++++ .../gpu/drm/i915/gt/uc/intel_guc_capture.h | 1 + drivers/gpu/drm/i915/gt/uc/intel_guc_log.c | 120 ++++++++++++------ drivers/gpu/drm/i915/gt/uc/intel_guc_log.h | 14 +- 5 files changed, 137 insertions(+), 46 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h index d136c69abe12..e0db21bbffdd 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h @@ -34,6 +34,8 @@ struct intel_guc { struct intel_uc_fw fw; /** @log: sub-structure containing GuC log related data and objects */ struct intel_guc_log log; + /** @log_state: states and locks for each subregion of GuC's log buffer */ + struct intel_guc_log_stats log_state[GUC_MAX_LOG_BUFFER]; /** @ct: the command transport communication channel */ struct intel_guc_ct ct; /** @slpc: sub-structure containing SLPC related data and objects */ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c index eec1d193ac26..0cb358a98605 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c @@ -344,6 +344,52 @@ int intel_guc_capture_list_init(struct intel_guc *guc, u32 owner, u32 type, u32 return -ENODATA; } +int intel_guc_capture_output_min_size_est(struct intel_guc *guc) +{ + struct intel_gt *gt = guc_to_gt(guc); + struct intel_engine_cs *engine; + enum intel_engine_id id; + int worst_min_size = 0, num_regs = 0; + u16 tmp = 0; + + /* + * If every single engine-instance suffered a failure in quick succession but + * were all unrelated, then a burst of multiple error-capture events would dump + * registers for every one engine instance, one at a time. In this case, GuC + * would even dump the global-registers repeatedly. 
+ * + * For each engine instance, there would be 1 x intel_guc_capture_out_group output + * followed by 3 x intel_guc_capture_out_data lists. The latter is how the register + * dumps are split across different register types (where the '3' are global vs class + * vs instance). Finally, let's multiply the whole thing by 3x (just so we are + * not limited to just 1 rounds of data in a worst case full register dump log) + * + * NOTE: intel_guc_log that allocates the log buffer would round this size up to + * a power of two. + */ + + for_each_engine(engine, gt, id) { + worst_min_size += sizeof(struct intel_guc_capture_out_group_header) + + (3 * sizeof(struct intel_guc_capture_out_data_header)); + + if (!intel_guc_capture_list_count(guc, 0, GUC_CAPTURE_LIST_TYPE_GLOBAL, 0, &tmp)) + num_regs += tmp; + + if (!intel_guc_capture_list_count(guc, 0, GUC_CAPTURE_LIST_TYPE_ENGINE_CLASS, + engine->class, &tmp)) { + num_regs += tmp; + } + if (!intel_guc_capture_list_count(guc, 0, GUC_CAPTURE_LIST_TYPE_ENGINE_INSTANCE, + engine->class, &tmp)) { + num_regs += tmp; + } + } + + worst_min_size += (num_regs * sizeof(struct guc_mmio_reg)); + + return (worst_min_size * 3); +} + void intel_guc_capture_destroy(struct intel_guc *guc) { guc_capture_clear_ext_regs(guc->capture.reglists); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.h index b2454b6cd778..839b53425e1e 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.h @@ -78,6 +78,7 @@ int intel_guc_capture_list_count(struct intel_guc *guc, u32 owner, u32 type, u32 u16 *num_entries); int intel_guc_capture_list_init(struct intel_guc *guc, u32 owner, u32 type, u32 class, struct guc_mmio_reg *ptr, u16 num_entries); +int intel_guc_capture_output_min_size_est(struct intel_guc *guc); void intel_guc_capture_destroy(struct intel_guc *guc); int intel_guc_capture_init(struct intel_guc *guc); diff --git 
a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c index 1962a43302a8..dd86530f77a1 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c @@ -10,7 +10,7 @@ #include "i915_memcpy.h" #include "intel_guc_log.h" -static void guc_log_capture_logs(struct intel_guc_log *log); +static void guc_log_copy_debuglogs_for_relay(struct intel_guc_log *log); /** * DOC: GuC firmware log @@ -149,7 +149,7 @@ static void guc_move_to_next_buf(struct intel_guc_log *log) smp_wmb(); /* All data has been written, so now move the offset of sub buffer. */ - relay_reserve(log->relay.channel, log->vma->obj->base.size); + relay_reserve(log->relay.channel, log->vma->obj->base.size - CAPTURE_BUFFER_SIZE); /* Switch to the next sub buffer */ relay_flush(log->relay.channel); @@ -169,25 +169,25 @@ static void *guc_get_write_buffer(struct intel_guc_log *log) return relay_reserve(log->relay.channel, 0); } -static bool guc_check_log_buf_overflow(struct intel_guc_log *log, - enum guc_log_buffer_type type, - unsigned int full_cnt) +bool guc_check_log_buf_overflow(struct intel_guc *guc, + struct intel_guc_log_stats *log_state, + unsigned int full_cnt) { - unsigned int prev_full_cnt = log->stats[type].sampled_overflow; + unsigned int prev_full_cnt = log_state->sampled_overflow; bool overflow = false; if (full_cnt != prev_full_cnt) { overflow = true; - log->stats[type].overflow = full_cnt; - log->stats[type].sampled_overflow += full_cnt - prev_full_cnt; + log_state->overflow = full_cnt; + log_state->sampled_overflow += full_cnt - prev_full_cnt; if (full_cnt < prev_full_cnt) { /* buffer_full_cnt is a 4 bit counter */ - log->stats[type].sampled_overflow += 16; + log_state->sampled_overflow += 16; } - dev_notice_ratelimited(guc_to_gt(log_to_guc(log))->i915->drm.dev, + dev_notice_ratelimited(guc_to_gt(guc)->i915->drm.dev, "GuC log buffer overflow\n"); } @@ -210,8 +210,10 @@ static unsigned int guc_get_log_buffer_size(enum 
guc_log_buffer_type type) return 0; } -static void guc_read_update_log_buffer(struct intel_guc_log *log) +static void _guc_log_copy_debuglogs_for_relay(struct intel_guc_log *log) { + struct intel_guc *guc = log_to_guc(log); + struct intel_guc_log_stats *logstate; unsigned int buffer_size, read_offset, write_offset, bytes_to_copy, full_cnt; struct guc_log_buffer_state *log_buf_state, *log_buf_snapshot_state; struct guc_log_buffer_state log_buf_state_local; @@ -235,7 +237,7 @@ static void guc_read_update_log_buffer(struct intel_guc_log *log) * Used rate limited to avoid deluge of messages, logs might be * getting consumed by User at a slow rate. */ - DRM_ERROR_RATELIMITED("no sub-buffer to capture logs\n"); + DRM_ERROR_RATELIMITED("no sub-buffer to copy general logs\n"); log->relay.full_count++; goto out_unlock; @@ -245,12 +247,16 @@ static void guc_read_update_log_buffer(struct intel_guc_log *log) src_data += PAGE_SIZE; dst_data += PAGE_SIZE; - for (type = GUC_DEBUG_LOG_BUFFER; type < GUC_MAX_LOG_BUFFER; type++) { + /* For relay logging, we exclude error state capture */ + for (type = GUC_DEBUG_LOG_BUFFER; type <= GUC_CRASH_DUMP_LOG_BUFFER; type++) { /* + * Get a lock to the buffer_state we want to read and update. * Make a copy of the state structure, inside GuC log buffer * (which is uncached mapped), on the stack to avoid reading * from it multiple times. 
*/ + logstate = &guc->log_state[type]; + mutex_lock(&logstate->lock); memcpy(&log_buf_state_local, log_buf_state, sizeof(struct guc_log_buffer_state)); buffer_size = guc_get_log_buffer_size(type); @@ -259,13 +265,14 @@ static void guc_read_update_log_buffer(struct intel_guc_log *log) full_cnt = log_buf_state_local.buffer_full_cnt; /* Bookkeeping stuff */ - log->stats[type].flush += log_buf_state_local.flush_to_file; - new_overflow = guc_check_log_buf_overflow(log, type, full_cnt); + logstate->flush += log_buf_state_local.flush_to_file; + new_overflow = guc_check_log_buf_overflow(guc, logstate, full_cnt); /* Update the state of shared log buffer */ log_buf_state->read_ptr = write_offset; log_buf_state->flush_to_file = 0; log_buf_state++; + mutex_unlock(&logstate->lock); /* First copy the state structure in snapshot buffer */ memcpy(log_buf_snapshot_state, &log_buf_state_local, @@ -313,15 +320,15 @@ static void guc_read_update_log_buffer(struct intel_guc_log *log) mutex_unlock(&log->relay.lock); } -static void capture_logs_work(struct work_struct *work) +static void copy_debug_logs_work(struct work_struct *work) { struct intel_guc_log *log = container_of(work, struct intel_guc_log, relay.flush_work); - guc_log_capture_logs(log); + guc_log_copy_debuglogs_for_relay(log); } -static int guc_log_map(struct intel_guc_log *log) +static int guc_log_relay_map(struct intel_guc_log *log) { void *vaddr; @@ -333,7 +340,9 @@ static int guc_log_map(struct intel_guc_log *log) /* * Create a WC (Uncached for read) vmalloc mapping of log * buffer pages, so that we can directly get the data - * (up-to-date) from memory. + * (up-to-date) from memory. 
This has already been + * mapped at GuC Init time (for error-state-capture), but + * call it again anyway for book-keeping */ vaddr = i915_gem_object_pin_map_unlocked(log->vma->obj, I915_MAP_WC); if (IS_ERR(vaddr)) @@ -344,7 +353,7 @@ static int guc_log_map(struct intel_guc_log *log) return 0; } -static void guc_log_unmap(struct intel_guc_log *log) +static void guc_log_relay_unmap(struct intel_guc_log *log) { lockdep_assert_held(&log->relay.lock); @@ -354,8 +363,14 @@ static void guc_log_unmap(struct intel_guc_log *log) void intel_guc_log_init_early(struct intel_guc_log *log) { + struct intel_guc *guc = log_to_guc(log); + int n; + + for (n = GUC_DEBUG_LOG_BUFFER; n < GUC_MAX_LOG_BUFFER; n++) + mutex_init(&guc->log_state[n].lock); + mutex_init(&log->relay.lock); - INIT_WORK(&log->relay.flush_work, capture_logs_work); + INIT_WORK(&log->relay.flush_work, copy_debug_logs_work); log->relay.started = false; } @@ -370,8 +385,11 @@ static int guc_log_relay_create(struct intel_guc_log *log) lockdep_assert_held(&log->relay.lock); GEM_BUG_ON(!log->vma); - /* Keep the size of sub buffers same as shared log buffer */ - subbuf_size = log->vma->size; + /* + * Keep the size of sub buffers same as shared log buffer + * but GuC log-events excludes the error-state-capture logs + */ + subbuf_size = log->vma->size - CAPTURE_BUFFER_SIZE; /* * Store up to 8 snapshots, which is large enough to buffer sufficient @@ -406,13 +424,13 @@ static void guc_log_relay_destroy(struct intel_guc_log *log) log->relay.channel = NULL; } -static void guc_log_capture_logs(struct intel_guc_log *log) +static void guc_log_copy_debuglogs_for_relay(struct intel_guc_log *log) { struct intel_guc *guc = log_to_guc(log); struct drm_i915_private *dev_priv = guc_to_gt(guc)->i915; intel_wakeref_t wakeref; - guc_read_update_log_buffer(log); + _guc_log_copy_debuglogs_for_relay(log); /* * Generally device is expected to be active only at this @@ -452,6 +470,7 @@ int intel_guc_log_create(struct intel_guc_log *log) { 
struct intel_guc *guc = log_to_guc(log); struct i915_vma *vma; + void *vaddr; u32 guc_log_size; int ret; @@ -459,23 +478,31 @@ int intel_guc_log_create(struct intel_guc_log *log) /* * GuC Log buffer Layout + * (this ordering must follow "enum guc_log_buffer_type" definition) * * +===============================+ 00B - * | Crash dump state header | - * +-------------------------------+ 32B * | Debug state header | + * +-------------------------------+ 32B + * | Crash dump state header | + * +-------------------------------+ 64B + * | Capture state header | * +-------------------------------+ 64B * | Capture state header | * +-------------------------------+ 96B * | | * +===============================+ PAGE_SIZE (4KB) - * | Crash Dump logs | - * +===============================+ + CRASH_SIZE * | Debug logs | * +===============================+ + DEBUG_SIZE + * | Crash Dump logs | + * +===============================+ + CRASH_SIZE + * | Capture logs | + * +===============================+ + CAPTURE_SIZE */ - guc_log_size = PAGE_SIZE + CRASH_BUFFER_SIZE + DEBUG_BUFFER_SIZE + - CAPTURE_BUFFER_SIZE; + if (intel_guc_capture_output_min_size_est(guc) > CAPTURE_BUFFER_SIZE) + DRM_WARN("GuC log buffer for state_capture maybe too small. 
%d < %d\n", + CAPTURE_BUFFER_SIZE, intel_guc_capture_output_min_size_est(guc)); + + guc_log_size = PAGE_SIZE + DEBUG_BUFFER_SIZE + CRASH_BUFFER_SIZE + CAPTURE_BUFFER_SIZE; vma = intel_guc_allocate_vma(guc, guc_log_size); if (IS_ERR(vma)) { @@ -484,6 +511,17 @@ int intel_guc_log_create(struct intel_guc_log *log) } log->vma = vma; + /* + * Create a WC (Uncached for read) vmalloc mapping up front for immediate access to + * data from memory during critical events such as error capture + */ + vaddr = i915_gem_object_pin_map_unlocked(log->vma->obj, I915_MAP_WC); + if (IS_ERR(vaddr)) { + ret = PTR_ERR(vaddr); + i915_vma_unpin_and_release(&log->vma, 0); + goto err; + } + log->buf_addr = vaddr; log->level = __get_default_log_level(log); DRM_DEBUG_DRIVER("guc_log_level=%d (%s, verbose:%s, verbosity:%d)\n", @@ -494,13 +532,14 @@ int intel_guc_log_create(struct intel_guc_log *log) return 0; err: - DRM_ERROR("Failed to allocate GuC log buffer. %d\n", ret); + DRM_ERROR("Failed to allocate or map GuC log buffer. 
%d\n", ret); return ret; } void intel_guc_log_destroy(struct intel_guc_log *log) { - i915_vma_unpin_and_release(&log->vma, 0); + log->buf_addr = NULL; + i915_vma_unpin_and_release(&log->vma, I915_VMA_RELEASE_MAP); } int intel_guc_log_set_level(struct intel_guc_log *log, u32 level) @@ -545,7 +584,7 @@ int intel_guc_log_set_level(struct intel_guc_log *log, u32 level) bool intel_guc_log_relay_created(const struct intel_guc_log *log) { - return log->relay.buf_addr; + return log->buf_addr; } int intel_guc_log_relay_open(struct intel_guc_log *log) @@ -576,7 +615,7 @@ int intel_guc_log_relay_open(struct intel_guc_log *log) if (ret) goto out_unlock; - ret = guc_log_map(log); + ret = guc_log_relay_map(log); if (ret) goto out_relay; @@ -628,8 +667,8 @@ void intel_guc_log_relay_flush(struct intel_guc_log *log) with_intel_runtime_pm(guc_to_gt(guc)->uncore->rpm, wakeref) guc_action_flush_log(guc); - /* GuC would have updated log buffer by now, so capture it */ - guc_log_capture_logs(log); + /* GuC would have updated log buffer by now, so copy it */ + guc_log_copy_debuglogs_for_relay(log); } /* @@ -659,7 +698,7 @@ void intel_guc_log_relay_close(struct intel_guc_log *log) mutex_lock(&log->relay.lock); GEM_BUG_ON(!intel_guc_log_relay_created(log)); - guc_log_unmap(log); + guc_log_relay_unmap(log); guc_log_relay_destroy(log); mutex_unlock(&log->relay.lock); } @@ -695,6 +734,7 @@ stringify_guc_log_type(enum guc_log_buffer_type type) */ void intel_guc_log_info(struct intel_guc_log *log, struct drm_printer *p) { + struct intel_guc *guc = log_to_guc(log); enum guc_log_buffer_type type; if (!intel_guc_log_relay_created(log)) { @@ -709,8 +749,8 @@ void intel_guc_log_info(struct intel_guc_log *log, struct drm_printer *p) for (type = GUC_DEBUG_LOG_BUFFER; type < GUC_MAX_LOG_BUFFER; type++) { drm_printf(p, "\t%s:\tflush count %10u, overflow count %10u\n", stringify_guc_log_type(type), - log->stats[type].flush, - log->stats[type].sampled_overflow); + guc->log_state[type].flush, + 
guc->log_state[type].sampled_overflow); } } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h index 9d9004dc58f1..2968023f7447 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h @@ -42,9 +42,17 @@ struct intel_guc; #define GUC_VERBOSITY_TO_LOG_LEVEL(x) ((x) + 2) #define GUC_LOG_LEVEL_MAX GUC_VERBOSITY_TO_LOG_LEVEL(GUC_LOG_VERBOSITY_MAX) +struct intel_guc_log_stats { + struct mutex lock; /* protects below and guc_log_buffer_state's read-ptr */ + u32 sampled_overflow; + u32 overflow; + u32 flush; +}; + struct intel_guc_log { u32 level; struct i915_vma *vma; + void *buf_addr; struct { void *buf_addr; bool started; @@ -53,12 +61,6 @@ struct intel_guc_log { struct mutex lock; u32 full_count; } relay; - /* logging related stats */ - struct { - u32 sampled_overflow; - u32 overflow; - u32 flush; - } stats[GUC_MAX_LOG_BUFFER]; }; void intel_guc_log_init_early(struct intel_guc_log *log); From patchwork Mon Nov 22 23:04:01 2021 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Alan Previn X-Patchwork-Id: 12633093 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.lore.kernel.org (Postfix) with ESMTPS id 9805AC433F5 for ; Mon, 22 Nov 2021 23:03:04 +0000 (UTC) Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id 30F7689F19; Mon, 22 Nov 2021 23:02:57 +0000 (UTC) Received: from mga03.intel.com (mga03.intel.com [134.134.136.65]) by gabe.freedesktop.org (Postfix) with ESMTPS id 75EFD89EF7 for ; Mon, 22 Nov 2021 23:02:53 +0000 (UTC) X-IronPort-AV: E=McAfee;i="6200,9189,10176"; a="234850316" X-IronPort-AV: 
E=Sophos;i="5.87,255,1631602800"; d="scan'208";a="234850316" Received: from orsmga005.jf.intel.com ([10.7.209.41]) by orsmga103.jf.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 22 Nov 2021 15:02:52 -0800 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.87,255,1631602800"; d="scan'208";a="674237084" Received: from aalteres-desk.fm.intel.com ([10.80.57.53]) by orsmga005.jf.intel.com with ESMTP; 22 Nov 2021 15:02:52 -0800 From: Alan Previn To: intel-gfx@lists.freedesktop.org Date: Mon, 22 Nov 2021 15:04:01 -0800 Message-Id: <20211122230402.2023576-7-alan.previn.teres.alexis@intel.com> X-Mailer: git-send-email 2.25.1 In-Reply-To: <20211122230402.2023576-1-alan.previn.teres.alexis@intel.com> References: <20211122230402.2023576-1-alan.previn.teres.alexis@intel.com> MIME-Version: 1.0 Subject: [Intel-gfx] [RFC 6/7] drm/i915/guc: Copy new GuC error capture logs upon G2H notification. X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Intel graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Alan Previn Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" Upon the G2H Notify-Err-Capture event, queue a worker to make a snapshot of the error state capture logs from the GuC-log buffer (error capture region) into a bigger interim circular buffer store that can be parsed later during gpu coredump printing. Also, call that worker function directly for the cases where we are resetting GuC submission and need to flush outstanding logs. 
Signed-off-by: Alan Previn --- .../gpu/drm/i915/gt/uc/abi/guc_actions_abi.h | 7 + .../gpu/drm/i915/gt/uc/intel_guc_capture.c | 206 ++++++++++++++++++ .../gpu/drm/i915/gt/uc/intel_guc_capture.h | 16 ++ drivers/gpu/drm/i915/gt/uc/intel_guc_log.c | 16 +- drivers/gpu/drm/i915/gt/uc/intel_guc_log.h | 5 + .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 10 +- 6 files changed, 256 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h index 5af03a486a13..c130f465c19a 100644 --- a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h +++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h @@ -178,4 +178,11 @@ enum intel_guc_sleep_state_status { #define GUC_LOG_CONTROL_VERBOSITY_MASK (0xF << GUC_LOG_CONTROL_VERBOSITY_SHIFT) #define GUC_LOG_CONTROL_DEFAULT_LOGGING (1 << 8) +enum intel_guc_state_capture_event_status { + INTEL_GUC_STATE_CAPTURE_EVENT_STATUS_SUCCESS = 0x0, + INTEL_GUC_STATE_CAPTURE_EVENT_STATUS_NOSPACE = 0x1, +}; + +#define INTEL_GUC_STATE_CAPTURE_EVENT_STATUS_MASK 0x1 + #endif /* _ABI_GUC_ACTIONS_ABI_H */ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c index 0cb358a98605..459fe81c77ae 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c @@ -11,8 +11,11 @@ #include "gt/intel_gt.h" #include "gt/intel_lrc_reg.h" +#include + #include "intel_guc_fwif.h" #include "intel_guc_capture.h" +#include "i915_gpu_error.h" /* * Define all device tables of GuC error capture register lists @@ -390,15 +393,218 @@ int intel_guc_capture_output_min_size_est(struct intel_guc *guc) return (worst_min_size * 3); } +/* + * KMD Init time flows: + * -------------------- + * --> alloc A: GuC input capture regs lists (registered via ADS) + * List acquired via intel_guc_capture_list_count + intel_guc_capture_list_init + * Size = global-reg-list + (class-reg-list) + (num-instances x 
instance-reg-list) + * Device tables carry: 1x global, 1x per-class, 1x per-instance) + * Caller needs to call per-class and per-instance multiple times + * + * --> alloc B: GuC output capture buf (registered via guc_init_params(log_param)) + * Size = #define CAPTURE_BUFFER_SIZE (warns if too small) + * Note2: 'x 3' to hold multiple capture groups + * + * --> alloc C: GuC capture interim circular buffer storage in system mem + * Size = 'power_of_two(sizeof(B))' as per kernel circular buffer helper + * + * GUC Runtime notify capture: + * -------------------------- + * --> G2H STATE_CAPTURE_NOTIFICATION + * L--> intel_guc_capture_store_snapshot + * L--> queue(__guc_capture_store_snapshot_work) + * Copies from B (head->tail) into C + */ + +static void guc_capture_store_insert(struct intel_guc *guc, struct guc_capture_out_store *store, + unsigned char *new_data, size_t bytes) +{ + struct drm_i915_private *dev_priv = (guc_to_gt(guc))->i915; + unsigned char *dst_data = store->addr; + unsigned long h, t; + size_t tmp; + + h = store->head; + t = store->tail; + if (CIRC_SPACE(h, t, store->size) >= bytes) { + while (bytes) { + tmp = CIRC_SPACE_TO_END(h, t, store->size); + if (tmp) { + tmp = tmp < bytes ? 
tmp : bytes; + i915_unaligned_memcpy_from_wc(&dst_data[h], new_data, tmp); + bytes -= tmp; + new_data += tmp; + h = (h + tmp) & (store->size - 1); + } else { + drm_err(&dev_priv->drm, "circbuf copy-to ptr-corruption!\n"); + break; + } + } + store->head = h; + } else { + drm_err(&dev_priv->drm, "GuC capture interim-store insufficient space!\n"); + } +} + +static void __guc_capture_store_snapshot_work(struct intel_guc *guc) +{ + struct drm_i915_private *dev_priv = (guc_to_gt(guc))->i915; + unsigned int buffer_size, read_offset, write_offset, bytes_to_copy, full_count; + struct guc_log_buffer_state *log_buf_state; + struct guc_log_buffer_state log_buf_state_local; + void *src_data, *dst_data = NULL; + bool new_overflow; + + /* Lock to get the pointer to GuC capture-log-buffer-state */ + mutex_lock(&guc->log_state[GUC_CAPTURE_LOG_BUFFER].lock); + log_buf_state = guc->log.buf_addr + + (sizeof(struct guc_log_buffer_state) * GUC_CAPTURE_LOG_BUFFER); + src_data = guc->log.buf_addr + guc_get_log_buffer_offset(GUC_CAPTURE_LOG_BUFFER); + + /* + * Make a copy of the state structure, inside GuC log buffer + * (which is uncached mapped), on the stack to avoid reading + * from it multiple times. 
+ */ + memcpy(&log_buf_state_local, log_buf_state, sizeof(struct guc_log_buffer_state)); + buffer_size = guc_get_log_buffer_size(GUC_CAPTURE_LOG_BUFFER); + read_offset = log_buf_state_local.read_ptr; + write_offset = log_buf_state_local.sampled_write_ptr; + full_count = log_buf_state_local.buffer_full_cnt; + + /* Bookkeeping stuff */ + guc->log_state[GUC_CAPTURE_LOG_BUFFER].flush += log_buf_state_local.flush_to_file; + new_overflow = guc_check_log_buf_overflow(guc, &guc->log_state[GUC_CAPTURE_LOG_BUFFER], + full_count); + + /* Update the state of shared log buffer */ + log_buf_state->read_ptr = write_offset; + log_buf_state->flush_to_file = 0; + + mutex_unlock(&guc->log_state[GUC_CAPTURE_LOG_BUFFER].lock); + + dst_data = guc->capture.out_store.addr; + if (dst_data) { + mutex_lock(&guc->capture.out_store.lock); + + /* Now copy the actual logs. */ + if (unlikely(new_overflow)) { + /* copy the whole buffer in case of overflow */ + read_offset = 0; + write_offset = buffer_size; + } else if (unlikely((read_offset > buffer_size) || + (write_offset > buffer_size))) { + drm_err(&dev_priv->drm, "invalid GuC log capture buffer state!\n"); + /* copy whole buffer as offsets are unreliable */ + read_offset = 0; + write_offset = buffer_size; + } + + /* first copy from the tail end of the GuC log capture buffer */ + if (read_offset > write_offset) { + guc_capture_store_insert(guc, &guc->capture.out_store, src_data, + write_offset); + bytes_to_copy = buffer_size - read_offset; + } else { + bytes_to_copy = write_offset - read_offset; + } + guc_capture_store_insert(guc, &guc->capture.out_store, src_data + read_offset, + bytes_to_copy); + + mutex_unlock(&guc->capture.out_store.lock); + } +} + +static void guc_capture_store_snapshot_work(struct work_struct *work) +{ + struct intel_guc_state_capture *capture = + container_of(work, struct intel_guc_state_capture, store_work); + struct intel_guc *guc = + container_of(capture, struct intel_guc, capture); + + 
__guc_capture_store_snapshot_work(guc); +} + +void intel_guc_capture_store_snapshot(struct intel_guc *guc) +{ + if (guc->capture.enabled) + queue_work(system_highpri_wq, &guc->capture.store_work); +} + +void intel_guc_capture_store_snapshot_immediate(struct intel_guc *guc) +{ + if (guc->capture.enabled) + __guc_capture_store_snapshot_work(guc); +} + +static void guc_capture_store_destroy(struct intel_guc *guc) +{ + mutex_destroy(&guc->capture.out_store.lock); + mutex_destroy(&guc->capture.out_store.lock); + guc->capture.out_store.size = 0; + kfree(guc->capture.out_store.addr); + guc->capture.out_store.addr = NULL; +} + +static int guc_capture_store_create(struct intel_guc *guc) +{ + /* + * Make this interim buffer 3x the GuC capture output buffer so that we can absorb + * a little delay when processing the raw capture dumps into text friendly logs + * for the i915_gpu_coredump output + */ + size_t max_dump_size; + struct drm_i915_private *dev_priv = (guc_to_gt(guc))->i915; + + GEM_BUG_ON(guc->capture.out_store.addr); + + max_dump_size = PAGE_ALIGN(intel_guc_capture_output_min_size_est(guc)); + max_dump_size = roundup_pow_of_two(max_dump_size); + + guc->capture.out_store.addr = kzalloc(max_dump_size, GFP_KERNEL); + if (!guc->capture.out_store.addr) { + drm_warn(&dev_priv->drm, "GuC-capture interim-store populated at init!\n"); + return -ENOMEM; + } + guc->capture.out_store.size = max_dump_size; + mutex_init(&guc->capture.out_store.lock); + mutex_init(&guc->capture.out_store.lock); + + return 0; +} + void intel_guc_capture_destroy(struct intel_guc *guc) { + if (!guc->capture.enabled) + return; + + guc->capture.enabled = false; + + intel_synchronize_irq(guc_to_gt(guc)->i915); + flush_work(&guc->capture.store_work); + guc_capture_store_destroy(guc); guc_capture_clear_ext_regs(guc->capture.reglists); } int intel_guc_capture_init(struct intel_guc *guc) { struct drm_i915_private *dev_priv = (guc_to_gt(guc))->i915; + int ret; guc->capture.reglists = 
guc_capture_get_device_reglist(dev_priv); + /* + * allocate interim store at init time so we dont require memory + * allocation whilst in the midst of the reset + capture + */ + ret = guc_capture_store_create(guc); + if (ret) { + guc_capture_clear_ext_regs(guc->capture.reglists); + return ret; + } + + INIT_WORK(&guc->capture.store_work, guc_capture_store_snapshot_work); + guc->capture.enabled = true; + return 0; } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.h index 839b53425e1e..7031de12f3a1 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.h @@ -64,7 +64,19 @@ struct intel_guc_capture_out_group { struct intel_guc_capture_out_data group_lists[0]; }; +struct guc_capture_out_store { + /* An interim storage to copy the GuC error-capture-output before + * parsing and reporting via proper reporting flows with formatting. + */ + unsigned char *addr; + size_t size; + unsigned long head; /* inject new output capture data */ + unsigned long tail; /* remove output capture data when reporting */ + struct mutex lock; /*lock head or tail when copying capture in or extracting out*/ +}; + struct intel_guc_state_capture { + bool enabled; struct __guc_mmio_reg_descr_group *reglists; u16 num_instance_regs[GUC_CAPTURE_LIST_INDEX_MAX][GUC_MAX_ENGINE_CLASSES]; u16 num_class_regs[GUC_CAPTURE_LIST_INDEX_MAX][GUC_MAX_ENGINE_CLASSES]; @@ -72,14 +84,18 @@ struct intel_guc_state_capture { int instance_list_size; int class_list_size; int global_list_size; + struct guc_capture_out_store out_store; + struct work_struct store_work; }; +void intel_guc_capture_store_snapshot(struct intel_guc *guc); int intel_guc_capture_list_count(struct intel_guc *guc, u32 owner, u32 type, u32 class, u16 *num_entries); int intel_guc_capture_list_init(struct intel_guc *guc, u32 owner, u32 type, u32 class, struct guc_mmio_reg *ptr, u16 num_entries); int 
intel_guc_capture_output_min_size_est(struct intel_guc *guc); void intel_guc_capture_destroy(struct intel_guc *guc); +void intel_guc_capture_store_snapshot_immediate(struct intel_guc *guc); int intel_guc_capture_init(struct intel_guc *guc); #endif /* _INTEL_GUC_CAPTURE_H */ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c index dd86530f77a1..1354dbde9994 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c @@ -194,7 +194,7 @@ bool guc_check_log_buf_overflow(struct intel_guc *guc, return overflow; } -static unsigned int guc_get_log_buffer_size(enum guc_log_buffer_type type) +unsigned int guc_get_log_buffer_size(enum guc_log_buffer_type type) { switch (type) { case GUC_DEBUG_LOG_BUFFER: @@ -210,6 +210,20 @@ static unsigned int guc_get_log_buffer_size(enum guc_log_buffer_type type) return 0; } +size_t guc_get_log_buffer_offset(enum guc_log_buffer_type type) +{ + enum guc_log_buffer_type i; + size_t offset = PAGE_SIZE;/* for the log_buffer_states */ + + for (i = GUC_DEBUG_LOG_BUFFER; i < GUC_MAX_LOG_BUFFER; i++) { + if (i == type) + break; + offset += guc_get_log_buffer_size(i); + } + + return offset; +} + static void _guc_log_copy_debuglogs_for_relay(struct intel_guc_log *log) { struct intel_guc *guc = log_to_guc(log); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h index 2968023f7447..9bf29343df0e 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h @@ -64,8 +64,13 @@ struct intel_guc_log { }; void intel_guc_log_init_early(struct intel_guc_log *log); +unsigned int guc_get_log_buffer_size(enum guc_log_buffer_type type); +size_t guc_get_log_buffer_offset(enum guc_log_buffer_type type); int intel_guc_log_create(struct intel_guc_log *log); void intel_guc_log_destroy(struct intel_guc_log *log); + +bool guc_check_log_buf_overflow(struct intel_guc *guc, struct 
intel_guc_log_stats *state, + unsigned int full_cnt); int intel_guc_log_set_level(struct intel_guc_log *log, u32 level); bool intel_guc_log_relay_created(const struct intel_guc_log *log); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 0bfc92b1b982..0afd9ddd71fc 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -24,6 +24,7 @@ #include "intel_guc_ads.h" #include "intel_guc_submission.h" +#include "gt/uc/intel_guc_capture.h" #include "i915_drv.h" #include "i915_trace.h" @@ -1431,6 +1432,8 @@ void intel_guc_submission_reset_prepare(struct intel_guc *guc) } scrub_guc_desc_for_outstanding_g2h(guc); + + intel_guc_capture_store_snapshot_immediate(guc); } static struct intel_engine_cs * @@ -4013,10 +4016,11 @@ int intel_guc_error_capture_process_msg(struct intel_guc *guc, return -EPROTO; } - status = msg[0]; - drm_info(&guc_to_gt(guc)->i915->drm, "Got error capture: status = %d", status); + status = msg[0] & INTEL_GUC_STATE_CAPTURE_EVENT_STATUS_MASK; + if (status == INTEL_GUC_STATE_CAPTURE_EVENT_STATUS_NOSPACE) + drm_warn(&guc_to_gt(guc)->i915->drm, "G2H-Error capture no space\n"); - /* Add extraction of error capture dump */ + intel_guc_capture_store_snapshot(guc); return 0; } From patchwork Mon Nov 22 23:04:02 2021 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Alan Previn X-Patchwork-Id: 12633097 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.lore.kernel.org (Postfix) with ESMTPS id E11CCC433F5 for ; Mon, 22 Nov 2021 23:03:07 +0000 (UTC) Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by 
gabe.freedesktop.org (Postfix) with ESMTP id E264089F3B; Mon, 22 Nov 2021 23:02:57 +0000 (UTC) Received: from mga02.intel.com (mga02.intel.com [134.134.136.20]) by gabe.freedesktop.org (Postfix) with ESMTPS id 93DC189ECD for ; Mon, 22 Nov 2021 23:02:53 +0000 (UTC) X-IronPort-AV: E=McAfee;i="6200,9189,10176"; a="222123297" X-IronPort-AV: E=Sophos;i="5.87,255,1631602800"; d="scan'208";a="222123297" Received: from orsmga005.jf.intel.com ([10.7.209.41]) by orsmga101.jf.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 22 Nov 2021 15:02:53 -0800 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.87,255,1631602800"; d="scan'208";a="674237090" Received: from aalteres-desk.fm.intel.com ([10.80.57.53]) by orsmga005.jf.intel.com with ESMTP; 22 Nov 2021 15:02:52 -0800 From: Alan Previn To: intel-gfx@lists.freedesktop.org Date: Mon, 22 Nov 2021 15:04:02 -0800 Message-Id: <20211122230402.2023576-8-alan.previn.teres.alexis@intel.com> X-Mailer: git-send-email 2.25.1 In-Reply-To: <20211122230402.2023576-1-alan.previn.teres.alexis@intel.com> References: <20211122230402.2023576-1-alan.previn.teres.alexis@intel.com> MIME-Version: 1.0 Subject: [Intel-gfx] [RFC 7/7] drm/i915/guc: Print the GuC error capture output register list. X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Intel graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Alan Previn Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" Print the GuC captured error state register list (offsets and values) when gpu_coredump_state printout is invoked. Also, since the GuC can report multiple engine class registers in a single notification event, parse the captured data (appearing as a stream of structures) to identify multiple captures of different 'engine-capture-group-outputs'. 
Finally, for each 'engine-capture-group-output', identify the last running context and print already-identified vma's so that user's output report follows the same layout as execlist submission. I.e. engine1-registers, engine1-context-vmas, engine2-registers, engine2-context-vmas, etc. Signed-off-by: Alan Previn Signed-off-by: Alan Previn --- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 4 +- .../gpu/drm/i915/gt/uc/intel_guc_capture.c | 389 ++++++++++++++++++ .../gpu/drm/i915/gt/uc/intel_guc_capture.h | 6 + drivers/gpu/drm/i915/i915_gpu_error.c | 53 ++- drivers/gpu/drm/i915/i915_gpu_error.h | 5 + 5 files changed, 439 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 332756036007..5806e2c05212 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -1595,9 +1595,7 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine, drm_printf(m, "\tIPEHR: 0x%08x\n", ENGINE_READ(engine, IPEHR)); } - if (intel_engine_uses_guc(engine)) { - /* nothing to print yet */ - } else if (HAS_EXECLISTS(dev_priv)) { + if (HAS_EXECLISTS(dev_priv) && !intel_engine_uses_guc(engine)) { struct i915_request * const *port, *rq; const u32 *hws = &engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX]; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c index 459fe81c77ae..998ce1b474ed 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c @@ -415,8 +415,389 @@ int intel_guc_capture_output_min_size_est(struct intel_guc *guc) * L--> intel_guc_capture_store_snapshot * L--> queue(__guc_capture_store_snapshot_work) * Copies from B (head->tail) into C + * + * GUC --> notify context reset: + * ----------------------------- + * --> G2H CONTEXT RESET + * L--> guc_handle_context_reset --> i915_capture_error_state + * --> i915_gpu_coredump --> 
intel_guc_capture_store_ptr + * L--> keep a ptr to capture_store in + * i915_gpu_coredump struct. + * + * User Sysfs / Debugfs + * -------------------- + * --> i915_gpu_coredump_copy_to_buffer-> + * L--> err_print_to_sgl --> err_print_gt + * L--> error_print_guc_captures + * L--> loop: intel_guc_capture_out_print_next_group + * */ +#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) + +static char * +guc_capture_register_string(const struct intel_guc *guc, u32 owner, u32 type, + u32 class, u32 id, u32 offset) +{ + struct __guc_mmio_reg_descr_group *reglists = guc->capture.reglists; + struct __guc_mmio_reg_descr_group *match; + int num_regs, j = 0; + + if (!reglists) + return NULL; + + match = guc_capture_get_one_list(reglists, owner, type, id); + if (match) { + num_regs = match->num_regs; + while (num_regs--) { + if (offset == match->list[j].reg.reg) + return match->list[j].regname; + ++j; + } + } + + return NULL; +} + +static inline int +guc_capture_store_remove_dw(struct guc_capture_out_store *store, u32 *bytesleft, + u32 *dw) +{ + int tries = 2; + int avail = 0; + u32 *src_data; + + if (!*bytesleft) + return 0; + + while (tries--) { + avail = CIRC_CNT_TO_END(store->head, store->tail, store->size); + if (avail >= sizeof(u32)) { + src_data = (u32 *)(store->addr + store->tail); + *dw = *src_data; + store->tail = (store->tail + 4) & (store->size - 1); + *bytesleft -= 4; + return 4; + } + if (store->tail == (store->size - 1) && store->head > 0) + store->tail = 0; + } + + return 0; +} + +static int +capture_store_get_group_hdr(const struct intel_guc *guc, + struct guc_capture_out_store *store, u32 *bytesleft, + struct intel_guc_capture_out_group_header *group) +{ + int read = 0; + int fullsize = sizeof(struct intel_guc_capture_out_group_header); + + if (fullsize > *bytesleft) + return -1; + + if (CIRC_CNT_TO_END(store->head, store->tail, store->size) >= fullsize) { + memcpy(group, (store->addr + store->tail), fullsize); + store->tail = (store->tail + fullsize) & 
(store->size - 1); + *bytesleft -= fullsize; + return 0; + } + + read += guc_capture_store_remove_dw(store, bytesleft, &group->reserved1); + read += guc_capture_store_remove_dw(store, bytesleft, &group->info); + if (read != sizeof(*group)) + return -1; + + return 0; +} + +static int +capture_store_get_data_hdr(const struct intel_guc *guc, + struct guc_capture_out_store *store, u32 *bytesleft, + struct intel_guc_capture_out_data_header *data) +{ + int read = 0; + int fullsize = sizeof(struct intel_guc_capture_out_data_header); + + if (fullsize > *bytesleft) + return -1; + + if (CIRC_CNT_TO_END(store->head, store->tail, store->size) >= fullsize) { + memcpy(data, (store->addr + store->tail), fullsize); + store->tail = (store->tail + fullsize) & (store->size - 1); + *bytesleft -= fullsize; + return 0; + } + + read += guc_capture_store_remove_dw(store, bytesleft, &data->reserved1); + read += guc_capture_store_remove_dw(store, bytesleft, &data->info); + read += guc_capture_store_remove_dw(store, bytesleft, &data->lrca); + read += guc_capture_store_remove_dw(store, bytesleft, &data->guc_ctx_id); + read += guc_capture_store_remove_dw(store, bytesleft, &data->num_mmios); + if (read != sizeof(*data)) + return -1; + + return 0; +} + +static int +capture_store_get_register(const struct intel_guc *guc, + struct guc_capture_out_store *store, u32 *bytesleft, + struct guc_mmio_reg *reg) +{ + int read = 0; + int fullsize = sizeof(struct guc_mmio_reg); + + if (fullsize > *bytesleft) + return -1; + + if (CIRC_CNT_TO_END(store->head, store->tail, store->size) >= fullsize) { + memcpy(reg, (store->addr + store->tail), fullsize); + store->tail = (store->tail + fullsize) & (store->size - 1); + *bytesleft -= fullsize; + return 0; + } + + read += guc_capture_store_remove_dw(store, bytesleft, &reg->offset); + read += guc_capture_store_remove_dw(store, bytesleft, &reg->value); + read += guc_capture_store_remove_dw(store, bytesleft, &reg->flags); + read += guc_capture_store_remove_dw(store, bytesleft, 
&reg->mask); + if (read != sizeof(*reg)) + return -1; + + return 0; +} + +static void guc_capture_store_drop_data(struct guc_capture_out_store *store, + unsigned long sampled_head) +{ + if (sampled_head == 0) + store->tail = store->size - 1; + else + store->tail = sampled_head - 1; +} + +#ifdef CONFIG_DRM_I915_DEBUG_GUC +#define guc_capt_err_print(a, b, ...) \ + do { \ + drm_warn(a, __VA_ARGS__); \ + if (b) \ + i915_error_printf(b, __VA_ARGS__); \ + } while (0) +#else +#define guc_capt_err_print(a, b, ...) \ + do { \ + if (b) \ + i915_error_printf(b, __VA_ARGS__); \ + } while (0) +#endif + +static struct intel_engine_cs * +guc_lookup_engine(struct intel_guc *guc, u8 guc_class, u8 instance) +{ + struct intel_gt *gt = guc_to_gt(guc); + u8 engine_class = guc_class_to_engine_class(guc_class); + + /* Class index is checked in class converter */ + GEM_BUG_ON(instance > MAX_ENGINE_INSTANCE); + + return gt->engine_class[engine_class][instance]; +} + +static inline struct intel_context * +guc_context_lookup(struct intel_guc *guc, u32 guc_ctx_id) +{ + struct intel_context *ce; + + if (unlikely(guc_ctx_id >= GUC_MAX_LRC_DESCRIPTORS)) { + drm_dbg(&guc_to_gt(guc)->i915->drm, "Invalid guc_ctx_id 0x%X, max 0x%X", + guc_ctx_id, GUC_MAX_LRC_DESCRIPTORS); + return NULL; + } + + ce = xa_load(&guc->context_lookup, guc_ctx_id); + if (unlikely(!ce)) { + drm_dbg(&guc_to_gt(guc)->i915->drm, "Context is NULL, guc_ctx_id 0x%X", + guc_ctx_id); + return NULL; + } + + return ce; +} + + +#define PRINT guc_capt_err_print +#define REGSTR guc_capture_register_string + +#define GCAP_PRINT_INTEL_ENG_INFO(i915, ebuf, eng) \ + PRINT(&(i915->drm), (ebuf), " i915-Eng-Name: %s\n", (eng)->name); \ + PRINT(&(i915->drm), (ebuf), " i915-Eng-Class: 0x%02x\n", (eng)->class); \ + PRINT(&(i915->drm), (ebuf), " i915-Eng-Inst: 0x%02x\n", (eng)->instance); \ + PRINT(&(i915->drm), (ebuf), " i915-Eng-LogicalMask: 0x%08x\n", (eng)->logical_mask) + +#define GCAP_PRINT_GUC_INST_INFO(i915, ebuf, data) \ + 
PRINT(&(i915->drm), (ebuf), " LRCA: 0x%08x\n", (data).lrca); \ + PRINT(&(i915->drm), (ebuf), " GuC-ContextID: 0x%08x\n", (data).guc_ctx_id); \ + PRINT(&(i915->drm), (ebuf), " GuC-Engine-Instance: 0x%08x\n", \ + (uint32_t) FIELD_GET(GUC_CAPTURE_DATAHDR_SRC_INSTANCE, (data).info)); + +#define GCAP_PRINT_INTEL_CTX_INFO(i915, ebuf, ce) \ + PRINT(&(i915->drm), (ebuf), " i915-Ctx-Flags: 0x%016lx\n", (ce)->flags); \ + PRINT(&(i915->drm), (ebuf), " i915-Ctx-GuC-ID: 0x%016x\n", (ce)->guc_id.id); + +int intel_guc_capture_out_print_next_group(struct drm_i915_error_state_buf *ebuf, + struct intel_gt_coredump *gt) +{ + /* constant qualifier for data-pointers we shouldn't change mid of error dump printing */ + struct intel_guc_state_capture *cap = gt->uc->capture; + struct intel_guc *guc = container_of(cap, struct intel_guc, capture); + struct drm_i915_private *i915 = (container_of(guc, struct intel_gt, + uc.guc))->i915; + struct guc_capture_out_store *store = &cap->out_store; + struct guc_capture_out_store tmpstore; + struct intel_guc_capture_out_group_header group; + struct intel_guc_capture_out_data_header data; + struct guc_mmio_reg reg; + const char *grptypestr[GUC_STATE_CAPTURE_GROUP_TYPE_MAX] = {"full-capture", + "partial-capture"}; + const char *datatypestr[GUC_CAPTURE_LIST_TYPE_MAX] = {"Global", "Engine-Class", + "Engine-Instance"}; + enum guc_capture_group_types grptype; + enum guc_capture_type datatype; + int numgrps, numregs; + char *str, noname[16]; + u32 numbytes, engineclass, eng_inst, ret = 0; + struct intel_engine_cs *eng; + struct intel_context *ce; + + if (!cap->enabled) + return -ENODEV; + + mutex_lock(&store->lock); + smp_mb(); /* sync to get the latest head for the moment */ + /* NOTE1: make a copy of store so we dont have to deal with a changing lower bound of + * occupied-space in this circular buffer. 
+ * NOTE2: Higher up the stack from here, we keep calling this function in a loop to + * reading more capture groups as they appear (as the lower bound of occupied-space + * changes) until this circ-buf is empty. + */ + memcpy(&tmpstore, store, sizeof(tmpstore)); + + PRINT(&i915->drm, ebuf, "global --- GuC Error Capture\n"); + + numbytes = CIRC_CNT(tmpstore.head, tmpstore.tail, tmpstore.size); + if (!numbytes) { + PRINT(&i915->drm, ebuf, "GuC capture stream empty!\n"); + ret = -ENODATA; + goto unlock; + } + /* everything in GuC output structures are dword aligned */ + if (numbytes & 0x3) { + PRINT(&i915->drm, ebuf, "GuC capture stream unaligned!\n"); + ret = -EIO; + goto unlock; + } + + if (capture_store_get_group_hdr(guc, &tmpstore, &numbytes, &group)) { + PRINT(&i915->drm, ebuf, "GuC capture error getting next group-header!\n"); + ret = -EIO; + goto unlock; + } + + PRINT(&i915->drm, ebuf, "NumCaptures: 0x%08x\n", (uint32_t) + FIELD_GET(GUC_CAPTURE_GRPHDR_SRC_NUMCAPTURES, group.info)); + grptype = FIELD_GET(GUC_CAPTURE_GRPHDR_SRC_CAPTURE_TYPE, group.info); + PRINT(&i915->drm, ebuf, "Coverage: 0x%08x = %s\n", grptype, + grptypestr[grptype % GUC_STATE_CAPTURE_GROUP_TYPE_MAX]); + + numgrps = FIELD_GET(GUC_CAPTURE_GRPHDR_SRC_NUMCAPTURES, group.info); + while (numgrps--) { + eng = NULL; + ce = NULL; + + if (capture_store_get_data_hdr(guc, &tmpstore, &numbytes, &data)) { + PRINT(&i915->drm, ebuf, "GuC capture error on next data-header!\n"); + ret = -EIO; + goto unlock; + } + datatype = FIELD_GET(GUC_CAPTURE_DATAHDR_SRC_TYPE, data.info); + PRINT(&i915->drm, ebuf, " RegListType: %s\n", + datatypestr[datatype % GUC_CAPTURE_LIST_TYPE_MAX]); + + engineclass = FIELD_GET(GUC_CAPTURE_DATAHDR_SRC_CLASS, data.info); + if (datatype != GUC_CAPTURE_LIST_TYPE_GLOBAL) { + PRINT(&i915->drm, ebuf, " GuC-Engine-Class: %d\n", + engineclass); + if (engineclass <= GUC_LAST_ENGINE_CLASS) + PRINT(&i915->drm, ebuf, " i915-Eng-Class: %d\n", + guc_class_to_engine_class(engineclass)); + + if 
(datatype == GUC_CAPTURE_LIST_TYPE_ENGINE_INSTANCE) { + GCAP_PRINT_GUC_INST_INFO(i915, ebuf, data); + eng_inst = FIELD_GET(GUC_CAPTURE_DATAHDR_SRC_INSTANCE, data.info); + eng = guc_lookup_engine(guc, engineclass, eng_inst); + if (eng) { + GCAP_PRINT_INTEL_ENG_INFO(i915, ebuf, eng); + } else { + PRINT(&i915->drm, ebuf, " i915-Eng-Lookup Fail!\n"); + } + ce = guc_context_lookup(guc, data.guc_ctx_id); + if (ce) { + GCAP_PRINT_INTEL_CTX_INFO(i915, ebuf, ce); + } else { + PRINT(&i915->drm, ebuf, " i915-Ctx-Lookup Fail!\n"); + } + } + } + numregs = FIELD_GET(GUC_CAPTURE_DATAHDR_NUM_MMIOS, data.num_mmios); + PRINT(&i915->drm, ebuf, " NumRegs: 0x%08x\n", numregs); + + while (numregs--) { + if (capture_store_get_register(guc, &tmpstore, &numbytes, &reg)) { + PRINT(&i915->drm, ebuf, "Error getting next register!\n"); + ret = -EIO; + goto unlock; + } + str = REGSTR(guc, GUC_CAPTURE_LIST_INDEX_PF, datatype, + engineclass, 0, reg.offset); + if (!str) { + snprintf(noname, sizeof(noname), "REG-0x%08x", reg.offset); + str = noname; + } + PRINT(&i915->drm, ebuf, " %s: 0x%08x\n", str, reg.value); + + } + if (eng) { + const struct intel_engine_coredump *ee; + for (ee = gt->engine; ee; ee = ee->next) { + const struct i915_vma_coredump *vma; + if (ee->engine == eng) { + for (vma = ee->vma; vma; vma = vma->next) + i915_print_error_vma(ebuf, ee->engine, vma); + } + } + } + } + + store->tail = tmpstore.tail; +unlock: + /* if we have a stream error, just drop everything */ + if (ret == -EIO) { + drm_warn(&i915->drm, "Skip GuC capture data print due to stream error\n"); + guc_capture_store_drop_data(store, tmpstore.head); + } + + mutex_unlock(&store->lock); + + return ret; +} + +#undef REGSTR +#undef PRINT + +#endif //CONFIG_DRM_I915_DEBUG_GUC + static void guc_capture_store_insert(struct intel_guc *guc, struct guc_capture_out_store *store, unsigned char *new_data, size_t bytes) { @@ -587,6 +968,14 @@ void intel_guc_capture_destroy(struct intel_guc *guc) 
guc_capture_clear_ext_regs(guc->capture.reglists); } +struct intel_guc_state_capture * +intel_guc_capture_store_ptr(struct intel_guc *guc) +{ + if (!guc->capture.enabled) + return NULL; + return &guc->capture; +} + int intel_guc_capture_init(struct intel_guc *guc) { struct drm_i915_private *dev_priv = (guc_to_gt(guc))->i915; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.h index 7031de12f3a1..7d048a8f6efe 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.h @@ -88,6 +88,11 @@ struct intel_guc_state_capture { struct work_struct store_work; }; +struct drm_i915_error_state_buf; +struct intel_gt_coredump; + +int intel_guc_capture_out_print_next_group(struct drm_i915_error_state_buf *m, + struct intel_gt_coredump *gt); void intel_guc_capture_store_snapshot(struct intel_guc *guc); int intel_guc_capture_list_count(struct intel_guc *guc, u32 owner, u32 type, u32 class, u16 *num_entries); @@ -96,6 +101,7 @@ int intel_guc_capture_list_init(struct intel_guc *guc, u32 owner, u32 type, u32 int intel_guc_capture_output_min_size_est(struct intel_guc *guc); void intel_guc_capture_destroy(struct intel_guc *guc); void intel_guc_capture_store_snapshot_immediate(struct intel_guc *guc); +struct intel_guc_state_capture *intel_guc_capture_store_ptr(struct intel_guc *guc); int intel_guc_capture_init(struct intel_guc *guc); #endif /* _INTEL_GUC_CAPTURE_H */ diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 2a2d7643b551..47016059c65d 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -600,6 +600,16 @@ static void error_print_engine(struct drm_i915_error_state_buf *m, error_print_context(m, " Active context: ", &ee->context); } +static void error_print_guc_captures(struct drm_i915_error_state_buf *m, + struct intel_gt_coredump *gt) +{ + int ret; + + do { + ret = 
intel_guc_capture_out_print_next_group(m, gt); + } while (!ret); +} + void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...) { va_list args; @@ -609,9 +619,9 @@ void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...) va_end(args); } -static void print_error_vma(struct drm_i915_error_state_buf *m, - const struct intel_engine_cs *engine, - const struct i915_vma_coredump *vma) +void i915_print_error_vma(struct drm_i915_error_state_buf *m, + const struct intel_engine_cs *engine, + const struct i915_vma_coredump *vma) { char out[ASCII85_BUFSZ]; int page; @@ -679,7 +689,7 @@ static void err_print_uc(struct drm_i915_error_state_buf *m, intel_uc_fw_dump(&error_uc->guc_fw, &p); intel_uc_fw_dump(&error_uc->huc_fw, &p); - print_error_vma(m, NULL, error_uc->guc_log); + i915_print_error_vma(m, NULL, error_uc->guc_log); } static void err_free_sgl(struct scatterlist *sgl) @@ -764,12 +774,16 @@ static void err_print_gt(struct drm_i915_error_state_buf *m, err_printf(m, " GAM_DONE: 0x%08x\n", gt->gam_done); } - for (ee = gt->engine; ee; ee = ee->next) { - const struct i915_vma_coredump *vma; + if (gt->uc->capture) /* error capture was via GuC */ + error_print_guc_captures(m, gt); + else { + for (ee = gt->engine; ee; ee = ee->next) { + const struct i915_vma_coredump *vma; - error_print_engine(m, ee); - for (vma = ee->vma; vma; vma = vma->next) - print_error_vma(m, ee->engine, vma); + error_print_engine(m, ee); + for (vma = ee->vma; vma; vma = vma->next) + i915_print_error_vma(m, ee->engine, vma); + } } if (gt->uc) @@ -1140,7 +1154,7 @@ static void gt_record_fences(struct intel_gt_coredump *gt) gt->nfence = i; } -static void engine_record_registers(struct intel_engine_coredump *ee) +static void engine_record_registers_execlist(struct intel_engine_coredump *ee) { const struct intel_engine_cs *engine = ee->engine; struct drm_i915_private *i915 = engine->i915; @@ -1384,8 +1398,10 @@ intel_engine_coredump_alloc(struct intel_engine_cs *engine, 
gfp_t gfp) ee->engine = engine; - engine_record_registers(ee); - engine_record_execlists(ee); + if (!intel_uc_uses_guc_submission(&engine->gt->uc)) { + engine_record_registers_execlist(ee); + engine_record_execlists(ee); + } return ee; } @@ -1558,8 +1574,8 @@ gt_record_uc(struct intel_gt_coredump *gt, return error_uc; } -/* Capture all registers which don't fit into another category. */ -static void gt_record_regs(struct intel_gt_coredump *gt) +/* Capture all global registers which don't fit into another category. */ +static void gt_record_registers_execlist(struct intel_gt_coredump *gt) { struct intel_uncore *uncore = gt->_gt->uncore; struct drm_i915_private *i915 = uncore->i915; @@ -1806,7 +1822,9 @@ intel_gt_coredump_alloc(struct intel_gt *gt, gfp_t gfp) gc->_gt = gt; gc->awake = intel_gt_pm_is_awake(gt); - gt_record_regs(gc); + if (!intel_uc_uses_guc_submission(&gt->uc)) + gt_record_registers_execlist(gc); + gt_record_fences(gc); return gc; @@ -1871,6 +1889,11 @@ i915_gpu_coredump(struct intel_gt *gt, intel_engine_mask_t engine_mask) if (INTEL_INFO(i915)->has_gt_uc) error->gt->uc = gt_record_uc(error->gt, compress); + if (intel_uc_uses_guc_submission(&gt->uc)) + error->gt->uc->capture = intel_guc_capture_store_ptr(&gt->uc.guc); + else + error->gt->uc->capture = NULL; + i915_vma_capture_finish(error->gt, compress); error->simulated |= error->gt->simulated; diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h index b98d8cdbe4f2..b55369b245ee 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.h +++ b/drivers/gpu/drm/i915/i915_gpu_error.h @@ -17,6 +17,7 @@ #include "gt/intel_engine.h" #include "gt/intel_gt_types.h" #include "gt/uc/intel_uc_fw.h" +#include "gt/uc/intel_guc_capture.h" #include "intel_device_info.h" @@ -151,6 +152,7 @@ struct intel_gt_coredump { struct intel_uc_fw guc_fw; struct intel_uc_fw huc_fw; struct i915_vma_coredump *guc_log; + struct intel_guc_state_capture *capture; } *uc; struct intel_gt_coredump *next; @@ 
-216,6 +218,9 @@ struct drm_i915_error_state_buf { __printf(2, 3) void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...); +void i915_print_error_vma(struct drm_i915_error_state_buf *m, + const struct intel_engine_cs *engine, + const struct i915_vma_coredump *vma); struct i915_gpu_coredump *i915_gpu_coredump(struct intel_gt *gt, intel_engine_mask_t engine_mask);