@@ -633,6 +633,53 @@ intel_guc_capture_getlist(struct intel_guc *guc, u32 owner, u32 type, u32 classi
return 0;
}
+#define GUC_CAPTURE_OVERBUFFER_MULTIPLIER 3
+int intel_guc_capture_output_min_size_est(struct intel_guc *guc)
+{
+ struct intel_gt *gt = guc_to_gt(guc);
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ int worst_min_size = 0, num_regs = 0;
+ size_t tmp = 0;
+
+ /*
+ * If every single engine-instance suffered a failure in quick succession but
+ * were all unrelated, then a burst of multiple error-capture events would dump
+ * registers for every one engine instance, one at a time. In this case, GuC
+ * would even dump the global-registers repeatedly.
+ *
+ * For each engine instance, there would be 1 x guc_state_capture_group_t output
+ * followed by 3 x guc_state_capture_t lists. The latter is how the register
+ * dumps are split across different register types (where the '3' are global vs class
+ * vs instance). Finally, let's multiply the whole thing by 3x (just so we are
+ * not limited to just 1 round of data in a worst case full register dump log)
+ *
+ * NOTE: intel_guc_log that allocates the log buffer would round this size up to
+ * a power of two.
+ */
+
+ for_each_engine(engine, gt, id) {
+ worst_min_size += sizeof(struct guc_state_capture_group_header_t) +
+ (3 * sizeof(struct guc_state_capture_header_t));
+
+ if (!intel_guc_capture_getlistsize(guc, 0, GUC_CAPTURE_LIST_TYPE_GLOBAL, 0, &tmp))
+ num_regs += tmp;
+
+ if (!intel_guc_capture_getlistsize(guc, 0, GUC_CAPTURE_LIST_TYPE_ENGINE_CLASS,
+ engine->class, &tmp)) {
+ num_regs += tmp;
+ }
+ if (!intel_guc_capture_getlistsize(guc, 0, GUC_CAPTURE_LIST_TYPE_ENGINE_INSTANCE,
+ engine->class, &tmp)) {
+ num_regs += tmp;
+ }
+ }
+
+ worst_min_size += (num_regs * sizeof(struct guc_mmio_reg));
+
+ return (worst_min_size * GUC_CAPTURE_OVERBUFFER_MULTIPLIER);
+}
+
static void
guc_capture_free_ads_cache(struct __guc_state_capture_priv *gc)
{
@@ -11,6 +11,7 @@
struct guc_gt_system_info;
struct intel_guc;
+int intel_guc_capture_output_min_size_est(struct intel_guc *guc);
int intel_guc_capture_getlist(struct intel_guc *guc, u32 owner, u32 type, u32 classid,
void **listptr);
int intel_guc_capture_getlistsize(struct intel_guc *guc, u32 owner, u32 type, u32 classid,
@@ -6,10 +6,11 @@
#include <linux/debugfs.h>
#include "gt/intel_gt.h"
+#include "intel_guc_capture.h"
+#include "intel_guc_log.h"
#include "i915_drv.h"
#include "i915_irq.h"
#include "i915_memcpy.h"
-#include "intel_guc_log.h"
static void guc_log_copy_debuglogs_for_relay(struct intel_guc_log *log);
@@ -464,6 +465,10 @@ int intel_guc_log_create(struct intel_guc_log *log)
* | Capture logs |
* +===============================+ + CAPTURE_SIZE
*/
+ if (intel_guc_capture_output_min_size_est(guc) > CAPTURE_BUFFER_SIZE)
+ DRM_WARN("GuC log buffer for state_capture maybe too small. %d < %d\n",
+ CAPTURE_BUFFER_SIZE, intel_guc_capture_output_min_size_est(guc));
+
guc_log_size = PAGE_SIZE + CRASH_BUFFER_SIZE + DEBUG_BUFFER_SIZE +
CAPTURE_BUFFER_SIZE;
Add intel_guc_capture_output_min_size_est function to provide a reasonable minimum size for error-capture region before allocating the shared buffer. Signed-off-by: Alan Previn <alan.previn.teres.alexis@intel.com> --- .../gpu/drm/i915/gt/uc/intel_guc_capture.c | 47 +++++++++++++++++++ .../gpu/drm/i915/gt/uc/intel_guc_capture.h | 1 + drivers/gpu/drm/i915/gt/uc/intel_guc_log.c | 7 ++- 3 files changed, 54 insertions(+), 1 deletion(-)