Message ID | 20211122230402.2023576-6-alan.previn.teres.alexis@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Add GuC Error Capture Support | expand |
On Mon, Nov 22, 2021 at 03:04:00PM -0800, Alan Previn wrote: > GuC log buffer regions for debug-log-events, crash-dumps and > error-state-capture are all a single bo allocation that includes > the guc_log_buffer_state structures. > > Since the error-capture region is accessed with high priority at non- > deterministic times (as part of gpu coredump) while the debug-log-event > region is populated and accessed with different priorities, timings and > consumers, let's split out separate locks for buffer-state accesses > of each region. > > Also, ensure a global mapping is made up front for the entire bo > throughout GuC operation so that dynamic mapping and unmapping isn't > required for error capture log access if relay-logging isn't running. > > Additionally, while here, make some readibility improvements: > 1. change previous function names with "capture_logs" to > "copy_debug_logs" to help make the distinction clearer. > 2. Update the guc log region mapping comments to order them > according to the enum definition as per the GuC interface. > Nothing major, just a couple nits below. > Signed-off-by: Alan Previn <alan.previn.teres.alexis@intel.com> > --- > drivers/gpu/drm/i915/gt/uc/intel_guc.h | 2 + > .../gpu/drm/i915/gt/uc/intel_guc_capture.c | 46 +++++++ > .../gpu/drm/i915/gt/uc/intel_guc_capture.h | 1 + > drivers/gpu/drm/i915/gt/uc/intel_guc_log.c | 120 ++++++++++++------ > drivers/gpu/drm/i915/gt/uc/intel_guc_log.h | 14 +- > 5 files changed, 137 insertions(+), 46 deletions(-) > > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h > index d136c69abe12..e0db21bbffdd 100644 > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h > @@ -34,6 +34,8 @@ struct intel_guc { > struct intel_uc_fw fw; > /** @log: sub-structure containing GuC log related data and objects */ > struct intel_guc_log log; > + /** @log_state: states and locks for each subregion of GuC's log buffer */ > + struct intel_guc_log_stats log_state[GUC_MAX_LOG_BUFFER]; > /** @ct: the command transport communication channel */ > struct intel_guc_ct ct; > /** @slpc: sub-structure containing SLPC related data and objects */ > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c > index eec1d193ac26..0cb358a98605 100644 > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c > @@ -344,6 +344,52 @@ int intel_guc_capture_list_init(struct intel_guc *guc, u32 owner, u32 type, u32 > return -ENODATA; > } > > +int intel_guc_capture_output_min_size_est(struct intel_guc *guc) > +{ > + struct intel_gt *gt = guc_to_gt(guc); > + struct intel_engine_cs *engine; > + enum intel_engine_id id; > + int worst_min_size = 0, num_regs = 0; > + u16 tmp = 0; > + > + /* > + * If every single engine-instance suffered a failure in quick succession but > + * were all unrelated, then a burst of multiple error-capture events would dump > + * registers for every one engine instance, one at a time. In this case, GuC > + * would even dump the global-registers repeatedly. > + * > + * For each engine instance, there would be 1 x intel_guc_capture_out_group output > + * followed by 3 x intel_guc_capture_out_data lists. The latter is how the register > + * dumps are split across different register types (where the '3' are global vs class > + * vs instance). Finally, let's multiply the whole thing by 3x (just so we are > + * not limited to just 1 rounds of data in a worst case full register dump log) s/a worst/a worst/ > + * > + * NOTE: intel_guc_log that allocates the log buffer would round this size up to > + * a power of two. > + */ > + > + for_each_engine(engine, gt, id) { > + worst_min_size += sizeof(struct intel_guc_capture_out_group_header) + > + (3 * sizeof(struct intel_guc_capture_out_data_header)); > + > + if (!intel_guc_capture_list_count(guc, 0, GUC_CAPTURE_LIST_TYPE_GLOBAL, 0, &tmp)) > + num_regs += tmp; > + > + if (!intel_guc_capture_list_count(guc, 0, GUC_CAPTURE_LIST_TYPE_ENGINE_CLASS, > + engine->class, &tmp)) { > + num_regs += tmp; > + } > + if (!intel_guc_capture_list_count(guc, 0, GUC_CAPTURE_LIST_TYPE_ENGINE_INSTANCE, > + engine->class, &tmp)) { > + num_regs += tmp; > + } > + } > + > + worst_min_size += (num_regs * sizeof(struct guc_mmio_reg)); > + > + return (worst_min_size * 3); Maybe a define for the '3' here describing what the '3' means. > +} > + > void intel_guc_capture_destroy(struct intel_guc *guc) > { > guc_capture_clear_ext_regs(guc->capture.reglists); > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.h > index b2454b6cd778..839b53425e1e 100644 > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.h > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.h > @@ -78,6 +78,7 @@ int intel_guc_capture_list_count(struct intel_guc *guc, u32 owner, u32 type, u32 > u16 *num_entries); > int intel_guc_capture_list_init(struct intel_guc *guc, u32 owner, u32 type, u32 class, > struct guc_mmio_reg *ptr, u16 num_entries); > +int intel_guc_capture_output_min_size_est(struct intel_guc *guc); > void intel_guc_capture_destroy(struct intel_guc *guc); > int intel_guc_capture_init(struct intel_guc *guc); > > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c > index 1962a43302a8..dd86530f77a1 100644 > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c > @@ -10,7 +10,7 @@ > #include "i915_memcpy.h" > #include "intel_guc_log.h" > > -static void guc_log_capture_logs(struct intel_guc_log *log); > +static void guc_log_copy_debuglogs_for_relay(struct intel_guc_log *log); > > /** > * DOC: GuC firmware log > @@ -149,7 +149,7 @@ static void guc_move_to_next_buf(struct intel_guc_log *log) > smp_wmb(); > > /* All data has been written, so now move the offset of sub buffer. */ > - relay_reserve(log->relay.channel, log->vma->obj->base.size); > + relay_reserve(log->relay.channel, log->vma->obj->base.size - CAPTURE_BUFFER_SIZE); > > /* Switch to the next sub buffer */ > relay_flush(log->relay.channel); > @@ -169,25 +169,25 @@ static void *guc_get_write_buffer(struct intel_guc_log *log) > return relay_reserve(log->relay.channel, 0); > } > > -static bool guc_check_log_buf_overflow(struct intel_guc_log *log, > - enum guc_log_buffer_type type, > - unsigned int full_cnt) > +bool guc_check_log_buf_overflow(struct intel_guc *guc, > + struct intel_guc_log_stats *log_state, > + unsigned int full_cnt) I don't think you meant to drop the 'static' here. > { > - unsigned int prev_full_cnt = log->stats[type].sampled_overflow; > + unsigned int prev_full_cnt = log_state->sampled_overflow; > bool overflow = false; > > if (full_cnt != prev_full_cnt) { > overflow = true; > > - log->stats[type].overflow = full_cnt; > - log->stats[type].sampled_overflow += full_cnt - prev_full_cnt; > + log_state->overflow = full_cnt; > + log_state->sampled_overflow += full_cnt - prev_full_cnt; > > if (full_cnt < prev_full_cnt) { > /* buffer_full_cnt is a 4 bit counter */ > - log->stats[type].sampled_overflow += 16; > + log_state->sampled_overflow += 16; > } > > - dev_notice_ratelimited(guc_to_gt(log_to_guc(log))->i915->drm.dev, > + dev_notice_ratelimited(guc_to_gt(guc)->i915->drm.dev, > "GuC log buffer overflow\n"); > } > > @@ -210,8 +210,10 @@ static unsigned int guc_get_log_buffer_size(enum guc_log_buffer_type type) > return 0; > } > > -static void guc_read_update_log_buffer(struct intel_guc_log *log) > +static void _guc_log_copy_debuglogs_for_relay(struct intel_guc_log *log) > { > + struct intel_guc *guc = log_to_guc(log); > + struct intel_guc_log_stats *logstate; > unsigned int buffer_size, read_offset, write_offset, bytes_to_copy, full_cnt; > struct guc_log_buffer_state *log_buf_state, *log_buf_snapshot_state; > struct guc_log_buffer_state log_buf_state_local; > @@ -235,7 +237,7 @@ static void guc_read_update_log_buffer(struct intel_guc_log *log) > * Used rate limited to avoid deluge of messages, logs might be > * getting consumed by User at a slow rate. > */ > - DRM_ERROR_RATELIMITED("no sub-buffer to capture logs\n"); > + DRM_ERROR_RATELIMITED("no sub-buffer to copy general logs\n"); > log->relay.full_count++; > > goto out_unlock; > @@ -245,12 +247,16 @@ static void guc_read_update_log_buffer(struct intel_guc_log *log) > src_data += PAGE_SIZE; > dst_data += PAGE_SIZE; > > - for (type = GUC_DEBUG_LOG_BUFFER; type < GUC_MAX_LOG_BUFFER; type++) { > + /* For relay logging, we exclude error state capture */ > + for (type = GUC_DEBUG_LOG_BUFFER; type <= GUC_CRASH_DUMP_LOG_BUFFER; type++) { > /* > + * Get a lock to the buffer_state we want to read and update. > * Make a copy of the state structure, inside GuC log buffer > * (which is uncached mapped), on the stack to avoid reading > * from it multiple times. > */ > + logstate = &guc->log_state[type]; > + mutex_lock(&logstate->lock); > memcpy(&log_buf_state_local, log_buf_state, > sizeof(struct guc_log_buffer_state)); > buffer_size = guc_get_log_buffer_size(type); > @@ -259,13 +265,14 @@ static void guc_read_update_log_buffer(struct intel_guc_log *log) > full_cnt = log_buf_state_local.buffer_full_cnt; > > /* Bookkeeping stuff */ > - log->stats[type].flush += log_buf_state_local.flush_to_file; > - new_overflow = guc_check_log_buf_overflow(log, type, full_cnt); > + logstate->flush += log_buf_state_local.flush_to_file; > + new_overflow = guc_check_log_buf_overflow(guc, logstate, full_cnt); > > /* Update the state of shared log buffer */ > log_buf_state->read_ptr = write_offset; > log_buf_state->flush_to_file = 0; > log_buf_state++; > + mutex_unlock(&logstate->lock); > > /* First copy the state structure in snapshot buffer */ > memcpy(log_buf_snapshot_state, &log_buf_state_local, > @@ -313,15 +320,15 @@ static void guc_read_update_log_buffer(struct intel_guc_log *log) > mutex_unlock(&log->relay.lock); > } > > -static void capture_logs_work(struct work_struct *work) > +static void copy_debug_logs_work(struct work_struct *work) > { > struct intel_guc_log *log = > container_of(work, struct intel_guc_log, relay.flush_work); > > - guc_log_capture_logs(log); > + guc_log_copy_debuglogs_for_relay(log); > } > > -static int guc_log_map(struct intel_guc_log *log) > +static int guc_log_relay_map(struct intel_guc_log *log) > { > void *vaddr; > > @@ -333,7 +340,9 @@ static int guc_log_map(struct intel_guc_log *log) > /* > * Create a WC (Uncached for read) vmalloc mapping of log > * buffer pages, so that we can directly get the data > - * (up-to-date) from memory. > + * (up-to-date) from memory. This has already been > + * mapped at GuC Init time (for error-state-capture), but > + * call it again anyway for book-keeping > */ > vaddr = i915_gem_object_pin_map_unlocked(log->vma->obj, I915_MAP_WC); > if (IS_ERR(vaddr)) > @@ -344,7 +353,7 @@ static int guc_log_map(struct intel_guc_log *log) > return 0; > } > > -static void guc_log_unmap(struct intel_guc_log *log) > +static void guc_log_relay_unmap(struct intel_guc_log *log) > { > lockdep_assert_held(&log->relay.lock); > > @@ -354,8 +363,14 @@ static void guc_log_unmap(struct intel_guc_log *log) > > void intel_guc_log_init_early(struct intel_guc_log *log) > { > + struct intel_guc *guc = log_to_guc(log); > + int n; > + > + for (n = GUC_DEBUG_LOG_BUFFER; n < GUC_MAX_LOG_BUFFER; n++) > + mutex_init(&guc->log_state[n].lock); > + > mutex_init(&log->relay.lock); > - INIT_WORK(&log->relay.flush_work, capture_logs_work); > + INIT_WORK(&log->relay.flush_work, copy_debug_logs_work); > log->relay.started = false; > } > > @@ -370,8 +385,11 @@ static int guc_log_relay_create(struct intel_guc_log *log) > lockdep_assert_held(&log->relay.lock); > GEM_BUG_ON(!log->vma); > > - /* Keep the size of sub buffers same as shared log buffer */ > - subbuf_size = log->vma->size; > + /* > + * Keep the size of sub buffers same as shared log buffer > + * but GuC log-events excludes the error-state-capture logs > + */ > + subbuf_size = log->vma->size - CAPTURE_BUFFER_SIZE; > > /* > * Store up to 8 snapshots, which is large enough to buffer sufficient > @@ -406,13 +424,13 @@ static void guc_log_relay_destroy(struct intel_guc_log *log) > log->relay.channel = NULL; > } > > -static void guc_log_capture_logs(struct intel_guc_log *log) > +static void guc_log_copy_debuglogs_for_relay(struct intel_guc_log *log) > { > struct intel_guc *guc = log_to_guc(log); > struct drm_i915_private *dev_priv = guc_to_gt(guc)->i915; > intel_wakeref_t wakeref; > > - guc_read_update_log_buffer(log); > + _guc_log_copy_debuglogs_for_relay(log); > > /* > * Generally device is expected to be active only at this > @@ -452,6 +470,7 @@ int intel_guc_log_create(struct intel_guc_log *log) > { > struct intel_guc *guc = log_to_guc(log); > struct i915_vma *vma; > + void *vaddr; > u32 guc_log_size; > int ret; > > @@ -459,23 +478,31 @@ int intel_guc_log_create(struct intel_guc_log *log) > > /* > * GuC Log buffer Layout > + * (this ordering must follow "enum guc_log_buffer_type" definition) > * > * +===============================+ 00B > - * | Crash dump state header | > - * +-------------------------------+ 32B > * | Debug state header | > + * +-------------------------------+ 32B > + * | Crash dump state header | > + * +-------------------------------+ 64B > + * | Capture state header | > * +-------------------------------+ 64B > * | Capture state header | > * +-------------------------------+ 96B > * | | > * +===============================+ PAGE_SIZE (4KB) > - * | Crash Dump logs | > - * +===============================+ + CRASH_SIZE > * | Debug logs | > * +===============================+ + DEBUG_SIZE > + * | Crash Dump logs | > + * +===============================+ + CRASH_SIZE > + * | Capture logs | > + * +===============================+ + CAPTURE_SIZE > */ > - guc_log_size = PAGE_SIZE + CRASH_BUFFER_SIZE + DEBUG_BUFFER_SIZE + > - CAPTURE_BUFFER_SIZE; > + if (intel_guc_capture_output_min_size_est(guc) > CAPTURE_BUFFER_SIZE) > + DRM_WARN("GuC log buffer for state_capture maybe too small. %d < %d\n", > + CAPTURE_BUFFER_SIZE, intel_guc_capture_output_min_size_est(guc)); > + > + guc_log_size = PAGE_SIZE + DEBUG_BUFFER_SIZE + CRASH_BUFFER_SIZE + CAPTURE_BUFFER_SIZE; I'd personally keep the original formatting here. > > vma = intel_guc_allocate_vma(guc, guc_log_size); > if (IS_ERR(vma)) { > @@ -484,6 +511,17 @@ int intel_guc_log_create(struct intel_guc_log *log) > } > > log->vma = vma; > + /* > + * Create a WC (Uncached for read) vmalloc mapping up front immediate access to > + * data from memory during critical events such as error capture > + */ > + vaddr = i915_gem_object_pin_map_unlocked(log->vma->obj, I915_MAP_WC); > + if (IS_ERR(vaddr)) { > + ret = PTR_ERR(vaddr); > + i915_vma_unpin_and_release(&log->vma, 0); > + goto err; > + } > + log->buf_addr = vaddr; > > log->level = __get_default_log_level(log); > DRM_DEBUG_DRIVER("guc_log_level=%d (%s, verbose:%s, verbosity:%d)\n", > @@ -494,13 +532,14 @@ int intel_guc_log_create(struct intel_guc_log *log) > return 0; > > err: > - DRM_ERROR("Failed to allocate GuC log buffer. %d\n", ret); > + DRM_ERROR("Failed to allocate or map GuC log buffer. %d\n", ret); > return ret; > } > > void intel_guc_log_destroy(struct intel_guc_log *log) > { > - i915_vma_unpin_and_release(&log->vma, 0); > + log->buf_addr = NULL; > + i915_vma_unpin_and_release(&log->vma, I915_VMA_RELEASE_MAP); > } > > int intel_guc_log_set_level(struct intel_guc_log *log, u32 level) > @@ -545,7 +584,7 @@ int intel_guc_log_set_level(struct intel_guc_log *log, u32 level) > > bool intel_guc_log_relay_created(const struct intel_guc_log *log) > { > - return log->relay.buf_addr; > + return log->buf_addr; > } > > int intel_guc_log_relay_open(struct intel_guc_log *log) > @@ -576,7 +615,7 @@ int intel_guc_log_relay_open(struct intel_guc_log *log) > if (ret) > goto out_unlock; > > - ret = guc_log_map(log); > + ret = guc_log_relay_map(log); > if (ret) > goto out_relay; > > @@ -628,8 +667,8 @@ void intel_guc_log_relay_flush(struct intel_guc_log *log) > with_intel_runtime_pm(guc_to_gt(guc)->uncore->rpm, wakeref) > guc_action_flush_log(guc); > > - /* GuC would have updated log buffer by now, so capture it */ > - guc_log_capture_logs(log); > + /* GuC would have updated log buffer by now, so copy it */ > + guc_log_copy_debuglogs_for_relay(log); > } > > /* > @@ -659,7 +698,7 @@ void intel_guc_log_relay_close(struct intel_guc_log *log) > > mutex_lock(&log->relay.lock); > GEM_BUG_ON(!intel_guc_log_relay_created(log)); > - guc_log_unmap(log); > + guc_log_relay_unmap(log); > guc_log_relay_destroy(log); > mutex_unlock(&log->relay.lock); > } > @@ -695,6 +734,7 @@ stringify_guc_log_type(enum guc_log_buffer_type type) > */ > void intel_guc_log_info(struct intel_guc_log *log, struct drm_printer *p) > { > + struct intel_guc *guc = log_to_guc(log); > enum guc_log_buffer_type type; > > if (!intel_guc_log_relay_created(log)) { > @@ -709,8 +749,8 @@ void intel_guc_log_info(struct intel_guc_log *log, struct drm_printer *p) > for (type = GUC_DEBUG_LOG_BUFFER; type < GUC_MAX_LOG_BUFFER; type++) { > drm_printf(p, "\t%s:\tflush count %10u, overflow count %10u\n", > stringify_guc_log_type(type), > - log->stats[type].flush, > - log->stats[type].sampled_overflow); > + guc->log_state[type].flush, > + guc->log_state[type].sampled_overflow); > } > } > > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h > index 9d9004dc58f1..2968023f7447 100644 > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h > @@ -42,9 +42,17 @@ struct intel_guc; > #define GUC_VERBOSITY_TO_LOG_LEVEL(x) ((x) + 2) > #define GUC_LOG_LEVEL_MAX GUC_VERBOSITY_TO_LOG_LEVEL(GUC_LOG_VERBOSITY_MAX) > > +struct intel_guc_log_stats { > + struct mutex lock; /* protects below and guc_log_buffer_state's read-ptr */ > + u32 sampled_overflow; > + u32 overflow; > + u32 flush; > +}; > + > struct intel_guc_log { > u32 level; > struct i915_vma *vma; > + void *buf_addr; I don't think you need both 'buf_addr' and 'relay.buf_addr' as they are the same value, right? Matt > struct { > void *buf_addr; > bool started; > @@ -53,12 +61,6 @@ struct intel_guc_log { > struct mutex lock; > u32 full_count; > } relay; > - /* logging related stats */ > - struct { > - u32 sampled_overflow; > - u32 overflow; > - u32 flush; > - } stats[GUC_MAX_LOG_BUFFER]; > }; > > void intel_guc_log_init_early(struct intel_guc_log *log); > -- > 2.25.1 >
On Tue, Dec 07, 2021 at 03:33:00PM -0800, Teres Alexis, Alan Previn wrote: > Thank you for the detailed review Matt. Responses and follow up questions on some of them below (wanna > make sure i dont misunderstand). > > Will fix all the rest - glad we dont have any design problems .. so far :) > > ...alan > > On Tue, 2021-12-07 at 14:31 -0800, Matthew Brost wrote: > > On Mon, Nov 22, 2021 at 03:04:00PM -0800, Alan Previn wrote: > > > -static bool guc_check_log_buf_overflow(struct intel_guc_log *log, > > > - enum guc_log_buffer_type type, > > > - unsigned int full_cnt) > > > +bool guc_check_log_buf_overflow(struct intel_guc *guc, > > > + struct intel_guc_log_stats *log_state, > > > + unsigned int full_cnt) > > > > I don't think you meant to drop the 'static' here. > actually i do need to call it from guc_capture - but that was on the next patch. > my action would be to move this change to the next patch and i guess change the name to "intel_guc..."? > (im assuming we dont wanna duplicate this). > Ok. Yes, if you export a function add the intel_ prefix. > > > > > + > > > + guc_log_size = PAGE_SIZE + DEBUG_BUFFER_SIZE + CRASH_BUFFER_SIZE + CAPTURE_BUFFER_SIZE; > > > > I'd personally keep the original formatting here. > > > Question: - You are refering to just that last line of the "guc_log_size = .." above right? > Yes. > > > struct intel_guc_log { > > > u32 level; > > > struct i915_vma *vma; > > > + void *buf_addr; > > > > I don't think you need both 'buf_addr' and 'relay.buf_addr' as they are > > the same value, right? > > > Matt > > > Clarification: In the baseline code, i believe we use the "relay.buf_addr" like an enable flag > way to verify that the guc relay debugfs was invoked by user space and is currently active. > (which can be disabled. That said I can definitely remove that relay.buf_addr by introducing > a more descriptive flag such as "relay.active". Assume this is ok right? > Sounds good to me. Matt >
Thank you for the detailed review Matt. Responses and follow up questions on some of them below (wanna make sure i dont misunderstand). Will fix all the rest - glad we dont have any design problems .. so far :) ...alan On Tue, 2021-12-07 at 14:31 -0800, Matthew Brost wrote: > On Mon, Nov 22, 2021 at 03:04:00PM -0800, Alan Previn wrote: > > -static bool guc_check_log_buf_overflow(struct intel_guc_log *log, > > - enum guc_log_buffer_type type, > > - unsigned int full_cnt) > > +bool guc_check_log_buf_overflow(struct intel_guc *guc, > > + struct intel_guc_log_stats *log_state, > > + unsigned int full_cnt) > > I don't think you meant to drop the 'static' here. actually i do need to call it from guc_capture - but that was on the next patch. my action would be to move this change to the next patch and i guess change the name to "intel_guc..."? (im assuming we dont wanna duplicate this). > > > + > > + guc_log_size = PAGE_SIZE + DEBUG_BUFFER_SIZE + CRASH_BUFFER_SIZE + CAPTURE_BUFFER_SIZE; > > I'd personally keep the original formatting here. > Question: - You are refering to just that last line of the "guc_log_size = .." above right? > > struct intel_guc_log { > > u32 level; > > struct i915_vma *vma; > > + void *buf_addr; > > I don't think you need both 'buf_addr' and 'relay.buf_addr' as they are > the same value, right? > Matt > Clarification: In the baseline code, i believe we use the "relay.buf_addr" like an enable flag way to verify that the guc relay debugfs was invoked by user space and is currently active. (which can be disabled. That said I can definitely remove that relay.buf_addr by introducing a more descriptive flag such as "relay.active". Assume this is ok right?
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h index d136c69abe12..e0db21bbffdd 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h @@ -34,6 +34,8 @@ struct intel_guc { struct intel_uc_fw fw; /** @log: sub-structure containing GuC log related data and objects */ struct intel_guc_log log; + /** @log_state: states and locks for each subregion of GuC's log buffer */ + struct intel_guc_log_stats log_state[GUC_MAX_LOG_BUFFER]; /** @ct: the command transport communication channel */ struct intel_guc_ct ct; /** @slpc: sub-structure containing SLPC related data and objects */ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c index eec1d193ac26..0cb358a98605 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c @@ -344,6 +344,52 @@ int intel_guc_capture_list_init(struct intel_guc *guc, u32 owner, u32 type, u32 return -ENODATA; } +int intel_guc_capture_output_min_size_est(struct intel_guc *guc) +{ + struct intel_gt *gt = guc_to_gt(guc); + struct intel_engine_cs *engine; + enum intel_engine_id id; + int worst_min_size = 0, num_regs = 0; + u16 tmp = 0; + + /* + * If every single engine-instance suffered a failure in quick succession but + * were all unrelated, then a burst of multiple error-capture events would dump + * registers for every one engine instance, one at a time. In this case, GuC + * would even dump the global-registers repeatedly. + * + * For each engine instance, there would be 1 x intel_guc_capture_out_group output + * followed by 3 x intel_guc_capture_out_data lists. The latter is how the register + * dumps are split across different register types (where the '3' are global vs class + * vs instance). Finally, let's multiply the whole thing by 3x (just so we are + * not limited to just 1 rounds of data in a worst case full register dump log) + * + * NOTE: intel_guc_log that allocates the log buffer would round this size up to + * a power of two. + */ + + for_each_engine(engine, gt, id) { + worst_min_size += sizeof(struct intel_guc_capture_out_group_header) + + (3 * sizeof(struct intel_guc_capture_out_data_header)); + + if (!intel_guc_capture_list_count(guc, 0, GUC_CAPTURE_LIST_TYPE_GLOBAL, 0, &tmp)) + num_regs += tmp; + + if (!intel_guc_capture_list_count(guc, 0, GUC_CAPTURE_LIST_TYPE_ENGINE_CLASS, + engine->class, &tmp)) { + num_regs += tmp; + } + if (!intel_guc_capture_list_count(guc, 0, GUC_CAPTURE_LIST_TYPE_ENGINE_INSTANCE, + engine->class, &tmp)) { + num_regs += tmp; + } + } + + worst_min_size += (num_regs * sizeof(struct guc_mmio_reg)); + + return (worst_min_size * 3); +} + void intel_guc_capture_destroy(struct intel_guc *guc) { guc_capture_clear_ext_regs(guc->capture.reglists); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.h index b2454b6cd778..839b53425e1e 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.h @@ -78,6 +78,7 @@ int intel_guc_capture_list_count(struct intel_guc *guc, u32 owner, u32 type, u32 u16 *num_entries); int intel_guc_capture_list_init(struct intel_guc *guc, u32 owner, u32 type, u32 class, struct guc_mmio_reg *ptr, u16 num_entries); +int intel_guc_capture_output_min_size_est(struct intel_guc *guc); void intel_guc_capture_destroy(struct intel_guc *guc); int intel_guc_capture_init(struct intel_guc *guc); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c index 1962a43302a8..dd86530f77a1 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c @@ -10,7 +10,7 @@ #include "i915_memcpy.h" #include "intel_guc_log.h" -static void guc_log_capture_logs(struct intel_guc_log *log); +static void guc_log_copy_debuglogs_for_relay(struct intel_guc_log *log); /** * DOC: GuC firmware log @@ -149,7 +149,7 @@ static void guc_move_to_next_buf(struct intel_guc_log *log) smp_wmb(); /* All data has been written, so now move the offset of sub buffer. */ - relay_reserve(log->relay.channel, log->vma->obj->base.size); + relay_reserve(log->relay.channel, log->vma->obj->base.size - CAPTURE_BUFFER_SIZE); /* Switch to the next sub buffer */ relay_flush(log->relay.channel); @@ -169,25 +169,25 @@ static void *guc_get_write_buffer(struct intel_guc_log *log) return relay_reserve(log->relay.channel, 0); } -static bool guc_check_log_buf_overflow(struct intel_guc_log *log, - enum guc_log_buffer_type type, - unsigned int full_cnt) +bool guc_check_log_buf_overflow(struct intel_guc *guc, + struct intel_guc_log_stats *log_state, + unsigned int full_cnt) { - unsigned int prev_full_cnt = log->stats[type].sampled_overflow; + unsigned int prev_full_cnt = log_state->sampled_overflow; bool overflow = false; if (full_cnt != prev_full_cnt) { overflow = true; - log->stats[type].overflow = full_cnt; - log->stats[type].sampled_overflow += full_cnt - prev_full_cnt; + log_state->overflow = full_cnt; + log_state->sampled_overflow += full_cnt - prev_full_cnt; if (full_cnt < prev_full_cnt) { /* buffer_full_cnt is a 4 bit counter */ - log->stats[type].sampled_overflow += 16; + log_state->sampled_overflow += 16; } - dev_notice_ratelimited(guc_to_gt(log_to_guc(log))->i915->drm.dev, + dev_notice_ratelimited(guc_to_gt(guc)->i915->drm.dev, "GuC log buffer overflow\n"); } @@ -210,8 +210,10 @@ static unsigned int guc_get_log_buffer_size(enum guc_log_buffer_type type) return 0; } -static void guc_read_update_log_buffer(struct intel_guc_log *log) +static void _guc_log_copy_debuglogs_for_relay(struct intel_guc_log *log) { + struct intel_guc *guc = log_to_guc(log); + struct intel_guc_log_stats *logstate; unsigned int buffer_size, read_offset, write_offset, bytes_to_copy, full_cnt; struct guc_log_buffer_state *log_buf_state, *log_buf_snapshot_state; struct guc_log_buffer_state log_buf_state_local; @@ -235,7 +237,7 @@ static void guc_read_update_log_buffer(struct intel_guc_log *log) * Used rate limited to avoid deluge of messages, logs might be * getting consumed by User at a slow rate. */ - DRM_ERROR_RATELIMITED("no sub-buffer to capture logs\n"); + DRM_ERROR_RATELIMITED("no sub-buffer to copy general logs\n"); log->relay.full_count++; goto out_unlock; @@ -245,12 +247,16 @@ static void guc_read_update_log_buffer(struct intel_guc_log *log) src_data += PAGE_SIZE; dst_data += PAGE_SIZE; - for (type = GUC_DEBUG_LOG_BUFFER; type < GUC_MAX_LOG_BUFFER; type++) { + /* For relay logging, we exclude error state capture */ + for (type = GUC_DEBUG_LOG_BUFFER; type <= GUC_CRASH_DUMP_LOG_BUFFER; type++) { /* + * Get a lock to the buffer_state we want to read and update. * Make a copy of the state structure, inside GuC log buffer * (which is uncached mapped), on the stack to avoid reading * from it multiple times. */ + logstate = &guc->log_state[type]; + mutex_lock(&logstate->lock); memcpy(&log_buf_state_local, log_buf_state, sizeof(struct guc_log_buffer_state)); buffer_size = guc_get_log_buffer_size(type); @@ -259,13 +265,14 @@ static void guc_read_update_log_buffer(struct intel_guc_log *log) full_cnt = log_buf_state_local.buffer_full_cnt; /* Bookkeeping stuff */ - log->stats[type].flush += log_buf_state_local.flush_to_file; - new_overflow = guc_check_log_buf_overflow(log, type, full_cnt); + logstate->flush += log_buf_state_local.flush_to_file; + new_overflow = guc_check_log_buf_overflow(guc, logstate, full_cnt); /* Update the state of shared log buffer */ log_buf_state->read_ptr = write_offset; log_buf_state->flush_to_file = 0; log_buf_state++; + mutex_unlock(&logstate->lock); /* First copy the state structure in snapshot buffer */ memcpy(log_buf_snapshot_state, &log_buf_state_local, @@ -313,15 +320,15 @@ static void guc_read_update_log_buffer(struct intel_guc_log *log) mutex_unlock(&log->relay.lock); } -static void capture_logs_work(struct work_struct *work) +static void copy_debug_logs_work(struct work_struct *work) { struct intel_guc_log *log = container_of(work, struct intel_guc_log, relay.flush_work); - guc_log_capture_logs(log); + guc_log_copy_debuglogs_for_relay(log); } -static int guc_log_map(struct intel_guc_log *log) +static int guc_log_relay_map(struct intel_guc_log *log) { void *vaddr; @@ -333,7 +340,9 @@ static int guc_log_map(struct intel_guc_log *log) /* * Create a WC (Uncached for read) vmalloc mapping of log * buffer pages, so that we can directly get the data - * (up-to-date) from memory. + * (up-to-date) from memory. This has already been + * mapped at GuC Init time (for error-state-capture), but + * call it again anyway for book-keeping */ vaddr = i915_gem_object_pin_map_unlocked(log->vma->obj, I915_MAP_WC); if (IS_ERR(vaddr)) @@ -344,7 +353,7 @@ static int guc_log_map(struct intel_guc_log *log) return 0; } -static void guc_log_unmap(struct intel_guc_log *log) +static void guc_log_relay_unmap(struct intel_guc_log *log) { lockdep_assert_held(&log->relay.lock); @@ -354,8 +363,14 @@ static void guc_log_unmap(struct intel_guc_log *log) void intel_guc_log_init_early(struct intel_guc_log *log) { + struct intel_guc *guc = log_to_guc(log); + int n; + + for (n = GUC_DEBUG_LOG_BUFFER; n < GUC_MAX_LOG_BUFFER; n++) + mutex_init(&guc->log_state[n].lock); + mutex_init(&log->relay.lock); - INIT_WORK(&log->relay.flush_work, capture_logs_work); + INIT_WORK(&log->relay.flush_work, copy_debug_logs_work); log->relay.started = false; } @@ -370,8 +385,11 @@ static int guc_log_relay_create(struct intel_guc_log *log) lockdep_assert_held(&log->relay.lock); GEM_BUG_ON(!log->vma); - /* Keep the size of sub buffers same as shared log buffer */ - subbuf_size = log->vma->size; + /* + * Keep the size of sub buffers same as shared log buffer + * but GuC log-events excludes the error-state-capture logs + */ + subbuf_size = log->vma->size - CAPTURE_BUFFER_SIZE; /* * Store up to 8 snapshots, which is large enough to buffer sufficient @@ -406,13 +424,13 @@ static void guc_log_relay_destroy(struct intel_guc_log *log) log->relay.channel = NULL; } -static void guc_log_capture_logs(struct intel_guc_log *log) +static void guc_log_copy_debuglogs_for_relay(struct intel_guc_log *log) { struct intel_guc *guc = log_to_guc(log); struct drm_i915_private *dev_priv = guc_to_gt(guc)->i915; intel_wakeref_t wakeref; - guc_read_update_log_buffer(log); + _guc_log_copy_debuglogs_for_relay(log); /* * Generally device is expected to be active only at this @@ -452,6 +470,7 @@ int intel_guc_log_create(struct intel_guc_log *log) { struct intel_guc *guc = log_to_guc(log); struct i915_vma *vma; + void *vaddr; u32 guc_log_size; int ret; @@ -459,23 +478,31 @@ int intel_guc_log_create(struct intel_guc_log *log) /* * GuC Log buffer Layout + * (this ordering must follow "enum guc_log_buffer_type" definition) * * +===============================+ 00B - * | Crash dump state header | - * +-------------------------------+ 32B * | Debug state header | + * +-------------------------------+ 32B + * | Crash dump state header | + * +-------------------------------+ 64B + * | Capture state header | * +-------------------------------+ 64B * | Capture state header | * +-------------------------------+ 96B * | | * +===============================+ PAGE_SIZE (4KB) - * | Crash Dump logs | - * +===============================+ + CRASH_SIZE * | Debug logs | * +===============================+ + DEBUG_SIZE + * | Crash Dump logs | + * +===============================+ + CRASH_SIZE + * | Capture logs | + * +===============================+ + CAPTURE_SIZE */ - guc_log_size = PAGE_SIZE + CRASH_BUFFER_SIZE + DEBUG_BUFFER_SIZE + - CAPTURE_BUFFER_SIZE; + if (intel_guc_capture_output_min_size_est(guc) > CAPTURE_BUFFER_SIZE) + DRM_WARN("GuC log buffer for state_capture maybe too small. %d < %d\n", + CAPTURE_BUFFER_SIZE, intel_guc_capture_output_min_size_est(guc)); + + guc_log_size = PAGE_SIZE + DEBUG_BUFFER_SIZE + CRASH_BUFFER_SIZE + CAPTURE_BUFFER_SIZE; vma = intel_guc_allocate_vma(guc, guc_log_size); if (IS_ERR(vma)) { @@ -484,6 +511,17 @@ int intel_guc_log_create(struct intel_guc_log *log) } log->vma = vma; + /* + * Create a WC (Uncached for read) vmalloc mapping up front immediate access to + * data from memory during critical events such as error capture + */ + vaddr = i915_gem_object_pin_map_unlocked(log->vma->obj, I915_MAP_WC); + if (IS_ERR(vaddr)) { + ret = PTR_ERR(vaddr); + i915_vma_unpin_and_release(&log->vma, 0); + goto err; + } + log->buf_addr = vaddr; log->level = __get_default_log_level(log); DRM_DEBUG_DRIVER("guc_log_level=%d (%s, verbose:%s, verbosity:%d)\n", @@ -494,13 +532,14 @@ int intel_guc_log_create(struct intel_guc_log *log) return 0; err: - DRM_ERROR("Failed to allocate GuC log buffer. %d\n", ret); + DRM_ERROR("Failed to allocate or map GuC log buffer. %d\n", ret); return ret; } void intel_guc_log_destroy(struct intel_guc_log *log) { - i915_vma_unpin_and_release(&log->vma, 0); + log->buf_addr = NULL; + i915_vma_unpin_and_release(&log->vma, I915_VMA_RELEASE_MAP); } int intel_guc_log_set_level(struct intel_guc_log *log, u32 level) @@ -545,7 +584,7 @@ int intel_guc_log_set_level(struct intel_guc_log *log, u32 level) bool intel_guc_log_relay_created(const struct intel_guc_log *log) { - return log->relay.buf_addr; + return log->buf_addr; } int intel_guc_log_relay_open(struct intel_guc_log *log) @@ -576,7 +615,7 @@ int intel_guc_log_relay_open(struct intel_guc_log *log) if (ret) goto out_unlock; - ret = guc_log_map(log); + ret = guc_log_relay_map(log); if (ret) goto out_relay; @@ -628,8 +667,8 @@ void intel_guc_log_relay_flush(struct intel_guc_log *log) with_intel_runtime_pm(guc_to_gt(guc)->uncore->rpm, wakeref) guc_action_flush_log(guc); - /* GuC would have updated log buffer by now, so capture it */ - guc_log_capture_logs(log); + /* GuC would have updated log buffer by now, so copy it */ + guc_log_copy_debuglogs_for_relay(log); } /* @@ -659,7 +698,7 @@ void intel_guc_log_relay_close(struct intel_guc_log *log) mutex_lock(&log->relay.lock); GEM_BUG_ON(!intel_guc_log_relay_created(log)); - guc_log_unmap(log); + guc_log_relay_unmap(log); guc_log_relay_destroy(log); mutex_unlock(&log->relay.lock); } @@ -695,6 +734,7 @@ stringify_guc_log_type(enum guc_log_buffer_type type) */ void intel_guc_log_info(struct intel_guc_log *log, struct drm_printer *p) { + struct intel_guc *guc = log_to_guc(log); enum guc_log_buffer_type type; if (!intel_guc_log_relay_created(log)) { @@ -709,8 +749,8 @@ void intel_guc_log_info(struct intel_guc_log *log, struct drm_printer *p) for (type = GUC_DEBUG_LOG_BUFFER; type < GUC_MAX_LOG_BUFFER; type++) { drm_printf(p, "\t%s:\tflush count %10u, overflow count %10u\n", stringify_guc_log_type(type), - log->stats[type].flush, - log->stats[type].sampled_overflow); + guc->log_state[type].flush, + guc->log_state[type].sampled_overflow); } } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h index 9d9004dc58f1..2968023f7447 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h @@ -42,9 +42,17 @@ struct intel_guc; #define GUC_VERBOSITY_TO_LOG_LEVEL(x) ((x) + 2) #define GUC_LOG_LEVEL_MAX GUC_VERBOSITY_TO_LOG_LEVEL(GUC_LOG_VERBOSITY_MAX) +struct intel_guc_log_stats { + struct mutex lock; /* protects below and guc_log_buffer_state's read-ptr */ + u32 sampled_overflow; + u32 overflow; + u32 flush; +}; + struct intel_guc_log { u32 level; struct i915_vma *vma; + void *buf_addr; struct { void *buf_addr; bool started; @@ -53,12 +61,6 @@ struct intel_guc_log { struct mutex lock; u32 full_count; } relay; - /* logging related stats */ - struct { - u32 sampled_overflow; - u32 overflow; - u32 flush; - } stats[GUC_MAX_LOG_BUFFER]; }; void intel_guc_log_init_early(struct intel_guc_log *log);
GuC log buffer regions for debug-log-events, crash-dumps and error-state-capture are all a single bo allocation that includes the guc_log_buffer_state structures. Since the error-capture region is accessed with high priority at non- deterministic times (as part of gpu coredump) while the debug-log-event region is populated and accessed with different priorities, timings and consumers, let's split out separate locks for buffer-state accesses of each region. Also, ensure a global mapping is made up front for the entire bo throughout GuC operation so that dynamic mapping and unmapping isn't required for error capture log access if relay-logging isn't running. Additionally, while here, make some readibility improvements: 1. change previous function names with "capture_logs" to "copy_debug_logs" to help make the distinction clearer. 2. Update the guc log region mapping comments to order them according to the enum definition as per the GuC interface. Signed-off-by: Alan Previn <alan.previn.teres.alexis@intel.com> --- drivers/gpu/drm/i915/gt/uc/intel_guc.h | 2 + .../gpu/drm/i915/gt/uc/intel_guc_capture.c | 46 +++++++ .../gpu/drm/i915/gt/uc/intel_guc_capture.h | 1 + drivers/gpu/drm/i915/gt/uc/intel_guc_log.c | 120 ++++++++++++------ drivers/gpu/drm/i915/gt/uc/intel_guc_log.h | 14 +- 5 files changed, 137 insertions(+), 46 deletions(-)