Message ID | 20230426205713.512695-6-rodrigo.vivi@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Introduce xe_devcoredump. | expand |
On Wed, Apr 26, 2023 at 04:57:04PM -0400, Rodrigo Vivi wrote: > The goal is to allow for a snapshot capture to be taken at the time > of the crash, while the print out can happen at a later time through > the exposed devcoredump virtual device. > > Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com> > --- > drivers/gpu/drm/xe/xe_guc_ct.c | 132 +++++++++++++++++++++++---- > drivers/gpu/drm/xe/xe_guc_ct.h | 7 +- > drivers/gpu/drm/xe/xe_guc_ct_types.h | 26 ++++++ > 3 files changed, 145 insertions(+), 20 deletions(-) > > diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c > index e16e5fe37ed4..0b7b95dbd9be 100644 > --- a/drivers/gpu/drm/xe/xe_guc_ct.c > +++ b/drivers/gpu/drm/xe/xe_guc_ct.c > @@ -1095,31 +1095,26 @@ static void g2h_worker_func(struct work_struct *w) > xe_device_mem_access_put(ct_to_xe(ct)); > } > > -static void guc_ct_ctb_print(struct xe_device *xe, struct guc_ctb *ctb, > - struct drm_printer *p) > +static void guc_ctb_snapshot_capture(struct xe_device *xe, struct guc_ctb *ctb, > + struct guc_ctb_snapshot *snapshot) > { > u32 head, tail; > > - drm_printf(p, "\tsize: %d\n", ctb->info.size); > - drm_printf(p, "\tresv_space: %d\n", ctb->info.resv_space); > - drm_printf(p, "\thead: %d\n", ctb->info.head); > - drm_printf(p, "\ttail: %d\n", ctb->info.tail); > - drm_printf(p, "\tspace: %d\n", ctb->info.space); > - drm_printf(p, "\tbroken: %d\n", ctb->info.broken); > + snapshot->cmds = kmalloc_array(ctb->info.size, sizeof(u32), GFP_ATOMIC); So since this is GFP_ATOMIC, I assume this is so we can call this code from the TDR or a CTB handler (dma fence signaling paths)? Also, I don't see where you check for this allocation failing. 
> > - head = desc_read(xe, ctb, head); > - tail = desc_read(xe, ctb, tail); > - drm_printf(p, "\thead (memory): %d\n", head); > - drm_printf(p, "\ttail (memory): %d\n", tail); > - drm_printf(p, "\tstatus (memory): 0x%x\n", desc_read(xe, ctb, status)); > + xe_map_memcpy_from(xe, &snapshot->desc, &ctb->desc, 0, > + sizeof(struct guc_ct_buffer_desc)); > + memcpy(&snapshot->info, &ctb->info, sizeof(struct guc_ctb_info)); > + > + head = snapshot->desc.head; > + tail = snapshot->desc.tail; > > if (head != tail) { > struct iosys_map map = > IOSYS_MAP_INIT_OFFSET(&ctb->cmds, head * sizeof(u32)); > > while (head != tail) { > - drm_printf(p, "\tcmd[%d]: 0x%08x\n", head, > - xe_map_rd(xe, &map, 0, u32)); > + snapshot->cmds[head] = xe_map_rd(xe, &map, 0, u32); > ++head; > if (head == ctb->info.size) { > head = 0; > @@ -1131,20 +1126,119 @@ static void guc_ct_ctb_print(struct xe_device *xe, struct guc_ctb *ctb, > } > } > > -void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p) > +static void guc_ctb_snapshot_print(struct guc_ctb_snapshot *snapshot, > + struct drm_printer *p) > +{ > + u32 head, tail; > + > + drm_printf(p, "\tsize: %d\n", snapshot->info.size); > + drm_printf(p, "\tresv_space: %d\n", snapshot->info.space); > + drm_printf(p, "\thead: %d\n", snapshot->info.head); > + drm_printf(p, "\ttail: %d\n", snapshot->info.tail); > + drm_printf(p, "\tspace: %d\n", snapshot->info.space); > + drm_printf(p, "\tbroken: %d\n", snapshot->info.broken); > + drm_printf(p, "\thead (memory): %d\n", snapshot->desc.head); > + drm_printf(p, "\ttail (memory): %d\n", snapshot->desc.tail); > + drm_printf(p, "\tstatus (memory): 0x%x\n", snapshot->desc.status); > + > + head = snapshot->desc.head; > + tail = snapshot->desc.tail; > + > + while (head != tail) { > + drm_printf(p, "\tcmd[%d]: 0x%08x\n", head, > + snapshot->cmds[head]); > + ++head; > + if (head == snapshot->info.size) > + head = 0; > + } > +} > + > +static void guc_ctb_snapshot_free(struct guc_ctb_snapshot *snapshot) > +{ > 
+ kfree(snapshot->cmds); > +} > + > +/** > + * xe_guc_ct_snapshot_capture - Take a quick snapshot of the CT state. > + * @ct: GuC CT object. > + * > + * This can be printed out in a later stage like during dev_coredump > + * analysis. > + * > + * Returns: a GuC CT snapshot object that must be freed by the caller > + * by using `xe_guc_ct_snapshot_free`. > + */ > +struct xe_guc_ct_snapshot *xe_guc_ct_snapshot_capture(struct xe_guc_ct *ct) > { > + struct xe_guc_ct_snapshot *snapshot; > + > + snapshot = kzalloc(sizeof(struct xe_guc_ct_snapshot), GFP_ATOMIC); > + Same here, need to check for an alloc failure. Also, maybe we should add a flag to switch between GFP_ATOMIC (signaling path) and GFP_KERNEL (debugfs). In the case above, CMDs might be huge as a deadlock workaround (like 16 MB or something), so atomic seems risky. Aside from these comments, I do rather like what you have done here. Matt > if (ct->enabled) { > + snapshot->ct_enabled = true; > + guc_ctb_snapshot_capture(ct_to_xe(ct), &ct->ctbs.h2g, > + &snapshot->h2g); > + guc_ctb_snapshot_capture(ct_to_xe(ct), &ct->ctbs.g2h, > + &snapshot->g2h); > + } > + > + return snapshot; > +} > + > +/** > + * xe_guc_ct_snapshot_print - Print out a given GuC CT snapshot. > + * @snapshot: GuC CT snapshot object. > + * @p: drm_printer where it will be printed out. > + * > + * This function prints out a given GuC CT snapshot object. 
> + */ > +void xe_guc_ct_snapshot_print(struct xe_guc_ct_snapshot *snapshot, > + struct drm_printer *p) > +{ > + if (snapshot->ct_enabled) { > drm_puts(p, "\nH2G CTB (all sizes in DW):\n"); > - guc_ct_ctb_print(ct_to_xe(ct), &ct->ctbs.h2g, p); > + guc_ctb_snapshot_print(&snapshot->h2g, p); > > drm_puts(p, "\nG2H CTB (all sizes in DW):\n"); > - guc_ct_ctb_print(ct_to_xe(ct), &ct->ctbs.g2h, p); > - drm_printf(p, "\tg2h outstanding: %d\n", ct->g2h_outstanding); > + guc_ctb_snapshot_print(&snapshot->g2h, p); > + > + drm_printf(p, "\tg2h outstanding: %d\n", > + snapshot->g2h_outstanding); > } else { > drm_puts(p, "\nCT disabled\n"); > } > } > > +/** > + * xe_guc_ct_snapshot_free - Free all allocated objects for a given snapshot. > + * @snapshot: GuC CT snapshot object. > + * > + * This function free all the memory that needed to be allocated at capture > + * time. > + */ > +void xe_guc_ct_snapshot_free(struct xe_guc_ct_snapshot *snapshot) > +{ > + guc_ctb_snapshot_free(&snapshot->h2g); > + guc_ctb_snapshot_free(&snapshot->g2h); > + kfree(snapshot); > +} > + > +/** > + * xe_guc_ct_print - GuC CT Print. > + * @ct: GuC CT. > + * @p: drm_printer where it will be printed out. > + * > + * This function quickly capture a snapshot and immediately print it out. 
> + */ > +void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p) > +{ > + struct xe_guc_ct_snapshot *snapshot; > + > + snapshot = xe_guc_ct_snapshot_capture(ct); > + xe_guc_ct_snapshot_print(snapshot, p); > + xe_guc_ct_snapshot_free(snapshot); > +} > + > #ifdef XE_GUC_CT_SELFTEST > /* > * Disable G2H processing in IRQ handler to force xe_guc_ct_send to enter flow > diff --git a/drivers/gpu/drm/xe/xe_guc_ct.h b/drivers/gpu/drm/xe/xe_guc_ct.h > index 49fb74f91e4d..29e0dff7ad9b 100644 > --- a/drivers/gpu/drm/xe/xe_guc_ct.h > +++ b/drivers/gpu/drm/xe/xe_guc_ct.h > @@ -13,9 +13,14 @@ struct drm_printer; > int xe_guc_ct_init(struct xe_guc_ct *ct); > int xe_guc_ct_enable(struct xe_guc_ct *ct); > void xe_guc_ct_disable(struct xe_guc_ct *ct); > -void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p); > void xe_guc_ct_fast_path(struct xe_guc_ct *ct); > > +struct xe_guc_ct_snapshot *xe_guc_ct_snapshot_capture(struct xe_guc_ct *ct); > +void xe_guc_ct_snapshot_print(struct xe_guc_ct_snapshot *snapshot, > + struct drm_printer *p); > +void xe_guc_ct_snapshot_free(struct xe_guc_ct_snapshot *snapshot); > +void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p); > + > static inline void xe_guc_ct_irq_handler(struct xe_guc_ct *ct) > { > wake_up_all(&ct->wq); > diff --git a/drivers/gpu/drm/xe/xe_guc_ct_types.h b/drivers/gpu/drm/xe/xe_guc_ct_types.h > index 64e3dd14d4b2..93046d95b009 100644 > --- a/drivers/gpu/drm/xe/xe_guc_ct_types.h > +++ b/drivers/gpu/drm/xe/xe_guc_ct_types.h > @@ -48,6 +48,32 @@ struct guc_ctb { > struct guc_ctb_info info; > }; > > +/** > + * struct guc_ctb_snapshot - GuC command transport buffer (CTB) snapshot > + */ > +struct guc_ctb_snapshot { > + /** @desc: snapshot of the CTB descriptor */ > + struct guc_ct_buffer_desc desc; > + /** @cmds: snapshot of the CTB commands */ > + u32 *cmds; > + /** @info: snapshot of the CTB info */ > + struct guc_ctb_info info; > +}; > + > +/** > + * struct xe_guc_ct_snapshot - GuC command transport 
(CT) snapshot > + */ > +struct xe_guc_ct_snapshot { > + /** @ct_enabled: CT enabled info at capture time. */ > + bool ct_enabled; > + /** @g2h_outstanding: G2H outstanding info at the capture time */ > + u32 g2h_outstanding; > + /** @g2h: G2H CTB snapshot */ > + struct guc_ctb_snapshot g2h; > + /** @h2g: H2G CTB snapshot */ > + struct guc_ctb_snapshot h2g; > +}; > + > /** > * struct xe_guc_ct - GuC command transport (CT) layer > * > -- > 2.39.2 >
diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index e16e5fe37ed4..0b7b95dbd9be 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -1095,31 +1095,26 @@ static void g2h_worker_func(struct work_struct *w) xe_device_mem_access_put(ct_to_xe(ct)); } -static void guc_ct_ctb_print(struct xe_device *xe, struct guc_ctb *ctb, - struct drm_printer *p) +static void guc_ctb_snapshot_capture(struct xe_device *xe, struct guc_ctb *ctb, + struct guc_ctb_snapshot *snapshot) { u32 head, tail; - drm_printf(p, "\tsize: %d\n", ctb->info.size); - drm_printf(p, "\tresv_space: %d\n", ctb->info.resv_space); - drm_printf(p, "\thead: %d\n", ctb->info.head); - drm_printf(p, "\ttail: %d\n", ctb->info.tail); - drm_printf(p, "\tspace: %d\n", ctb->info.space); - drm_printf(p, "\tbroken: %d\n", ctb->info.broken); + snapshot->cmds = kmalloc_array(ctb->info.size, sizeof(u32), GFP_ATOMIC); - head = desc_read(xe, ctb, head); - tail = desc_read(xe, ctb, tail); - drm_printf(p, "\thead (memory): %d\n", head); - drm_printf(p, "\ttail (memory): %d\n", tail); - drm_printf(p, "\tstatus (memory): 0x%x\n", desc_read(xe, ctb, status)); + xe_map_memcpy_from(xe, &snapshot->desc, &ctb->desc, 0, + sizeof(struct guc_ct_buffer_desc)); + memcpy(&snapshot->info, &ctb->info, sizeof(struct guc_ctb_info)); + + head = snapshot->desc.head; + tail = snapshot->desc.tail; if (head != tail) { struct iosys_map map = IOSYS_MAP_INIT_OFFSET(&ctb->cmds, head * sizeof(u32)); while (head != tail) { - drm_printf(p, "\tcmd[%d]: 0x%08x\n", head, - xe_map_rd(xe, &map, 0, u32)); + snapshot->cmds[head] = xe_map_rd(xe, &map, 0, u32); ++head; if (head == ctb->info.size) { head = 0; @@ -1131,20 +1126,119 @@ static void guc_ct_ctb_print(struct xe_device *xe, struct guc_ctb *ctb, } } -void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p) +static void guc_ctb_snapshot_print(struct guc_ctb_snapshot *snapshot, + struct drm_printer *p) +{ + u32 head, tail; + + drm_printf(p, 
"\tsize: %d\n", snapshot->info.size); + drm_printf(p, "\tresv_space: %d\n", snapshot->info.space); + drm_printf(p, "\thead: %d\n", snapshot->info.head); + drm_printf(p, "\ttail: %d\n", snapshot->info.tail); + drm_printf(p, "\tspace: %d\n", snapshot->info.space); + drm_printf(p, "\tbroken: %d\n", snapshot->info.broken); + drm_printf(p, "\thead (memory): %d\n", snapshot->desc.head); + drm_printf(p, "\ttail (memory): %d\n", snapshot->desc.tail); + drm_printf(p, "\tstatus (memory): 0x%x\n", snapshot->desc.status); + + head = snapshot->desc.head; + tail = snapshot->desc.tail; + + while (head != tail) { + drm_printf(p, "\tcmd[%d]: 0x%08x\n", head, + snapshot->cmds[head]); + ++head; + if (head == snapshot->info.size) + head = 0; + } +} + +static void guc_ctb_snapshot_free(struct guc_ctb_snapshot *snapshot) +{ + kfree(snapshot->cmds); +} + +/** + * xe_guc_ct_snapshot_capture - Take a quick snapshot of the CT state. + * @ct: GuC CT object. + * + * This can be printed out in a later stage like during dev_coredump + * analysis. + * + * Returns: a GuC CT snapshot object that must be freed by the caller + * by using `xe_guc_ct_snapshot_free`. + */ +struct xe_guc_ct_snapshot *xe_guc_ct_snapshot_capture(struct xe_guc_ct *ct) { + struct xe_guc_ct_snapshot *snapshot; + + snapshot = kzalloc(sizeof(struct xe_guc_ct_snapshot), GFP_ATOMIC); + if (ct->enabled) { + snapshot->ct_enabled = true; + guc_ctb_snapshot_capture(ct_to_xe(ct), &ct->ctbs.h2g, + &snapshot->h2g); + guc_ctb_snapshot_capture(ct_to_xe(ct), &ct->ctbs.g2h, + &snapshot->g2h); + } + + return snapshot; +} + +/** + * xe_guc_ct_snapshot_print - Print out a given GuC CT snapshot. + * @snapshot: GuC CT snapshot object. + * @p: drm_printer where it will be printed out. + * + * This function prints out a given GuC CT snapshot object. 
+ */ +void xe_guc_ct_snapshot_print(struct xe_guc_ct_snapshot *snapshot, + struct drm_printer *p) +{ + if (snapshot->ct_enabled) { drm_puts(p, "\nH2G CTB (all sizes in DW):\n"); - guc_ct_ctb_print(ct_to_xe(ct), &ct->ctbs.h2g, p); + guc_ctb_snapshot_print(&snapshot->h2g, p); drm_puts(p, "\nG2H CTB (all sizes in DW):\n"); - guc_ct_ctb_print(ct_to_xe(ct), &ct->ctbs.g2h, p); - drm_printf(p, "\tg2h outstanding: %d\n", ct->g2h_outstanding); + guc_ctb_snapshot_print(&snapshot->g2h, p); + + drm_printf(p, "\tg2h outstanding: %d\n", + snapshot->g2h_outstanding); } else { drm_puts(p, "\nCT disabled\n"); } } +/** + * xe_guc_ct_snapshot_free - Free all allocated objects for a given snapshot. + * @snapshot: GuC CT snapshot object. + * + * This function free all the memory that needed to be allocated at capture + * time. + */ +void xe_guc_ct_snapshot_free(struct xe_guc_ct_snapshot *snapshot) +{ + guc_ctb_snapshot_free(&snapshot->h2g); + guc_ctb_snapshot_free(&snapshot->g2h); + kfree(snapshot); +} + +/** + * xe_guc_ct_print - GuC CT Print. + * @ct: GuC CT. + * @p: drm_printer where it will be printed out. + * + * This function quickly capture a snapshot and immediately print it out. 
+ */ +void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p) +{ + struct xe_guc_ct_snapshot *snapshot; + + snapshot = xe_guc_ct_snapshot_capture(ct); + xe_guc_ct_snapshot_print(snapshot, p); + xe_guc_ct_snapshot_free(snapshot); +} + #ifdef XE_GUC_CT_SELFTEST /* * Disable G2H processing in IRQ handler to force xe_guc_ct_send to enter flow diff --git a/drivers/gpu/drm/xe/xe_guc_ct.h b/drivers/gpu/drm/xe/xe_guc_ct.h index 49fb74f91e4d..29e0dff7ad9b 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.h +++ b/drivers/gpu/drm/xe/xe_guc_ct.h @@ -13,9 +13,14 @@ struct drm_printer; int xe_guc_ct_init(struct xe_guc_ct *ct); int xe_guc_ct_enable(struct xe_guc_ct *ct); void xe_guc_ct_disable(struct xe_guc_ct *ct); -void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p); void xe_guc_ct_fast_path(struct xe_guc_ct *ct); +struct xe_guc_ct_snapshot *xe_guc_ct_snapshot_capture(struct xe_guc_ct *ct); +void xe_guc_ct_snapshot_print(struct xe_guc_ct_snapshot *snapshot, + struct drm_printer *p); +void xe_guc_ct_snapshot_free(struct xe_guc_ct_snapshot *snapshot); +void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p); + static inline void xe_guc_ct_irq_handler(struct xe_guc_ct *ct) { wake_up_all(&ct->wq); diff --git a/drivers/gpu/drm/xe/xe_guc_ct_types.h b/drivers/gpu/drm/xe/xe_guc_ct_types.h index 64e3dd14d4b2..93046d95b009 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct_types.h +++ b/drivers/gpu/drm/xe/xe_guc_ct_types.h @@ -48,6 +48,32 @@ struct guc_ctb { struct guc_ctb_info info; }; +/** + * struct guc_ctb_snapshot - GuC command transport buffer (CTB) snapshot + */ +struct guc_ctb_snapshot { + /** @desc: snapshot of the CTB descriptor */ + struct guc_ct_buffer_desc desc; + /** @cmds: snapshot of the CTB commands */ + u32 *cmds; + /** @info: snapshot of the CTB info */ + struct guc_ctb_info info; +}; + +/** + * struct xe_guc_ct_snapshot - GuC command transport (CT) snapshot + */ +struct xe_guc_ct_snapshot { + /** @ct_enabled: CT enabled info at capture time. 
*/ + bool ct_enabled; + /** @g2h_outstanding: G2H outstanding info at the capture time */ + u32 g2h_outstanding; + /** @g2h: G2H CTB snapshot */ + struct guc_ctb_snapshot g2h; + /** @h2g: H2G CTB snapshot */ + struct guc_ctb_snapshot h2g; +}; + /** * struct xe_guc_ct - GuC command transport (CT) layer *
The goal is to allow for a snapshot capture to be taken at the time of the crash, while the print out can happen at a later time through the exposed devcoredump virtual device. Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com> --- drivers/gpu/drm/xe/xe_guc_ct.c | 132 +++++++++++++++++++++++---- drivers/gpu/drm/xe/xe_guc_ct.h | 7 +- drivers/gpu/drm/xe/xe_guc_ct_types.h | 26 ++++++ 3 files changed, 145 insertions(+), 20 deletions(-)