diff mbox series

[1/2] drm/i915: Add MOCS state dump to debugfs

Message ID 20190807205556.40435-1-stuart.summers@intel.com (mailing list archive)
State New, archived
Headers show
Series [1/2] drm/i915: Add MOCS state dump to debugfs | expand

Commit Message

Summers, Stuart Aug. 7, 2019, 8:55 p.m. UTC
User applications might need to verify hardware configuration
of the MOCS entries. To facilitate this debug, add a new debugfs
entry to allow a dump of the MOCS state to verify expected values
are set by i915.

Signed-off-by: Stuart Summers <stuart.summers@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_mocs.c | 50 ++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/gt/intel_mocs.h |  3 ++
 drivers/gpu/drm/i915/i915_debugfs.c  | 12 +++++++
 3 files changed, 65 insertions(+)

Comments

Chris Wilson Aug. 7, 2019, 9:29 p.m. UTC | #1
Quoting Stuart Summers (2019-08-07 21:55:55)
> User applications might need to verify hardware configuration
> of the MOCS entries. To facilitate this debug, add a new debugfs
> entry to allow a dump of the MOCS state to verify expected values
> are set by i915.

User applications + debugfs? It's not an avenue for ABI.

If you really want to provide the settings back to userspace, look at
something like an i915_query or sysfs.

Or if you just mean igt, then add a Testcase:

If you just need to validate that we are setting and restoring them,
selftests.

If you need them for debugging errors, add them to the error state.
-Chris
Summers, Stuart Aug. 7, 2019, 9:48 p.m. UTC | #2
On Wed, 2019-08-07 at 22:29 +0100, Chris Wilson wrote:
> Quoting Stuart Summers (2019-08-07 21:55:55)
> > User applications might need to verify hardware configuration
> > of the MOCS entries. To facilitate this debug, add a new debugfs
> > entry to allow a dump of the MOCS state to verify expected values
> > are set by i915.
> 
> User applications + debugfs? It's not an avenue for ABI.
> 
> If you really want to provide the settings back to userspace, look at
> something like an i915_query or sysfs.
> 
> Or if you just mean igt, then add a Testcase:
> 
> If you just need to validate that we are setting and restoring them,
> selftests.
> 
> If you need them for debugging errors, add them to the error state.

This was probably poorly worded, you're right. I'll update the commit
message to be more specific.

I do want this for debugging, but I'm not sure the error state is the
right place. This is for debugging performance issues, so there are no
specific failures. If you feel sysfs or i915_query is more appropriate
here, I can look at adding this there instead. Is there a reason we
don't want this in debugfs specifically?

Thanks,
Stuart

> -Chris
Kumar Valsan, Prathap Aug. 7, 2019, 9:54 p.m. UTC | #3
On Wed, Aug 07, 2019 at 01:55:55PM -0700, Stuart Summers wrote:
> User applications might need to verify hardware configuration
> of the MOCS entries. To facilitate this debug, add a new debugfs
> entry to allow a dump of the MOCS state to verify expected values
> are set by i915.
> 
> Signed-off-by: Stuart Summers <stuart.summers@intel.com>

Acked-by: Prathap Kumar Valsan <prathap.kumar.valsan@intel.com>
> ---
>  drivers/gpu/drm/i915/gt/intel_mocs.c | 50 ++++++++++++++++++++++++++++
>  drivers/gpu/drm/i915/gt/intel_mocs.h |  3 ++
>  drivers/gpu/drm/i915/i915_debugfs.c  | 12 +++++++
>  3 files changed, 65 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c
> index 728704bbbe18..fea8ef2fd2aa 100644
> --- a/drivers/gpu/drm/i915/gt/intel_mocs.c
> +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c
> @@ -625,6 +625,56 @@ int intel_mocs_emit(struct i915_request *rq)
>  	return 0;
>  }
>  
> +static void
> +intel_mocs_dump_l3cc_table(struct intel_gt *gt, struct drm_printer *p)
> +{
> +	struct intel_uncore *uncore = gt->uncore;
> +	struct drm_i915_mocs_table table;
> +	unsigned int i;
> +
> +	if (!get_mocs_settings(gt, &table))
> +		return;
> +
> +	drm_printf(p, "l3cc:\n");
> +
> +	for (i = 0; i < table.n_entries / 2; i++) {
> +		u32 reg = intel_uncore_read(uncore, GEN9_LNCFCMOCS(i));
> +
> +		drm_printf(p, "  MOCS[%d]: 0x%x\n", i * 2, reg & 0xffff);
> +		drm_printf(p, "  MOCS[%d]: 0x%x\n", i * 2 + 1, reg >> 16);
> +	}
> +}
> +
> +static void
> +intel_mocs_dump_global(struct intel_gt *gt, struct drm_printer *p)
> +{
> +	struct intel_uncore *uncore = gt->uncore;
> +	struct drm_i915_mocs_table table;
> +	unsigned int i;
> +
> +	GEM_BUG_ON(!HAS_GLOBAL_MOCS_REGISTERS(gt->i915));
> +
> +	if (!get_mocs_settings(gt, &table))
> +		return;
> +
> +	if (GEM_DEBUG_WARN_ON(table.size > table.n_entries))
> +		return;
> +
> +	drm_printf(p, "global:\n");
> +
> +	for (i = 0; i < table.n_entries; i++)
> +		drm_printf(p, "  MOCS[%d]: 0x%x\n",
> +			   i, intel_uncore_read(uncore, GEN12_GLOBAL_MOCS(i)));
> +}
> +
> +void intel_mocs_show_info(struct intel_gt *gt, struct drm_printer *p)
> +{
> +	intel_mocs_dump_l3cc_table(gt, p);
> +
> +	if (HAS_GLOBAL_MOCS_REGISTERS(gt->i915))
> +		intel_mocs_dump_global(gt, p);
> +}
> +
>  void intel_mocs_init(struct intel_gt *gt)
>  {
>  	intel_mocs_init_l3cc_table(gt);
> diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.h b/drivers/gpu/drm/i915/gt/intel_mocs.h
> index 2ae816b7ca19..0ef95ce818d3 100644
> --- a/drivers/gpu/drm/i915/gt/intel_mocs.h
> +++ b/drivers/gpu/drm/i915/gt/intel_mocs.h
> @@ -24,6 +24,8 @@
>  #ifndef INTEL_MOCS_H
>  #define INTEL_MOCS_H
>  
> +#include <drm/drm_print.h>
> +
>  /**
>   * DOC: Memory Objects Control State (MOCS)
>   *
> @@ -55,6 +57,7 @@ struct intel_gt;
>  
>  void intel_mocs_init(struct intel_gt *gt);
>  void intel_mocs_init_engine(struct intel_engine_cs *engine);
> +void intel_mocs_show_info(struct intel_gt *gt, struct drm_printer *p);
>  
>  int intel_mocs_emit(struct i915_request *rq);
>  
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 3b15266c54fd..1aa022eb2c3d 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -41,6 +41,7 @@
>  
>  #include "gem/i915_gem_context.h"
>  #include "gt/intel_reset.h"
> +#include "gt/intel_mocs.h"
>  #include "gt/uc/intel_guc_submission.h"
>  
>  #include "i915_debugfs.h"
> @@ -76,6 +77,16 @@ static int i915_capabilities(struct seq_file *m, void *data)
>  	return 0;
>  }
>  
> +static int show_mocs_info(struct seq_file *m, void *data)
> +{
> +	struct drm_i915_private *i915 = node_to_i915(m->private);
> +	struct drm_printer p = drm_seq_file_printer(m);
> +
> +	intel_mocs_show_info(&i915->gt, &p);
> +
> +	return 0;
> +}
> +
>  static char get_pin_flag(struct drm_i915_gem_object *obj)
>  {
>  	return obj->pin_global ? 'p' : ' ';
> @@ -4352,6 +4363,7 @@ static const struct drm_info_list i915_debugfs_list[] = {
>  	{"i915_sseu_status", i915_sseu_status, 0},
>  	{"i915_drrs_status", i915_drrs_status, 0},
>  	{"i915_rps_boost_info", i915_rps_boost_info, 0},
> +	{"i915_mocs_info", show_mocs_info, 0},
>  };
>  #define I915_DEBUGFS_ENTRIES ARRAY_SIZE(i915_debugfs_list)
>  
> -- 
> 2.22.0
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Chris Wilson Aug. 7, 2019, 10:01 p.m. UTC | #4
Quoting Stuart Summers (2019-08-07 22:48:55)
> On Wed, 2019-08-07 at 22:29 +0100, Chris Wilson wrote:
> > Quoting Stuart Summers (2019-08-07 21:55:55)
> > > User applications might need to verify hardware configuration
> > > of the MOCS entries. To facilitate this debug, add a new debugfs
> > > entry to allow a dump of the MOCS state to verify expected values
> > > are set by i915.
> > 
> > User applications + debugfs? It's not an avenue for ABI.
> > 
> > If you really want to provide the settings back to userspace, look at
> > something like an i915_query or sysfs.
> > 
> > Or if you just mean igt, then add a Testcase:
> > 
> > If you just need to validate that we are setting and restoring them,
> > selftests.
> > 
> > If you need them for debugging errors, add them to the error state.
> 
> This was probably poorly worded, you're right. I'll update the commit
> message to be more specific.
> 
> I do want this for debugging, but not sure error state is the right
> place. This is for debugging performance issues, so no specific
> failures. If you feel sysfs or i915_query are more correct here, I can
> look at adding this there instead. Is there a reason we don't want this
> in debugfs specifically?

No, it was just the wording implied to me you had a use case for
clients, not just debugging the kernel.

Adding it to the error state (see i915_gpu_info) is not too bad an idea
if you need a sledgehammer to inspect the GPU state while a batch is
executing, but really it just sounds like you want to automate checking
the mocs registers against "ideal" state. They should be static, so once
they are set, and so long as we are confident (and check) that they
neither change nor can be scribbled over by userspace, you only need to
scan the source :)

I will add that I wish we took a more complete snapshot of interesting
registers for the error state.
-Chris
Summers, Stuart Aug. 7, 2019, 11 p.m. UTC | #5
On Wed, 2019-08-07 at 23:01 +0100, Chris Wilson wrote:
> Quoting Stuart Summers (2019-08-07 22:48:55)
> > On Wed, 2019-08-07 at 22:29 +0100, Chris Wilson wrote:
> > > Quoting Stuart Summers (2019-08-07 21:55:55)
> > > > User applications might need to verify hardware configuration
> > > > of the MOCS entries. To facilitate this debug, add a new
> > > > debugfs
> > > > entry to allow a dump of the MOCS state to verify expected
> > > > values
> > > > are set by i915.
> > > 
> > > User applications + debugfs? It's not an avenue for ABI.
> > > 
> > > If you really want to provide the settings back to userspace,
> > > look at
> > > something like an i915_query or sysfs.
> > > 
> > > Or if you just mean igt, then add a Testcase:
> > > 
> > > If you just need to validate that we are setting and restoring
> > > them,
> > > selftests.
> > > 
> > > If you need them for debugging errors, add them to the error
> > > state.
> > 
> > This was probably poorly worded, you're right. I'll update the
> > commit
> > message to be more specific.
> > 
> > I do want this for debugging, but not sure error state is the right
> > place. This is for debugging performance issues, so no specific
> > failures. If you feel sysfs or i915_query are more correct here, I
> > can
> > look at adding this there instead. Is there a reason we don't want
> > this
> > in debugfs specifically?
> 
> No, it was just the wording implied to me you had a use case for
> clients, not just debugging the kernel.
> 
> Adding it to the error state (see i915_gpu_info) is not too bad an
> idea
> if you need a sledgehammer to inspect the GPU state while a batch is
> executing, but really it just sounds like you want to automate
> checking
> the mocs registers against "ideal" state. They should be static, so
> once
> they are set, so long as we are confident and check that they do not
> change nor can be scribbled over by userspace, you only need to scan
> the
> source :)
> 
> I will add that I wish we took a more complete snapshot of
> interesting
> registers for the error state.

I guess my question is about the intent of the error state. I can add
it there, but do we want the error state to hold any register state we
might want to investigate, even when the registers are "correct" and
just need review based on current behavior?

Thanks,
Stuart

> -Chris
Chris Wilson Aug. 7, 2019, 11:12 p.m. UTC | #6
Quoting Stuart Summers (2019-08-08 00:00:17)
> On Wed, 2019-08-07 at 23:01 +0100, Chris Wilson wrote:
> > Quoting Stuart Summers (2019-08-07 22:48:55)
> > > On Wed, 2019-08-07 at 22:29 +0100, Chris Wilson wrote:
> > > > Quoting Stuart Summers (2019-08-07 21:55:55)
> > > > > User applications might need to verify hardware configuration
> > > > > of the MOCS entries. To facilitate this debug, add a new
> > > > > debugfs
> > > > > entry to allow a dump of the MOCS state to verify expected
> > > > > values
> > > > > are set by i915.
> > > > 
> > > > User applications + debugfs? It's not an avenue for ABI.
> > > > 
> > > > If you really want to provide the settings back to userspace,
> > > > look at
> > > > something like an i915_query or sysfs.
> > > > 
> > > > Or if you just mean igt, then add a Testcase:
> > > > 
> > > > If you just need to validate that we are setting and restoring
> > > > them,
> > > > selftests.
> > > > 
> > > > If you need them for debugging errors, add them to the error
> > > > state.
> > > 
> > > This was probably poorly worded, you're right. I'll update the
> > > commit
> > > message to be more specific.
> > > 
> > > I do want this for debugging, but not sure error state is the right
> > > place. This is for debugging performance issues, so no specific
> > > failures. If you feel sysfs or i915_query are more correct here, I
> > > can
> > > look at adding this there instead. Is there a reason we don't want
> > > this
> > > in debugfs specifically?
> > 
> > No, it was just the wording implied to me you had a use case for
> > clients, not just debugging the kernel.
> > 
> > Adding it to the error state (see i915_gpu_info) is not too bad an
> > idea
> > if you need a sledgehammer to inspect the GPU state while a batch is
> > executing, but really it just sounds like you want to automate
> > checking
> > the mocs registers against "ideal" state. They should be static, so
> > once
> > they are set, so long as we are confident and check that they do not
> > change nor can be scribbled over by userspace, you only need to scan
> > the
> > source :)
> > 
> > I will add that I wish we took a more complete snapshot of
> > interesting
> > registers for the error state.
> 
> I guess my question is about intent of the error state. I can add it
> there, but do we want this to indicate any register state we might want
> to investigate, even if the registers are "correct", but just need
> review based on current behavior?

It was created for debugging userspace batches (later added to hang
detection as a means of automatically grabbing the hopefully relevant
batch). As such it's a motley collection of information that at some
point proved useful. If you can make use of it, and find it more useful
to have the mocs registers in the same snapshot as the user batch,
please do include it. (Fwiw, I would like to extend the error state with
a bunch of { offset:0xfoo, value:0xbar } given a set of tables listing
the interesting regs. There just hasn't been an urgent need. Also on
that wishlist is devcoredump.)
-Chris
Summers, Stuart Aug. 8, 2019, 12:09 a.m. UTC | #7
On Thu, 2019-08-08 at 00:12 +0100, Chris Wilson wrote:
> Quoting Stuart Summers (2019-08-08 00:00:17)
> > On Wed, 2019-08-07 at 23:01 +0100, Chris Wilson wrote:
> > > Quoting Stuart Summers (2019-08-07 22:48:55)
> > > > On Wed, 2019-08-07 at 22:29 +0100, Chris Wilson wrote:
> > > > > Quoting Stuart Summers (2019-08-07 21:55:55)
> > > > > > User applications might need to verify hardware
> > > > > > configuration
> > > > > > of the MOCS entries. To facilitate this debug, add a new
> > > > > > debugfs
> > > > > > entry to allow a dump of the MOCS state to verify expected
> > > > > > values
> > > > > > are set by i915.
> > > > > 
> > > > > User applications + debugfs? It's not an avenue for ABI.
> > > > > 
> > > > > If you really want to provide the settings back to userspace,
> > > > > look at
> > > > > something like an i915_query or sysfs.
> > > > > 
> > > > > Or if you just mean igt, then add a Testcase:
> > > > > 
> > > > > If you just need to validate that we are setting and
> > > > > restoring
> > > > > them,
> > > > > selftests.
> > > > > 
> > > > > If you need them for debugging errors, add them to the error
> > > > > state.
> > > > 
> > > > This was probably poorly worded, you're right. I'll update the
> > > > commit
> > > > message to be more specific.
> > > > 
> > > > I do want this for debugging, but not sure error state is the
> > > > right
> > > > place. This is for debugging performance issues, so no specific
> > > > failures. If you feel sysfs or i915_query are more correct
> > > > here, I
> > > > can
> > > > look at adding this there instead. Is there a reason we don't
> > > > want
> > > > this
> > > > in debugfs specifically?
> > > 
> > > No, it was just the wording implied to me you had a use case for
> > > clients, not just debugging the kernel.
> > > 
> > > Adding it to the error state (see i915_gpu_info) is not too bad
> > > an
> > > idea
> > > if you need a sledgehammer to inspect the GPU state while a batch
> > > is
> > > executing, but really it just sounds like you want to automate
> > > checking
> > > the mocs registers against "ideal" state. They should be static,
> > > so
> > > once
> > > they are set, so long as we are confident and check that they do
> > > not
> > > change nor can be scribbled over by userspace, you only need to
> > > scan
> > > the
> > > source :)
> > > 
> > > I will add that I wish we took a more complete snapshot of
> > > interesting
> > > registers for the error state.
> > 
> > I guess my question is about intent of the error state. I can add
> > it
> > there, but do we want this to indicate any register state we might
> > want
> > to investigate, even if the registers are "correct", but just need
> > review based on current behavior?
> 
> It was created for debugging userspace batches (later added to hang
> detection as a means of automatically grabbing the hopefully relevant
> batch). As such it's a motley collection of information that at some
> point proved useful. If you can make use of it, and find it more
> useful
> to have the mocs registers in the same snapshot as the user batch,
> please do include it. (Fwiw, I would like to extend the error state
> with
> a bunch of { offset:0xfoo, value:0xbar } given a set of tables
> listing
> the interesting regs. There just hasn't been an urgent need. Also on
> that wishlist is devcoredump.)

Ok perfect, thanks for the history here! I'll rework this into the
error state. If the intent is a catch-all where we can easily see the
state of the GPU at any given time, I agree having a large list of
registers we dump here for review would be really interesting.

Thanks,
Stuart

> -Chris
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c
index 728704bbbe18..fea8ef2fd2aa 100644
--- a/drivers/gpu/drm/i915/gt/intel_mocs.c
+++ b/drivers/gpu/drm/i915/gt/intel_mocs.c
@@ -625,6 +625,56 @@  int intel_mocs_emit(struct i915_request *rq)
 	return 0;
 }
 
+static void
+intel_mocs_dump_l3cc_table(struct intel_gt *gt, struct drm_printer *p)
+{
+	struct intel_uncore *uncore = gt->uncore;
+	struct drm_i915_mocs_table table;
+	unsigned int i;
+
+	if (!get_mocs_settings(gt, &table))
+		return;
+
+	drm_printf(p, "l3cc:\n");
+
+	for (i = 0; i < table.n_entries / 2; i++) {
+		u32 reg = intel_uncore_read(uncore, GEN9_LNCFCMOCS(i));
+
+		drm_printf(p, "  MOCS[%d]: 0x%x\n", i * 2, reg & 0xffff);
+		drm_printf(p, "  MOCS[%d]: 0x%x\n", i * 2 + 1, reg >> 16);
+	}
+}
+
+static void
+intel_mocs_dump_global(struct intel_gt *gt, struct drm_printer *p)
+{
+	struct intel_uncore *uncore = gt->uncore;
+	struct drm_i915_mocs_table table;
+	unsigned int i;
+
+	GEM_BUG_ON(!HAS_GLOBAL_MOCS_REGISTERS(gt->i915));
+
+	if (!get_mocs_settings(gt, &table))
+		return;
+
+	if (GEM_DEBUG_WARN_ON(table.size > table.n_entries))
+		return;
+
+	drm_printf(p, "global:\n");
+
+	for (i = 0; i < table.n_entries; i++)
+		drm_printf(p, "  MOCS[%d]: 0x%x\n",
+			   i, intel_uncore_read(uncore, GEN12_GLOBAL_MOCS(i)));
+}
+
+void intel_mocs_show_info(struct intel_gt *gt, struct drm_printer *p)
+{
+	intel_mocs_dump_l3cc_table(gt, p);
+
+	if (HAS_GLOBAL_MOCS_REGISTERS(gt->i915))
+		intel_mocs_dump_global(gt, p);
+}
+
 void intel_mocs_init(struct intel_gt *gt)
 {
 	intel_mocs_init_l3cc_table(gt);
diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.h b/drivers/gpu/drm/i915/gt/intel_mocs.h
index 2ae816b7ca19..0ef95ce818d3 100644
--- a/drivers/gpu/drm/i915/gt/intel_mocs.h
+++ b/drivers/gpu/drm/i915/gt/intel_mocs.h
@@ -24,6 +24,8 @@ 
 #ifndef INTEL_MOCS_H
 #define INTEL_MOCS_H
 
+#include <drm/drm_print.h>
+
 /**
  * DOC: Memory Objects Control State (MOCS)
  *
@@ -55,6 +57,7 @@  struct intel_gt;
 
 void intel_mocs_init(struct intel_gt *gt);
 void intel_mocs_init_engine(struct intel_engine_cs *engine);
+void intel_mocs_show_info(struct intel_gt *gt, struct drm_printer *p);
 
 int intel_mocs_emit(struct i915_request *rq);
 
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 3b15266c54fd..1aa022eb2c3d 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -41,6 +41,7 @@ 
 
 #include "gem/i915_gem_context.h"
 #include "gt/intel_reset.h"
+#include "gt/intel_mocs.h"
 #include "gt/uc/intel_guc_submission.h"
 
 #include "i915_debugfs.h"
@@ -76,6 +77,16 @@  static int i915_capabilities(struct seq_file *m, void *data)
 	return 0;
 }
 
+static int show_mocs_info(struct seq_file *m, void *data)
+{
+	struct drm_i915_private *i915 = node_to_i915(m->private);
+	struct drm_printer p = drm_seq_file_printer(m);
+
+	intel_mocs_show_info(&i915->gt, &p);
+
+	return 0;
+}
+
 static char get_pin_flag(struct drm_i915_gem_object *obj)
 {
 	return obj->pin_global ? 'p' : ' ';
@@ -4352,6 +4363,7 @@  static const struct drm_info_list i915_debugfs_list[] = {
 	{"i915_sseu_status", i915_sseu_status, 0},
 	{"i915_drrs_status", i915_drrs_status, 0},
 	{"i915_rps_boost_info", i915_rps_boost_info, 0},
+	{"i915_mocs_info", show_mocs_info, 0},
 };
 #define I915_DEBUGFS_ENTRIES ARRAY_SIZE(i915_debugfs_list)