diff mbox

[RFC,07/12] drm/i915: Skeleton for AubCrash

Message ID 1509127275-22121-8-git-send-email-oscar.mateo@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

oscar.mateo@intel.com Oct. 27, 2017, 6:01 p.m. UTC
Includes some documentation on what AubCrash is supposed to achieve.

Signed-off-by: Oscar Mateo <oscar.mateo@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/Kconfig         |  8 ++++++
 drivers/gpu/drm/i915/Makefile        |  1 +
 drivers/gpu/drm/i915/i915_aubcrash.c | 47 +++++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_aubcrash.h | 42 ++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_debugfs.c  | 49 +++++++++++++++++++++++++++++---
 drivers/gpu/drm/i915/i915_sysfs.c    | 54 +++++++++++++++++++++++++++++++++---
 6 files changed, 193 insertions(+), 8 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/i915_aubcrash.c
 create mode 100644 drivers/gpu/drm/i915/i915_aubcrash.h

Comments

Chris Wilson Oct. 27, 2017, 6:20 p.m. UTC | #1
Quoting Oscar Mateo (2017-10-27 19:01:10)
> Includes some documentation on what AubCrash is supposed to achieve.

Which is missing *here*.
-Chris
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig
index dfd9588..176e53e 100644
--- a/drivers/gpu/drm/i915/Kconfig
+++ b/drivers/gpu/drm/i915/Kconfig
@@ -70,6 +70,14 @@  config DRM_I915_CAPTURE_ERROR
 
 	  If in doubt, say "Y".
 
+config DRM_I915_AUB_CRASH_DUMP
+        bool "Capture GPU error state in the form of an AUB file"
+        depends on DRM_I915_CAPTURE_ERROR
+        default n
+        help
+          Choose this option to allow the driver to dump a memtrace file (AUB)
+          with the GPU state when a hang is detected.
+
 config DRM_I915_COMPRESS_ERROR
 	bool "Compress GPU error state"
 	depends on DRM_I915_CAPTURE_ERROR
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 6c3b048..04956c7 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -124,6 +124,7 @@  i915-y += dvo_ch7017.o \
 
 # Post-mortem debug and GPU hang state capture
 i915-$(CONFIG_DRM_I915_CAPTURE_ERROR) += i915_gpu_error.o
+i915-$(CONFIG_DRM_I915_AUB_CRASH_DUMP) += i915_aubcrash.o
 i915-$(CONFIG_DRM_I915_SELFTEST) += \
 	selftests/i915_random.o \
 	selftests/i915_selftest.o
diff --git a/drivers/gpu/drm/i915/i915_aubcrash.c b/drivers/gpu/drm/i915/i915_aubcrash.c
new file mode 100644
index 0000000..95b75ab
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_aubcrash.c
@@ -0,0 +1,47 @@ 
+/*
+ * Copyright © 2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Author:
+ *    Oscar Mateo <oscar.mateo@intel.com>
+ *
+ */
+
+#include "intel_drv.h"
+#include "i915_aubcrash.h"
+
+/**
+ * DOC: AubCrash
+ *
+ * This code is a companion to i915_gpu_error. The idea is that, on a GPU crash,
+ * we can dump an AUB file that describes the state of the system at the point
+ * of the crash (GTTs, contexts, BBs, BOs, etc...). While i915_gpu_error already
+ * does much of that, its text format is not especially human-friendly.
+ * An AUB file, on the other hand, can be used by a number of tools (graphical
+ * AUB file browsers, simulators, emulators, etc...) that facilitate debugging.
+ *
+ */
+
+int i915_error_state_to_aub(struct drm_i915_error_state_buf *m,
+			    const struct i915_gpu_state *error)
+{
+	return 0;
+}
diff --git a/drivers/gpu/drm/i915/i915_aubcrash.h b/drivers/gpu/drm/i915/i915_aubcrash.h
new file mode 100644
index 0000000..bab1953
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_aubcrash.h
@@ -0,0 +1,42 @@ 
+/*
+ * Copyright © 2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _INTEL_AUBCRASH_H_
+#define _INTEL_AUBCRASH_H_
+
+#if IS_ENABLED(CONFIG_DRM_I915_AUB_CRASH_DUMP)
+
+int i915_error_state_to_aub(struct drm_i915_error_state_buf *m,
+                            const struct i915_gpu_state *error);
+
+#else
+
+static inline int i915_error_state_to_aub(struct drm_i915_error_state_buf *m,
+					  const struct i915_gpu_state *error)
+{
+	return 0;
+}
+
+#endif
+
+#endif
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index c65e381..f0f23ef 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -31,6 +31,7 @@ 
 #include <linux/sched/mm.h>
 #include "intel_drv.h"
 #include "i915_guc_submission.h"
+#include "i915_aubcrash.h"
 
 static inline struct drm_i915_private *node_to_i915(struct drm_info_node *node)
 {
@@ -938,7 +939,7 @@  static int i915_gem_fence_regs_info(struct seq_file *m, void *data)
 
 #if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)
 static ssize_t gpu_state_read(struct file *file, char __user *ubuf,
-			      size_t count, loff_t *pos)
+			      size_t count, loff_t *pos, bool type_aub)
 {
 	struct i915_gpu_state *error = file->private_data;
 	struct drm_i915_error_state_buf str;
@@ -952,7 +953,10 @@  static ssize_t gpu_state_read(struct file *file, char __user *ubuf,
 	if (ret)
 		return ret;
 
-	ret = i915_error_state_to_str(&str, error);
+	if (type_aub)
+		ret = i915_error_state_to_aub(&str, error);
+	else
+		ret = i915_error_state_to_str(&str, error);
 	if (ret)
 		goto out;
 
@@ -967,6 +971,12 @@  static ssize_t gpu_state_read(struct file *file, char __user *ubuf,
 	return ret;
 }
 
+static ssize_t gpu_state_read_str(struct file *file, char __user *ubuf,
+				  size_t count, loff_t *pos)
+{
+	return gpu_state_read(file, ubuf, count, pos, false);
+}
+
 static int gpu_state_release(struct inode *inode, struct file *file)
 {
 	i915_gpu_state_put(file->private_data);
@@ -991,7 +1001,7 @@  static int i915_gpu_info_open(struct inode *inode, struct file *file)
 static const struct file_operations i915_gpu_info_fops = {
 	.owner = THIS_MODULE,
 	.open = i915_gpu_info_open,
-	.read = gpu_state_read,
+	.read = gpu_state_read_str,
 	.llseek = default_llseek,
 	.release = gpu_state_release,
 };
@@ -1022,11 +1032,38 @@  static int i915_error_state_open(struct inode *inode, struct file *file)
 static const struct file_operations i915_error_state_fops = {
 	.owner = THIS_MODULE,
 	.open = i915_error_state_open,
-	.read = gpu_state_read,
+	.read = gpu_state_read_str,
+	.write = i915_error_state_write,
+	.llseek = default_llseek,
+	.release = gpu_state_release,
+};
+#endif
+
+#if IS_ENABLED(CONFIG_DRM_I915_AUB_CRASH_DUMP)
+
+static ssize_t gpu_state_read_aub(struct file *file, char __user *ubuf,
+				  size_t count, loff_t *pos)
+{
+	return gpu_state_read(file, ubuf, count, pos, true);
+}
+
+static const struct file_operations i915_gpu_info_aub_fops = {
+	.owner = THIS_MODULE,
+	.open = i915_gpu_info_open,
+	.read = gpu_state_read_aub,
+	.llseek = default_llseek,
+	.release = gpu_state_release,
+};
+
+static const struct file_operations i915_error_state_aub_fops = {
+	.owner = THIS_MODULE,
+	.open = i915_error_state_open,
+	.read = gpu_state_read_aub,
 	.write = i915_error_state_write,
 	.llseek = default_llseek,
 	.release = gpu_state_release,
 };
+
 #endif
 
 static int
@@ -4776,6 +4813,10 @@  static int i915_hpd_storm_ctl_open(struct inode *inode, struct file *file)
 	{"i915_error_state", &i915_error_state_fops},
 	{"i915_gpu_info", &i915_gpu_info_fops},
 #endif
+#if IS_ENABLED(CONFIG_DRM_I915_AUB_CRASH_DUMP)
+	{"i915_error_state_aub", &i915_error_state_aub_fops},
+	{"i915_gpu_info_aub", &i915_gpu_info_aub_fops},
+#endif
 	{"i915_next_seqno", &i915_next_seqno_fops},
 	{"i915_display_crc_ctl", &i915_display_crc_ctl_fops},
 	{"i915_pri_wm_latency", &i915_pri_wm_latency_fops},
diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c
index 791759f..646ba5f 100644
--- a/drivers/gpu/drm/i915/i915_sysfs.c
+++ b/drivers/gpu/drm/i915/i915_sysfs.c
@@ -31,6 +31,7 @@ 
 #include <linux/sysfs.h>
 #include "intel_drv.h"
 #include "i915_drv.h"
+#include "i915_aubcrash.h"
 
 static inline struct drm_i915_private *kdev_minor_to_i915(struct device *kdev)
 {
@@ -495,9 +496,8 @@  static ssize_t gt_rp_mhz_show(struct device *kdev, struct device_attribute *attr
 
 static ssize_t error_state_read(struct file *filp, struct kobject *kobj,
 				struct bin_attribute *attr, char *buf,
-				loff_t off, size_t count)
+				loff_t off, size_t count, bool type_aub)
 {
-
 	struct device *kdev = kobj_to_dev(kobj);
 	struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev);
 	struct drm_i915_error_state_buf error_str;
@@ -509,7 +509,11 @@  static ssize_t error_state_read(struct file *filp, struct kobject *kobj,
 		return ret;
 
 	gpu = i915_first_error_state(dev_priv);
-	ret = i915_error_state_to_str(&error_str, gpu);
+
+	if (type_aub)
+		ret = i915_error_state_to_aub(&error_str, gpu);
+	else
+		ret = i915_error_state_to_str(&error_str, gpu);
 	if (ret)
 		goto out;
 
@@ -536,11 +540,18 @@  static ssize_t error_state_write(struct file *file, struct kobject *kobj,
 	return count;
 }
 
+static ssize_t error_state_read_str(struct file *filp, struct kobject *kobj,
+				    struct bin_attribute *attr, char *buf,
+				    loff_t off, size_t count)
+{
+	return error_state_read(filp, kobj, attr, buf, off, count, false);
+}
+
 static const struct bin_attribute error_state_attr = {
 	.attr.name = "error",
 	.attr.mode = S_IRUSR | S_IWUSR,
 	.size = 0,
-	.read = error_state_read,
+	.read = error_state_read_str,
 	.write = error_state_write,
 };
 
@@ -559,6 +570,39 @@  static void i915_setup_error_capture(struct device *kdev) {}
 static void i915_teardown_error_capture(struct device *kdev) {}
 #endif
 
+#if IS_ENABLED(CONFIG_DRM_I915_AUB_CRASH_DUMP)
+
+static ssize_t error_state_read_aub(struct file *filp, struct kobject *kobj,
+				    struct bin_attribute *attr, char *buf,
+				    loff_t off, size_t count)
+{
+	return error_state_read(filp, kobj, attr, buf, off, count, true);
+}
+
+static const struct bin_attribute aub_state_attr = {
+	.attr.name = "aub",
+	.attr.mode = S_IRUSR | S_IWUSR,
+	.size = 0,
+	.read = error_state_read_aub,
+	.write = error_state_write,
+};
+
+static void i915_setup_error_capture_aub(struct device *kdev)
+{
+	if (sysfs_create_bin_file(&kdev->kobj, &aub_state_attr))
+		DRM_ERROR("aub_state sysfs setup failed\n");
+}
+
+static void i915_teardown_error_capture_aub(struct device *kdev)
+{
+	sysfs_remove_bin_file(&kdev->kobj, &aub_state_attr);
+}
+
+#else
+static void i915_setup_error_capture_aub(struct device *kdev) {}
+static void i915_teardown_error_capture_aub(struct device *kdev) {}
+#endif
+
 void i915_setup_sysfs(struct drm_i915_private *dev_priv)
 {
 	struct device *kdev = dev_priv->drm.primary->kdev;
@@ -606,6 +650,7 @@  void i915_setup_sysfs(struct drm_i915_private *dev_priv)
 		DRM_ERROR("RPS sysfs setup failed\n");
 
 	i915_setup_error_capture(kdev);
+	i915_setup_error_capture_aub(kdev);
 }
 
 void i915_teardown_sysfs(struct drm_i915_private *dev_priv)
@@ -613,6 +658,7 @@  void i915_teardown_sysfs(struct drm_i915_private *dev_priv)
 	struct device *kdev = dev_priv->drm.primary->kdev;
 
 	i915_teardown_error_capture(kdev);
+	i915_teardown_error_capture_aub(kdev);
 
 	if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
 		sysfs_remove_files(&kdev->kobj, vlv_attrs);