@@ -70,6 +70,14 @@ config DRM_I915_CAPTURE_ERROR
If in doubt, say "Y".
+config DRM_I915_AUB_CRASH_DUMP
+	bool "Capture GPU error state in the form of an AUB file"
+	depends on DRM_I915_CAPTURE_ERROR
+	help
+	  Choose this option to allow the driver to dump a memtrace file (AUB)
+	  with the GPU state when a hang is detected.
+
+	  If in doubt, say "N".
+
config DRM_I915_COMPRESS_ERROR
bool "Compress GPU error state"
depends on DRM_I915_CAPTURE_ERROR
@@ -124,6 +124,7 @@ i915-y += dvo_ch7017.o \
# Post-mortem debug and GPU hang state capture
i915-$(CONFIG_DRM_I915_CAPTURE_ERROR) += i915_gpu_error.o
+i915-$(CONFIG_DRM_I915_AUB_CRASH_DUMP) += i915_aubcrash.o
i915-$(CONFIG_DRM_I915_SELFTEST) += \
selftests/i915_random.o \
selftests/i915_selftest.o
new file mode 100644
@@ -0,0 +1,47 @@
+/*
+ * Copyright © 2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Author:
+ * Oscar Mateo <oscar.mateo@intel.com>
+ *
+ */
+
+#include "intel_drv.h"
+#include "i915_aubcrash.h"
+
+/**
+ * DOC: AubCrash
+ *
+ * This code is a companion to i915_gpu_error. The idea is that, on a GPU crash,
+ * we can dump an AUB file that describes the state of the system at the point
+ * of the crash (GTTs, contexts, BBs, BOs, etc...). While i915_gpu_error kind of
+ * already does that, it uses a text format that is not especially
+ * human-friendly. An AUB file, on the other hand, can be used by a number of
+ * tools (graphical AUB file browsers, simulators, emulators, etc...) that
+ * facilitate debugging.
+ *
+ */
+
+/**
+ * i915_error_state_to_aub - render a GPU error state as AUB data
+ * @m: output buffer (not touched by this stub)
+ * @error: GPU state to render (not read by this stub)
+ *
+ * Placeholder implementation: no data is produced and success is reported
+ * unconditionally.
+ *
+ * Return: always 0.
+ */
+int i915_error_state_to_aub(struct drm_i915_error_state_buf *m,
+			    const struct i915_gpu_state *error)
+{
+	int ret = 0;
+
+	return ret;
+}
new file mode 100644
@@ -0,0 +1,42 @@
+/*
+ * Copyright © 2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _I915_AUBCRASH_H_
+#define _I915_AUBCRASH_H_
+
+/*
+ * Forward declarations so this header does not depend on the includer having
+ * pulled in the full structure definitions beforehand; only pointers to these
+ * types are used below.
+ */
+struct drm_i915_error_state_buf;
+struct i915_gpu_state;
+
+#if IS_ENABLED(CONFIG_DRM_I915_AUB_CRASH_DUMP)
+
+/* Real implementation, provided by i915_aubcrash.c when the option is set. */
+int i915_error_state_to_aub(struct drm_i915_error_state_buf *m,
+			    const struct i915_gpu_state *error);
+
+#else
+
+/* AUB crash dump compiled out: produce nothing and report success (0). */
+static inline int i915_error_state_to_aub(struct drm_i915_error_state_buf *m,
+					  const struct i915_gpu_state *error)
+{
+	return 0;
+}
+
+#endif /* IS_ENABLED(CONFIG_DRM_I915_AUB_CRASH_DUMP) */
+
+#endif /* _I915_AUBCRASH_H_ */
@@ -31,6 +31,7 @@
#include <linux/sched/mm.h>
#include "intel_drv.h"
#include "i915_guc_submission.h"
+#include "i915_aubcrash.h"
static inline struct drm_i915_private *node_to_i915(struct drm_info_node *node)
{
@@ -938,7 +939,7 @@ static int i915_gem_fence_regs_info(struct seq_file *m, void *data)
#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)
static ssize_t gpu_state_read(struct file *file, char __user *ubuf,
- size_t count, loff_t *pos)
+ size_t count, loff_t *pos, bool type_aub)
{
struct i915_gpu_state *error = file->private_data;
struct drm_i915_error_state_buf str;
@@ -952,7 +953,10 @@ static ssize_t gpu_state_read(struct file *file, char __user *ubuf,
if (ret)
return ret;
- ret = i915_error_state_to_str(&str, error);
+ if (type_aub)
+ ret = i915_error_state_to_aub(&str, error);
+ else
+ ret = i915_error_state_to_str(&str, error);
if (ret)
goto out;
@@ -967,6 +971,12 @@ static ssize_t gpu_state_read(struct file *file, char __user *ubuf,
return ret;
}
+/* Read handler for the text rendering: gpu_state_read() with AUB disabled. */
+static ssize_t gpu_state_read_str(struct file *file, char __user *ubuf,
+				  size_t count, loff_t *pos)
+{
+	const bool as_aub = false;
+
+	return gpu_state_read(file, ubuf, count, pos, as_aub);
+}
+
static int gpu_state_release(struct inode *inode, struct file *file)
{
i915_gpu_state_put(file->private_data);
@@ -991,7 +1001,7 @@ static int i915_gpu_info_open(struct inode *inode, struct file *file)
static const struct file_operations i915_gpu_info_fops = {
.owner = THIS_MODULE,
.open = i915_gpu_info_open,
- .read = gpu_state_read,
+ .read = gpu_state_read_str,
.llseek = default_llseek,
.release = gpu_state_release,
};
@@ -1022,11 +1032,38 @@ static int i915_error_state_open(struct inode *inode, struct file *file)
static const struct file_operations i915_error_state_fops = {
.owner = THIS_MODULE,
.open = i915_error_state_open,
- .read = gpu_state_read,
+ .read = gpu_state_read_str,
+ .write = i915_error_state_write,
+ .llseek = default_llseek,
+ .release = gpu_state_release,
+};
+#endif
+
+#if IS_ENABLED(CONFIG_DRM_I915_AUB_CRASH_DUMP)
+
+/* Read handler for the AUB rendering: gpu_state_read() with AUB enabled. */
+static ssize_t gpu_state_read_aub(struct file *file, char __user *ubuf,
+				  size_t count, loff_t *pos)
+{
+	const bool as_aub = true;
+
+	return gpu_state_read(file, ubuf, count, pos, as_aub);
+}
+
+/*
+ * File operations pairing i915_gpu_info_open()/gpu_state_release() with the
+ * AUB read handler.
+ */
+static const struct file_operations i915_gpu_info_aub_fops = {
+	.owner		= THIS_MODULE,
+	.open		= i915_gpu_info_open,
+	.release	= gpu_state_release,
+	.read		= gpu_state_read_aub,
+	.llseek		= default_llseek,
+};
+
+static const struct file_operations i915_error_state_aub_fops = {
+ .owner = THIS_MODULE,
+ .open = i915_error_state_open,
+ .read = gpu_state_read_aub,
.write = i915_error_state_write,
.llseek = default_llseek,
.release = gpu_state_release,
};
+
#endif
static int
@@ -4776,6 +4813,10 @@ static int i915_hpd_storm_ctl_open(struct inode *inode, struct file *file)
{"i915_error_state", &i915_error_state_fops},
{"i915_gpu_info", &i915_gpu_info_fops},
#endif
+#if IS_ENABLED(CONFIG_DRM_I915_AUB_CRASH_DUMP)
+ {"i915_error_state_aub", &i915_error_state_aub_fops},
+ {"i915_gpu_info_aub", &i915_gpu_info_aub_fops},
+#endif
{"i915_next_seqno", &i915_next_seqno_fops},
{"i915_display_crc_ctl", &i915_display_crc_ctl_fops},
{"i915_pri_wm_latency", &i915_pri_wm_latency_fops},
@@ -31,6 +31,7 @@
#include <linux/sysfs.h>
#include "intel_drv.h"
#include "i915_drv.h"
+#include "i915_aubcrash.h"
static inline struct drm_i915_private *kdev_minor_to_i915(struct device *kdev)
{
@@ -495,9 +496,8 @@ static ssize_t gt_rp_mhz_show(struct device *kdev, struct device_attribute *attr
static ssize_t error_state_read(struct file *filp, struct kobject *kobj,
struct bin_attribute *attr, char *buf,
- loff_t off, size_t count)
+ loff_t off, size_t count, bool type_aub)
{
-
struct device *kdev = kobj_to_dev(kobj);
struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev);
struct drm_i915_error_state_buf error_str;
@@ -509,7 +509,11 @@ static ssize_t error_state_read(struct file *filp, struct kobject *kobj,
return ret;
gpu = i915_first_error_state(dev_priv);
- ret = i915_error_state_to_str(&error_str, gpu);
+
+ if (type_aub)
+ ret = i915_error_state_to_aub(&error_str, gpu);
+ else
+ ret = i915_error_state_to_str(&error_str, gpu);
if (ret)
goto out;
@@ -536,11 +540,18 @@ static ssize_t error_state_write(struct file *file, struct kobject *kobj,
return count;
}
+/* sysfs read callback for the text form: error_state_read(), AUB disabled. */
+static ssize_t error_state_read_str(struct file *filp, struct kobject *kobj,
+				    struct bin_attribute *attr, char *buf,
+				    loff_t off, size_t count)
+{
+	const bool as_aub = false;
+
+	return error_state_read(filp, kobj, attr, buf, off, count, as_aub);
+}
+
static const struct bin_attribute error_state_attr = {
.attr.name = "error",
.attr.mode = S_IRUSR | S_IWUSR,
.size = 0,
- .read = error_state_read,
+ .read = error_state_read_str,
.write = error_state_write,
};
@@ -559,6 +570,39 @@ static void i915_setup_error_capture(struct device *kdev) {}
static void i915_teardown_error_capture(struct device *kdev) {}
#endif
+#if IS_ENABLED(CONFIG_DRM_I915_AUB_CRASH_DUMP)
+
+/* sysfs read callback for the AUB form: error_state_read(), AUB enabled. */
+static ssize_t error_state_read_aub(struct file *filp, struct kobject *kobj,
+				    struct bin_attribute *attr, char *buf,
+				    loff_t off, size_t count)
+{
+	const bool as_aub = true;
+
+	return error_state_read(filp, kobj, attr, buf, off, count, as_aub);
+}
+
+/*
+ * Binary sysfs attribute named "aub": owner read/write, size 0, read through
+ * the AUB callback and written through error_state_write().
+ */
+static const struct bin_attribute aub_state_attr = {
+	.attr = {
+		.name = "aub",
+		.mode = S_IRUSR | S_IWUSR,
+	},
+	.size = 0,
+	.write = error_state_write,
+	.read = error_state_read_aub,
+};
+
+/*
+ * Create the "aub" binary file on @kdev's kobject. A failure is only logged;
+ * nothing is reported back to the caller.
+ */
+static void i915_setup_error_capture_aub(struct device *kdev)
+{
+	int err;
+
+	err = sysfs_create_bin_file(&kdev->kobj, &aub_state_attr);
+	if (!err)
+		return;
+
+	DRM_ERROR("aub_state sysfs setup failed\n");
+}
+
+/* Remove the "aub" binary file from @kdev's kobject. */
+static void i915_teardown_error_capture_aub(struct device *kdev)
+{
+	struct kobject *kobj = &kdev->kobj;
+
+	sysfs_remove_bin_file(kobj, &aub_state_attr);
+}
+
+#else
+/* AUB crash dump support compiled out: there is nothing to set up. */
+static void i915_setup_error_capture_aub(struct device *kdev)
+{
+}
+/* AUB crash dump support compiled out: there is nothing to tear down. */
+static void i915_teardown_error_capture_aub(struct device *kdev)
+{
+}
+#endif
+
void i915_setup_sysfs(struct drm_i915_private *dev_priv)
{
struct device *kdev = dev_priv->drm.primary->kdev;
@@ -606,6 +650,7 @@ void i915_setup_sysfs(struct drm_i915_private *dev_priv)
DRM_ERROR("RPS sysfs setup failed\n");
i915_setup_error_capture(kdev);
+ i915_setup_error_capture_aub(kdev);
}
void i915_teardown_sysfs(struct drm_i915_private *dev_priv)
@@ -613,6 +658,7 @@ void i915_teardown_sysfs(struct drm_i915_private *dev_priv)
struct device *kdev = dev_priv->drm.primary->kdev;
i915_teardown_error_capture(kdev);
+ i915_teardown_error_capture_aub(kdev);
if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
sysfs_remove_files(&kdev->kobj, vlv_attrs);
Includes some documentation on what AubCrash is supposed to achieve. Signed-off-by: Oscar Mateo <oscar.mateo@intel.com> Cc: Chris Wilson <chris@chris-wilson.co.uk> --- drivers/gpu/drm/i915/Kconfig | 8 ++++++ drivers/gpu/drm/i915/Makefile | 1 + drivers/gpu/drm/i915/i915_aubcrash.c | 47 +++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/i915_aubcrash.h | 42 ++++++++++++++++++++++++++++ drivers/gpu/drm/i915/i915_debugfs.c | 49 +++++++++++++++++++++++++++++--- drivers/gpu/drm/i915/i915_sysfs.c | 54 +++++++++++++++++++++++++++++++++--- 6 files changed, 193 insertions(+), 8 deletions(-) create mode 100644 drivers/gpu/drm/i915/i915_aubcrash.c create mode 100644 drivers/gpu/drm/i915/i915_aubcrash.h