diff mbox

[v6,2/8] ath10k: provide firmware crash info via debugfs

Message ID 20140809180805.29215.45232.stgit@potku.adurom.net (mailing list archive)
State Not Applicable, archived
Headers show

Commit Message

Kalle Valo Aug. 9, 2014, 6:08 p.m. UTC
From: Ben Greear <greearb@candelatech.com>

Store the firmware crash registers and last 128 or so
firmware debug-log ids and present them to user-space
via debugfs.

Should help with figuring out why the firmware crashed.

Signed-off-by: Ben Greear <greearb@candelatech.com>
Signed-off-by: Kalle Valo <kvalo@qca.qualcomm.com>
---
 drivers/net/wireless/ath/ath10k/core.h  |   27 +++
 drivers/net/wireless/ath/ath10k/debug.c |  273 +++++++++++++++++++++++++++++++
 drivers/net/wireless/ath/ath10k/debug.h |   22 ++
 drivers/net/wireless/ath/ath10k/hw.h    |   30 +++
 drivers/net/wireless/ath/ath10k/pci.c   |  133 ++++++++++++++-
 drivers/net/wireless/ath/ath10k/pci.h   |    3 
 6 files changed, 478 insertions(+), 10 deletions(-)


--
To unsubscribe from this list: send the line "unsubscribe linux-wireless" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

Michal Kazior Aug. 18, 2014, 8:54 a.m. UTC | #1
On 9 August 2014 20:08, Kalle Valo <kvalo@qca.qualcomm.com> wrote:
> From: Ben Greear <greearb@candelatech.com>
>
> Store the firmware crash registers and last 128 or so
> firmware debug-log ids and present them to user-space
> via debugfs.
>
> Should help with figuring out why the firmware crashed.
>
> Signed-off-by: Ben Greear <greearb@candelatech.com>
> Signed-off-by: Kalle Valo <kvalo@qca.qualcomm.com>
> ---
>  drivers/net/wireless/ath/ath10k/core.h  |   27 +++
>  drivers/net/wireless/ath/ath10k/debug.c |  273 +++++++++++++++++++++++++++++++
>  drivers/net/wireless/ath/ath10k/debug.h |   22 ++
>  drivers/net/wireless/ath/ath10k/hw.h    |   30 +++
>  drivers/net/wireless/ath/ath10k/pci.c   |  133 ++++++++++++++-
>  drivers/net/wireless/ath/ath10k/pci.h   |    3
>  6 files changed, 478 insertions(+), 10 deletions(-)
[...]

> +struct ath10k_dump_file_data {
> +       /* dump file information */
> +
> +       /* "ATH10K-FW-DUMP" */
> +       char df_magic[16];
> +
> +       u32 len;
> +
> +       /* 0x1 if host is big-endian */
> +       u32 big_endian;

This isn't entirely correct. Depending on host endianess you'll end up
with 0x1 or 0x1000000. This will still work if you do a boolean
evaluation of it in userspace or compare it to 0, but god forbid to
compare it with 0x1.


> +
> +       /* file dump version, 1 for now. */
> +       u32 version;

I think this should have a #define instead of the comment. You'll need
to update 2 values when you bump the version with comment+hardcode
approach.

> +
> +       /* some info we can get from ath10k struct that might help */
> +
> +       u8 uuid[16];
> +
> +       u32 chip_id;
> +
> +       /* 0 for now, in place for later hardware */
> +       u32 bus_type;

Maybe we should have an enum for that instead of using a hardcoded 0?


> +       /* time-of-day stamp, nano-seconds */
> +       u64 tv_nsec;
> +
> +
> +       /* LINUX_VERSION_CODE */
> +       u32 kernel_ver_code;

2 empty newlines?


> +static struct ath10k_dump_file_data *ath10k_build_dump_file(struct ath10k *ar)
> +{
> +       struct ath10k_fw_crash_data *crash_data = ar->debug.fw_crash_data;
> +       struct ath10k_dump_file_data *dump_data;
> +       struct ath10k_tlv_dump_data *dump_tlv;
> +       int hdr_len = sizeof(*dump_data);
> +       unsigned int len, sofar = 0;
> +       unsigned char *buf;
> +
> +       lockdep_assert_held(&ar->conf_mutex);
> +
> +       spin_lock_bh(&ar->data_lock);
> +
> +       if (!crash_data->crashed_since_read) {
> +               spin_unlock_bh(&ar->data_lock);
> +               return NULL;
> +       }
> +
> +       spin_unlock_bh(&ar->data_lock);
> +
> +       len = hdr_len;
> +       len += sizeof(*dump_tlv) + sizeof(crash_data->reg_dump_values);
> +       len += sizeof(*dump_tlv) + sizeof(crash_data->dbglog_entry_data);
> +
> +       sofar += hdr_len;
> +
> +       /* This is going to get big when we start dumping FW RAM and such,
> +        * so go ahead and use vmalloc.
> +        */
> +       buf = vmalloc(len);
> +       if (!buf)
> +               return NULL;
> +
> +       spin_lock_bh(&ar->data_lock);

The current code doesn't seem to allow it, but according to comments
crashed_since_read is protected by data_lock only. As such it might've
changed while the lock was released.

Current code, however, guarantees it remains true while conf_mutex is held.

Perhaps the vmalloc() should be done before spin_lock is acquired
and/or the memory should be allocated outside this function completely
and make it consume the crashed_since_read (i.e. set it to false) once
it's done (while the data_lock is still held).


> +
> +       memset(buf, 0, len);
> +       dump_data = (struct ath10k_dump_file_data *)(buf);
> +       strlcpy(dump_data->df_magic, "ATH10K-FW-DUMP",
> +               sizeof(dump_data->df_magic));
> +       dump_data->len = len;
> +
> +#ifdef __BIG_ENDIAN
> +       dump_data->big_endian = 1;
> +#else
> +       dump_data->big_endian = 0;
> +#endif

Yuck.


> +static int ath10k_fw_crash_dump_open(struct inode *inode, struct file *file)
> +{
> +       struct ath10k *ar = inode->i_private;
> +       struct ath10k_dump_file_data *dump;
> +       int ret;
> +
> +       mutex_lock(&ar->conf_mutex);
> +
> +       dump = ath10k_build_dump_file(ar);
> +       if (!dump) {
> +               ret = -ENODATA;
> +               goto out;
> +       }
> +
> +       file->private_data = dump;

> +       ar->debug.fw_crash_data->crashed_since_read = false;

According to comments this should be protected by data_lock, but isn't.

> +       ret = 0;
> +
> +out:
> +       mutex_unlock(&ar->conf_mutex);
> +       return ret;
> +}


>  static int ath10k_debug_htt_stats_req(struct ath10k *ar)
>  {
>         u64 cookie;
> @@ -913,6 +1178,10 @@ static const struct file_operations fops_dfs_stats = {
>
>  int ath10k_debug_create(struct ath10k *ar)
>  {
> +       ar->debug.fw_crash_data = vzalloc(sizeof(*ar->debug.fw_crash_data));
> +       if (!ar->debug.fw_crash_data)
> +               return -ENOMEM;
> +
>         ar->debug.debugfs_phy = debugfs_create_dir("ath10k",
>                                                    ar->hw->wiphy->debugfsdir);

I think there's a check if debug_phy is NULL. If it is it does return
-ENOMEM. This means you leak fw_crash_data.


> +/* Target debug log related defines and structs */
> +
> +/* Target is 32-bit CPU, so we just use u32 for
> + * the pointers.  The memory space is relative to the
> + * target, not the host.
> + */
> +struct ath10k_fw_dbglog_buf {
> +       /* pointer to dblog_buf_s */
> +       u32 next;
> +
> +       /* pointer to u8 buffer */
> +       u32 buffer;
> +
> +       u32 bufsize;
> +       u32 length;
> +       u32 count;
> +       u32 free;
> +} __packed;
> +
> +struct ath10k_fw_dbglog_hdr {
> +       /* pointer to dbglog_buf_s */
> +       u32 dbuf;
> +
> +       u32 dropped;
> +} __packed;
This is confusing.

Target is a 32-bit *Little-Endian* CPU but due to implicit byteswap in
ath10k_pci_diag_* functions everything is already in host endianess.


Micha?
--
To unsubscribe from this list: send the line "unsubscribe linux-wireless" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Kalle Valo Aug. 18, 2014, 11:39 a.m. UTC | #2
Michal Kazior <michal.kazior@tieto.com> writes:

> On 9 August 2014 20:08, Kalle Valo <kvalo@qca.qualcomm.com> wrote:
>> From: Ben Greear <greearb@candelatech.com>
>>
>> Store the firmware crash registers and last 128 or so
>> firmware debug-log ids and present them to user-space
>> via debugfs.
>>
>> Should help with figuring out why the firmware crashed.
>>
>> Signed-off-by: Ben Greear <greearb@candelatech.com>
>> Signed-off-by: Kalle Valo <kvalo@qca.qualcomm.com>
>> ---
>>  drivers/net/wireless/ath/ath10k/core.h  |   27 +++
>>  drivers/net/wireless/ath/ath10k/debug.c |  273 +++++++++++++++++++++++++++++++
>>  drivers/net/wireless/ath/ath10k/debug.h |   22 ++
>>  drivers/net/wireless/ath/ath10k/hw.h    |   30 +++
>>  drivers/net/wireless/ath/ath10k/pci.c   |  133 ++++++++++++++-
>>  drivers/net/wireless/ath/ath10k/pci.h   |    3
>>  6 files changed, 478 insertions(+), 10 deletions(-)
> [...]
>
>> +struct ath10k_dump_file_data {
>> +       /* dump file information */
>> +
>> +       /* "ATH10K-FW-DUMP" */
>> +       char df_magic[16];
>> +
>> +       u32 len;
>> +
>> +       /* 0x1 if host is big-endian */
>> +       u32 big_endian;
>
> This isn't entirely correct. Depending on host endianess you'll end up
> with 0x1 or 0x1000000. This will still work if you do a boolean
> evaluation of it in userspace or compare it to 0, but god forbid to
> compare it with 0x1.

That's true. Didn't you at one point suggest just always using little
endian? I think that's simplest approach here.

>> +
>> +       /* file dump version, 1 for now. */
>> +       u32 version;
>
> I think this should have a #define instead of the comment. You'll need
> to update 2 values when you bump the version with comment+hardcode
> approach.

Good point, I'll add that.

>> +       /* some info we can get from ath10k struct that might help */
>> +
>> +       u8 uuid[16];
>> +
>> +       u32 chip_id;
>> +
>> +       /* 0 for now, in place for later hardware */
>> +       u32 bus_type;
>
> Maybe we should have an enum for that instead of using a hardcoded 0?

We had that but you removed it in 3a0861fffd223 =)

>> +       /* time-of-day stamp, nano-seconds */
>> +       u64 tv_nsec;
>> +
>> +
>> +       /* LINUX_VERSION_CODE */
>> +       u32 kernel_ver_code;
>
> 2 empty newlines?

Will fix.

>> +static struct ath10k_dump_file_data *ath10k_build_dump_file(struct ath10k *ar)
>> +{
>> +       struct ath10k_fw_crash_data *crash_data = ar->debug.fw_crash_data;
>> +       struct ath10k_dump_file_data *dump_data;
>> +       struct ath10k_tlv_dump_data *dump_tlv;
>> +       int hdr_len = sizeof(*dump_data);
>> +       unsigned int len, sofar = 0;
>> +       unsigned char *buf;
>> +
>> +       lockdep_assert_held(&ar->conf_mutex);
>> +
>> +       spin_lock_bh(&ar->data_lock);
>> +
>> +       if (!crash_data->crashed_since_read) {
>> +               spin_unlock_bh(&ar->data_lock);
>> +               return NULL;
>> +       }
>> +
>> +       spin_unlock_bh(&ar->data_lock);
>> +
>> +       len = hdr_len;
>> +       len += sizeof(*dump_tlv) + sizeof(crash_data->reg_dump_values);
>> +       len += sizeof(*dump_tlv) + sizeof(crash_data->dbglog_entry_data);
>> +
>> +       sofar += hdr_len;
>> +
>> +       /* This is going to get big when we start dumping FW RAM and such,
>> +        * so go ahead and use vmalloc.
>> +        */
>> +       buf = vmalloc(len);
>> +       if (!buf)
>> +               return NULL;
>> +
>> +       spin_lock_bh(&ar->data_lock);
>
> The current code doesn't seem to allow it, but according to comments
> crashed_since_read is protected by data_lock only. As such it might've
> changed while the lock was released.

Another good point.

> Current code, however, guarantees it remains true while conf_mutex is
> held.
>
> Perhaps the vmalloc() should be done before spin_lock is acquired

That sounds the simple way to do it. We get a useless allocation when
there's no crash data, but that's so bad.

> and/or the memory should be allocated outside this function completely
> and make it consume the crashed_since_read (i.e. set it to false) once
> it's done (while the data_lock is still held).

You mean like allocating the memory in advance, for example during
module probe time? Then we would have a big chunk of memory we don't use
for anything most of the time.

>> +       memset(buf, 0, len);
>> +       dump_data = (struct ath10k_dump_file_data *)(buf);
>> +       strlcpy(dump_data->df_magic, "ATH10K-FW-DUMP",
>> +               sizeof(dump_data->df_magic));
>> +       dump_data->len = len;
>> +
>> +#ifdef __BIG_ENDIAN
>> +       dump_data->big_endian = 1;
>> +#else
>> +       dump_data->big_endian = 0;
>> +#endif
>
> Yuck.

Yeah. I'm thinking of switching to little endian more and more :)

>> +static int ath10k_fw_crash_dump_open(struct inode *inode, struct file *file)
>> +{
>> +       struct ath10k *ar = inode->i_private;
>> +       struct ath10k_dump_file_data *dump;
>> +       int ret;
>> +
>> +       mutex_lock(&ar->conf_mutex);
>> +
>> +       dump = ath10k_build_dump_file(ar);
>> +       if (!dump) {
>> +               ret = -ENODATA;
>> +               goto out;
>> +       }
>> +
>> +       file->private_data = dump;
>
>> +       ar->debug.fw_crash_data->crashed_since_read = false;
>
> According to comments this should be protected by data_lock, but
> isn't.

Yup. I'll move crashed_since_read handling to ath10k_build_dump_file().

>
>> +       ret = 0;
>> +
>> +out:
>> +       mutex_unlock(&ar->conf_mutex);
>> +       return ret;
>> +}
>
>
>>  static int ath10k_debug_htt_stats_req(struct ath10k *ar)
>>  {
>>         u64 cookie;
>> @@ -913,6 +1178,10 @@ static const struct file_operations fops_dfs_stats = {
>>
>>  int ath10k_debug_create(struct ath10k *ar)
>>  {
>> +       ar->debug.fw_crash_data = vzalloc(sizeof(*ar->debug.fw_crash_data));
>> +       if (!ar->debug.fw_crash_data)
>> +               return -ENOMEM;
>> +
>>         ar->debug.debugfs_phy = debugfs_create_dir("ath10k",
>>                                                    ar->hw->wiphy->debugfsdir);
>
> I think there's a check if debug_phy is NULL. If it is it does return
> -ENOMEM. This means you leak fw_crash_data.

Indeed. I'll fix that.

>> +/* Target debug log related defines and structs */
>> +
>> +/* Target is 32-bit CPU, so we just use u32 for
>> + * the pointers.  The memory space is relative to the
>> + * target, not the host.
>> + */
>> +struct ath10k_fw_dbglog_buf {
>> +       /* pointer to dblog_buf_s */
>> +       u32 next;
>> +
>> +       /* pointer to u8 buffer */
>> +       u32 buffer;
>> +
>> +       u32 bufsize;
>> +       u32 length;
>> +       u32 count;
>> +       u32 free;
>> +} __packed;
>> +
>> +struct ath10k_fw_dbglog_hdr {
>> +       /* pointer to dbglog_buf_s */
>> +       u32 dbuf;
>> +
>> +       u32 dropped;
>> +} __packed;
>
> This is confusing.

Sorry, what are referring to here? I don't understand your comment.

> Target is a 32-bit *Little-Endian* CPU but due to implicit byteswap in
> ath10k_pci_diag_* functions everything is already in host endianess.

I think I'll that as a comment here.
Michal Kazior Aug. 18, 2014, 12:36 p.m. UTC | #3
On 18 August 2014 13:39, Kalle Valo <kvalo@qca.qualcomm.com> wrote:
> Michal Kazior <michal.kazior@tieto.com> writes:
>
>> On 9 August 2014 20:08, Kalle Valo <kvalo@qca.qualcomm.com> wrote:
>>> From: Ben Greear <greearb@candelatech.com>
>>>
>>> Store the firmware crash registers and last 128 or so
>>> firmware debug-log ids and present them to user-space
>>> via debugfs.
>>>
>>> Should help with figuring out why the firmware crashed.
>>>
>>> Signed-off-by: Ben Greear <greearb@candelatech.com>
>>> Signed-off-by: Kalle Valo <kvalo@qca.qualcomm.com>
>>> ---
>>>  drivers/net/wireless/ath/ath10k/core.h  |   27 +++
>>>  drivers/net/wireless/ath/ath10k/debug.c |  273 +++++++++++++++++++++++++++++++
>>>  drivers/net/wireless/ath/ath10k/debug.h |   22 ++
>>>  drivers/net/wireless/ath/ath10k/hw.h    |   30 +++
>>>  drivers/net/wireless/ath/ath10k/pci.c   |  133 ++++++++++++++-
>>>  drivers/net/wireless/ath/ath10k/pci.h   |    3
>>>  6 files changed, 478 insertions(+), 10 deletions(-)
>> [...]
>>
>>> +struct ath10k_dump_file_data {
>>> +       /* dump file information */
>>> +
>>> +       /* "ATH10K-FW-DUMP" */
>>> +       char df_magic[16];
>>> +
>>> +       u32 len;
>>> +
>>> +       /* 0x1 if host is big-endian */
>>> +       u32 big_endian;
>>
>> This isn't entirely correct. Depending on host endianess you'll end up
>> with 0x1 or 0x1000000. This will still work if you do a boolean
>> evaluation of it in userspace or compare it to 0, but god forbid to
>> compare it with 0x1.
>
> That's true. Didn't you at one point suggest just always using little
> endian? I think that's simplest approach here.

Yes. I did point that out at some time ago.


>>> +       /* some info we can get from ath10k struct that might help */
>>> +
>>> +       u8 uuid[16];
>>> +
>>> +       u32 chip_id;
>>> +
>>> +       /* 0 for now, in place for later hardware */
>>> +       u32 bus_type;
>>
>> Maybe we should have an enum for that instead of using a hardcoded 0?
>
> We had that but you removed it in 3a0861fffd223 =)

.. right :-) I suppose we could just remove the bus_type then? We do
have an unused[128] for future expansion, don't we?


>> and/or the memory should be allocated outside this function completely
>> and make it consume the crashed_since_read (i.e. set it to false) once
>> it's done (while the data_lock is still held).
>
> You mean like allocating the memory in advance, for example during
> module probe time? Then we would have a big chunk of memory we don't use
> for anything most of the time.

I meant just moving it up in the call stack, e.g. to
ath10k_fw_crash_dump_open().


Micha?
--
To unsubscribe from this list: send the line "unsubscribe linux-wireless" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Kalle Valo Aug. 18, 2014, 12:53 p.m. UTC | #4
Michal Kazior <michal.kazior@tieto.com> writes:

> On 18 August 2014 13:39, Kalle Valo <kvalo@qca.qualcomm.com> wrote:
>> Michal Kazior <michal.kazior@tieto.com> writes:
>>
>>> On 9 August 2014 20:08, Kalle Valo <kvalo@qca.qualcomm.com> wrote:
>>>
>>>> +struct ath10k_dump_file_data {
>>>> +       /* dump file information */
>>>> +
>>>> +       /* "ATH10K-FW-DUMP" */
>>>> +       char df_magic[16];
>>>> +
>>>> +       u32 len;
>>>> +
>>>> +       /* 0x1 if host is big-endian */
>>>> +       u32 big_endian;
>>>
>>> This isn't entirely correct. Depending on host endianess you'll end up
>>> with 0x1 or 0x1000000. This will still work if you do a boolean
>>> evaluation of it in userspace or compare it to 0, but god forbid to
>>> compare it with 0x1.
>>
>> That's true. Didn't you at one point suggest just always using little
>> endian? I think that's simplest approach here.
>
> Yes. I did point that out at some time ago.

Ok. I started converting to use little endian already.

>>>> +       /* some info we can get from ath10k struct that might help */
>>>> +
>>>> +       u8 uuid[16];
>>>> +
>>>> +       u32 chip_id;
>>>> +
>>>> +       /* 0 for now, in place for later hardware */
>>>> +       u32 bus_type;
>>>
>>> Maybe we should have an enum for that instead of using a hardcoded 0?
>>
>> We had that but you removed it in 3a0861fffd223 =)
>
> .. right :-) 

Sorry, I couldn't resist :)

> I suppose we could just remove the bus_type then? We do have an
> unused[128] for future expansion, don't we?

We could, but then we would have to modify the crash dump tools. I would
rather be prepared for this, it's only a u32 anyway. If we have to do
something, I would prefer to get back the enum.
diff mbox

Patch

diff --git a/drivers/net/wireless/ath/ath10k/core.h b/drivers/net/wireless/ath/ath10k/core.h
index d5c95d46e841..77fb36d378c8 100644
--- a/drivers/net/wireless/ath/ath10k/core.h
+++ b/drivers/net/wireless/ath/ath10k/core.h
@@ -22,6 +22,8 @@ 
 #include <linux/if_ether.h>
 #include <linux/types.h>
 #include <linux/pci.h>
+#include <linux/uuid.h>
+#include <linux/time.h>
 
 #include "htt.h"
 #include "htc.h"
@@ -278,6 +280,29 @@  struct ath10k_vif_iter {
 	struct ath10k_vif *arvif;
 };
 
+/* This will store at least the last 128 entries.  Each dbglog message
+ * is a max of 7 32-bit integers in length, but the length can be less
+ * than that as well.
+ */
+#define ATH10K_DBGLOG_DATA_LEN (128 * 7 * sizeof(u32))
+
+struct ath10k_dbglog_entry_storage {
+	/* where to write next chunk of data */
+	u32 next_idx;
+
+	u8 data[ATH10K_DBGLOG_DATA_LEN];
+};
+
+/* used for crash-dump storage, protected by data-lock */
+struct ath10k_fw_crash_data {
+	bool crashed_since_read;
+
+	uuid_le uuid;
+	struct timespec timestamp;
+	struct ath10k_dbglog_entry_storage dbglog_entry_data;
+	u32 reg_dump_values[REG_DUMP_COUNT_QCA988X];
+};
+
 struct ath10k_debug {
 	struct dentry *debugfs_phy;
 
@@ -295,6 +320,8 @@  struct ath10k_debug {
 
 	u8 htt_max_amsdu;
 	u8 htt_max_ampdu;
+
+	struct ath10k_fw_crash_data *fw_crash_data;
 };
 
 enum ath10k_state {
diff --git a/drivers/net/wireless/ath/ath10k/debug.c b/drivers/net/wireless/ath/ath10k/debug.c
index c9e35c87edfb..d6b49a9e1585 100644
--- a/drivers/net/wireless/ath/ath10k/debug.c
+++ b/drivers/net/wireless/ath/ath10k/debug.c
@@ -17,6 +17,9 @@ 
 
 #include <linux/module.h>
 #include <linux/debugfs.h>
+#include <linux/version.h>
+#include <linux/vermagic.h>
+#include <linux/vmalloc.h>
 
 #include "core.h"
 #include "debug.h"
@@ -24,6 +27,89 @@ 
 /* ms */
 #define ATH10K_DEBUG_HTT_STATS_INTERVAL 1000
 
+/**
+ * enum ath10k_fw_crash_dump_type - types of data in the dump file
+ * @ATH10K_FW_CRASH_DUMP_DBGLOG:  Recent firmware debug log entries
+ * @ATH10K_FW_CRASH_DUMP_REGDUMP: Register crash dump in binary format
+ */
+enum ath10k_fw_crash_dump_type {
+	ATH10K_FW_CRASH_DUMP_DBGLOG = 0,
+	ATH10K_FW_CRASH_DUMP_REGDUMP = 1,
+
+	ATH10K_FW_CRASH_DUMP_MAX,
+};
+
+struct ath10k_tlv_dump_data {
+	/* see ath10k_fw_crash_dump_type above */
+	u32 type;
+
+	/* in bytes */
+	u32 tlv_len;
+
+	/* pad to 32-bit boundaries as needed */
+	u8 tlv_data[];
+} __packed;
+
+struct ath10k_dump_file_data {
+	/* dump file information */
+
+	/* "ATH10K-FW-DUMP" */
+	char df_magic[16];
+
+	u32 len;
+
+	/* 0x1 if host is big-endian */
+	u32 big_endian;
+
+	/* file dump version, 1 for now. */
+	u32 version;
+
+	/* some info we can get from ath10k struct that might help */
+
+	u8 uuid[16];
+
+	u32 chip_id;
+
+	/* 0 for now, in place for later hardware */
+	u32 bus_type;
+
+	u32 target_version;
+	u32 fw_version_major;
+	u32 fw_version_minor;
+	u32 fw_version_release;
+	u32 fw_version_build;
+	u32 phy_capability;
+	u32 hw_min_tx_power;
+	u32 hw_max_tx_power;
+	u32 ht_cap_info;
+	u32 vht_cap_info;
+	u32 num_rf_chains;
+
+	/* firmware version string */
+	char fw_ver[ETHTOOL_FWVERS_LEN];
+
+	/* Kernel related information */
+
+	/* time-of-day stamp */
+	u64 tv_sec;
+
+	/* time-of-day stamp, nano-seconds */
+	u64 tv_nsec;
+
+
+	/* LINUX_VERSION_CODE */
+	u32 kernel_ver_code;
+
+	/* VERMAGIC_STRING */
+	char kernel_ver[64];
+
+	/* room for growth w/out changing binary format */
+	u8 unused[128];
+
+	/* struct ath10k_tlv_dump_data + more */
+	u8 data[0];
+} __packed;
+
 static int ath10k_printk(const char *level, const char *fmt, ...)
 {
 	struct va_format vaf;
@@ -580,6 +666,185 @@  static const struct file_operations fops_chip_id = {
 	.llseek = default_llseek,
 };
 
+struct ath10k_fw_crash_data *
+ath10k_debug_get_new_fw_crash_data(struct ath10k *ar)
+{
+	struct ath10k_fw_crash_data *crash_data = ar->debug.fw_crash_data;
+
+	lockdep_assert_held(&ar->data_lock);
+
+	crash_data->crashed_since_read = true;
+	uuid_le_gen(&crash_data->uuid);
+	getnstimeofday(&crash_data->timestamp);
+
+	return crash_data;
+}
+EXPORT_SYMBOL(ath10k_debug_get_new_fw_crash_data);
+
+void ath10k_debug_dbglog_add(struct ath10k *ar, u8 *buffer, int len)
+{
+	struct ath10k_fw_crash_data *crash_data = ar->debug.fw_crash_data;
+	int i, z;
+
+	lockdep_assert_held(&ar->data_lock);
+
+	z = crash_data->dbglog_entry_data.next_idx;
+
+	for (i = 0; i < len; i++) {
+		crash_data->dbglog_entry_data.data[z] = buffer[i];
+		z++;
+		if (z >= ATH10K_DBGLOG_DATA_LEN)
+			z = 0;
+	}
+
+	crash_data->dbglog_entry_data.next_idx = z;
+}
+EXPORT_SYMBOL(ath10k_debug_dbglog_add);
+
+static struct ath10k_dump_file_data *ath10k_build_dump_file(struct ath10k *ar)
+{
+	struct ath10k_fw_crash_data *crash_data = ar->debug.fw_crash_data;
+	struct ath10k_dump_file_data *dump_data;
+	struct ath10k_tlv_dump_data *dump_tlv;
+	int hdr_len = sizeof(*dump_data);
+	unsigned int len, sofar = 0;
+	unsigned char *buf;
+
+	lockdep_assert_held(&ar->conf_mutex);
+
+	spin_lock_bh(&ar->data_lock);
+
+	if (!crash_data->crashed_since_read) {
+		spin_unlock_bh(&ar->data_lock);
+		return NULL;
+	}
+
+	spin_unlock_bh(&ar->data_lock);
+
+	len = hdr_len;
+	len += sizeof(*dump_tlv) + sizeof(crash_data->reg_dump_values);
+	len += sizeof(*dump_tlv) + sizeof(crash_data->dbglog_entry_data);
+
+	sofar += hdr_len;
+
+	/* This is going to get big when we start dumping FW RAM and such,
+	 * so go ahead and use vmalloc.
+	 */
+	buf = vmalloc(len);
+	if (!buf)
+		return NULL;
+
+	spin_lock_bh(&ar->data_lock);
+
+	memset(buf, 0, len);
+	dump_data = (struct ath10k_dump_file_data *)(buf);
+	strlcpy(dump_data->df_magic, "ATH10K-FW-DUMP",
+		sizeof(dump_data->df_magic));
+	dump_data->len = len;
+
+#ifdef __BIG_ENDIAN
+	dump_data->big_endian = 1;
+#else
+	dump_data->big_endian = 0;
+#endif
+
+	dump_data->version = 1;
+	memcpy(dump_data->uuid, &crash_data->uuid, sizeof(dump_data->uuid));
+	dump_data->chip_id = ar->chip_id;
+	dump_data->bus_type = 0;
+	dump_data->target_version = ar->target_version;
+	dump_data->fw_version_major = ar->fw_version_major;
+	dump_data->fw_version_minor = ar->fw_version_minor;
+	dump_data->fw_version_release = ar->fw_version_release;
+	dump_data->fw_version_build = ar->fw_version_build;
+	dump_data->phy_capability = ar->phy_capability;
+	dump_data->hw_min_tx_power = ar->hw_min_tx_power;
+	dump_data->hw_max_tx_power = ar->hw_max_tx_power;
+	dump_data->ht_cap_info = ar->ht_cap_info;
+	dump_data->vht_cap_info = ar->vht_cap_info;
+	dump_data->num_rf_chains = ar->num_rf_chains;
+
+	strlcpy(dump_data->fw_ver, ar->hw->wiphy->fw_version,
+		sizeof(dump_data->fw_ver));
+
+	dump_data->kernel_ver_code = LINUX_VERSION_CODE;
+	strlcpy(dump_data->kernel_ver, VERMAGIC_STRING,
+		sizeof(dump_data->kernel_ver));
+
+	dump_data->tv_sec = crash_data->timestamp.tv_sec;
+	dump_data->tv_nsec = crash_data->timestamp.tv_nsec;
+
+	/* Gather dbg-log */
+	dump_tlv = (struct ath10k_tlv_dump_data *)(buf + sofar);
+	dump_tlv->type = ATH10K_FW_CRASH_DUMP_DBGLOG;
+	dump_tlv->tlv_len = sizeof(crash_data->dbglog_entry_data);
+	memcpy(dump_tlv->tlv_data, &crash_data->dbglog_entry_data,
+	       dump_tlv->tlv_len);
+	sofar += sizeof(*dump_tlv) + dump_tlv->tlv_len;
+
+	/* Gather crash-dump */
+	dump_tlv = (struct ath10k_tlv_dump_data *)(buf + sofar);
+	dump_tlv->type = ATH10K_FW_CRASH_DUMP_REGDUMP;
+	dump_tlv->tlv_len = sizeof(crash_data->reg_dump_values);
+	memcpy(dump_tlv->tlv_data, &crash_data->reg_dump_values,
+	       dump_tlv->tlv_len);
+	sofar += sizeof(*dump_tlv) + dump_tlv->tlv_len;
+
+	spin_unlock_bh(&ar->data_lock);
+
+	return dump_data;
+}
+
+static int ath10k_fw_crash_dump_open(struct inode *inode, struct file *file)
+{
+	struct ath10k *ar = inode->i_private;
+	struct ath10k_dump_file_data *dump;
+	int ret;
+
+	mutex_lock(&ar->conf_mutex);
+
+	dump = ath10k_build_dump_file(ar);
+	if (!dump) {
+		ret = -ENODATA;
+		goto out;
+	}
+
+	file->private_data = dump;
+	ar->debug.fw_crash_data->crashed_since_read = false;
+	ret = 0;
+
+out:
+	mutex_unlock(&ar->conf_mutex);
+	return ret;
+}
+
+static ssize_t ath10k_fw_crash_dump_read(struct file *file,
+					 char __user *user_buf,
+					 size_t count, loff_t *ppos)
+{
+	struct ath10k_dump_file_data *dump_file = file->private_data;
+
+	return simple_read_from_buffer(user_buf, count, ppos,
+				       dump_file,
+				       dump_file->len);
+}
+
+static int ath10k_fw_crash_dump_release(struct inode *inode,
+					struct file *file)
+{
+	vfree(file->private_data);
+
+	return 0;
+}
+
+static const struct file_operations fops_fw_crash_dump = {
+	.open = ath10k_fw_crash_dump_open,
+	.read = ath10k_fw_crash_dump_read,
+	.release = ath10k_fw_crash_dump_release,
+	.owner = THIS_MODULE,
+	.llseek = default_llseek,
+};
+
 static int ath10k_debug_htt_stats_req(struct ath10k *ar)
 {
 	u64 cookie;
@@ -913,6 +1178,10 @@  static const struct file_operations fops_dfs_stats = {
 
 int ath10k_debug_create(struct ath10k *ar)
 {
+	ar->debug.fw_crash_data = vzalloc(sizeof(*ar->debug.fw_crash_data));
+	if (!ar->debug.fw_crash_data)
+		return -ENOMEM;
+
 	ar->debug.debugfs_phy = debugfs_create_dir("ath10k",
 						   ar->hw->wiphy->debugfsdir);
 
@@ -933,6 +1202,9 @@  int ath10k_debug_create(struct ath10k *ar)
 	debugfs_create_file("simulate_fw_crash", S_IRUSR, ar->debug.debugfs_phy,
 			    ar, &fops_simulate_fw_crash);
 
+	debugfs_create_file("fw_crash_dump", S_IRUSR, ar->debug.debugfs_phy,
+			    ar, &fops_fw_crash_dump);
+
 	debugfs_create_file("chip_id", S_IRUSR, ar->debug.debugfs_phy,
 			    ar, &fops_chip_id);
 
@@ -965,6 +1237,7 @@  int ath10k_debug_create(struct ath10k *ar)
 
 void ath10k_debug_destroy(struct ath10k *ar)
 {
+	vfree(ar->debug.fw_crash_data);
 	cancel_delayed_work_sync(&ar->debug.htt_stats_dwork);
 }
 
diff --git a/drivers/net/wireless/ath/ath10k/debug.h b/drivers/net/wireless/ath/ath10k/debug.h
index a5824990bd2a..80ff14e4db9b 100644
--- a/drivers/net/wireless/ath/ath10k/debug.h
+++ b/drivers/net/wireless/ath/ath10k/debug.h
@@ -53,6 +53,10 @@  void ath10k_debug_read_service_map(struct ath10k *ar,
 				   size_t map_size);
 void ath10k_debug_read_target_stats(struct ath10k *ar,
 				    struct wmi_stats_event *ev);
+struct ath10k_fw_crash_data *
+ath10k_debug_get_new_fw_crash_data(struct ath10k *ar);
+
+void ath10k_debug_dbglog_add(struct ath10k *ar, u8 *buffer, int len);
 
 #define ATH10K_DFS_STAT_INC(ar, c) (ar->debug.dfs_stats.c++)
 
@@ -86,6 +90,17 @@  static inline void ath10k_debug_read_target_stats(struct ath10k *ar,
 {
 }
 
+static inline void ath10k_debug_dbglog_add(struct ath10k *ar, u8 *buffer,
+					   int len)
+{
+}
+
+static inline struct ath10k_fw_crash_data *
+ath10k_debug_get_new_fw_crash_data(struct ath10k *ar)
+{
+	return NULL;
+}
+
 #define ATH10K_DFS_STAT_INC(ar, c) do { } while (0)
 
 #endif /* CONFIG_ATH10K_DEBUGFS */
@@ -96,6 +111,7 @@  __printf(2, 3) void ath10k_dbg(enum ath10k_debug_mask mask,
 void ath10k_dbg_dump(enum ath10k_debug_mask mask,
 		     const char *msg, const char *prefix,
 		     const void *buf, size_t len);
+
 #else /* CONFIG_ATH10K_DEBUG */
 
 static inline int ath10k_dbg(enum ath10k_debug_mask dbg_mask,
@@ -109,5 +125,11 @@  static inline void ath10k_dbg_dump(enum ath10k_debug_mask mask,
 				   const void *buf, size_t len)
 {
 }
+
+static inline void ath10k_dbg_save_fw_dbg_buffer(struct ath10k *ar,
+						 u8 *buffer, int len)
+{
+}
 #endif /* CONFIG_ATH10K_DEBUG */
+
 #endif /* _DEBUG_H_ */
diff --git a/drivers/net/wireless/ath/ath10k/hw.h b/drivers/net/wireless/ath/ath10k/hw.h
index ffd04890407e..c391c88096ee 100644
--- a/drivers/net/wireless/ath/ath10k/hw.h
+++ b/drivers/net/wireless/ath/ath10k/hw.h
@@ -39,6 +39,8 @@ 
 /* includes also the null byte */
 #define ATH10K_FIRMWARE_MAGIC               "QCA-ATH10K"
 
+#define REG_DUMP_COUNT_QCA988X 60
+
 struct ath10k_fw_ie {
 	__le32 id;
 	__le32 len;
@@ -362,4 +364,32 @@  enum ath10k_mcast2ucast_mode {
 
 #define RTC_STATE_V_GET(x) (((x) & RTC_STATE_V_MASK) >> RTC_STATE_V_LSB)
 
+
+/* Target debug log related defines and structs */
+
+/* Target is 32-bit CPU, so we just use u32 for
+ * the pointers.  The memory space is relative to the
+ * target, not the host.
+ */
+struct ath10k_fw_dbglog_buf {
+	/* pointer to dblog_buf_s */
+	u32 next;
+
+	/* pointer to u8 buffer */
+	u32 buffer;
+
+	u32 bufsize;
+	u32 length;
+	u32 count;
+	u32 free;
+} __packed;
+
+struct ath10k_fw_dbglog_hdr {
+	/* pointer to dbglog_buf_s */
+	u32 dbuf;
+
+	u32 dropped;
+} __packed;
+
+
 #endif /* _HW_H_ */
diff --git a/drivers/net/wireless/ath/ath10k/pci.c b/drivers/net/wireless/ath/ath10k/pci.c
index 96ce359349cb..99e0bd0c4f4a 100644
--- a/drivers/net/wireless/ath/ath10k/pci.c
+++ b/drivers/net/wireless/ath/ath10k/pci.c
@@ -867,16 +867,103 @@  static u16 ath10k_pci_hif_get_free_queue_number(struct ath10k *ar, u8 pipe)
 	return ath10k_ce_num_free_src_entries(ar_pci->pipe_info[pipe].ce_hdl);
 }
 
-static void ath10k_pci_hif_dump_area(struct ath10k *ar)
+static void ath10k_pci_dump_dbglog(struct ath10k *ar,
+				   struct ath10k_fw_crash_data *crash_data)
+{
+	struct ath10k_fw_dbglog_hdr dbg_hdr;
+	struct ath10k_fw_dbglog_buf dbuf;
+	u8 *buffer;
+	int ret, i;
+	u32 dbufp;
+
+	lockdep_assert_held(&ar->data_lock);
+
+	/* dump the debug logs on the target */
+	ret = ath10k_pci_diag_read_hi(ar, &dbg_hdr,
+				      hi_dbglog_hdr, sizeof(dbg_hdr));
+	if (ret != 0) {
+		ath10k_warn("failed to dump debug log area from hi_dbglog_hdr: %d\n",
+			    ret);
+		return;
+	}
+
+	ath10k_dbg(ATH10K_DBG_PCI,
+		   "pci dbglog header dbuf 0x%x dropped %i\n",
+		   dbg_hdr.dbuf, dbg_hdr.dropped);
+
+	/* pointer in target memory space */
+	dbufp = dbg_hdr.dbuf;
+
+	/* i is for logging purposes and sanity check in case firmware buffers
+	 * are corrupted and will not properly terminate the list.
+	 * In standard firmware, it appears there are no more than 2
+	 * buffers, so 10 should be safe upper limit even if firmware
+	 * changes quite a bit.
+	 */
+	i = 0;
+	while (dbufp && i < 10) {
+		ret = ath10k_pci_diag_read_mem(ar, dbufp,
+					       &dbuf, sizeof(dbuf));
+		if (ret != 0) {
+			ath10k_warn("failed to read debug log area from address 0x%x: %d\n",
+				    dbufp, ret);
+			return;
+		}
+
+		/* we have a buffer of data */
+		ath10k_dbg(ATH10K_DBG_PCI,
+			   "pci dbglog [%i] next 0x%x buf 0x%x size %i len %i count %i free %i\n",
+			   i, dbuf.next, dbuf.buffer, dbuf.bufsize, dbuf.length,
+			   dbuf.count, dbuf.free);
+		if (dbuf.buffer == 0 || dbuf.length == 0)
+			goto next;
+
+		/* Pick arbitrary upper bound in case firmware is corrupted for
+		 * whatever reason.
+		 */
+		if (dbuf.length > 16000) {
+			ath10k_warn("firmware debug log buffer length is out of bounds: %d\n",
+				    dbuf.length);
+			/* do not trust the next pointer either... */
+			return;
+		}
+
+		buffer = kmalloc(dbuf.length, GFP_ATOMIC);
+
+		if (!buffer)
+			goto next;
+
+		ret = ath10k_pci_diag_read_mem(ar, dbuf.buffer, buffer,
+					       dbuf.length);
+		if (ret != 0) {
+			ath10k_warn("failed to read debug log buffer from address 0x%x: %d\n",
+				    dbuf.buffer, ret);
+			kfree(buffer);
+			return;
+		}
+
+		ath10k_debug_dbglog_add(ar, buffer, dbuf.length);
+		kfree(buffer);
+
+next:
+		dbufp = dbuf.next;
+		if (dbufp == dbg_hdr.dbuf) {
+			/* It is a circular buffer it seems, bail if next
+			 * is head.
+			 */
+			break;
+		}
+		i++;
+	} /* while we have a debug buffer to read */
+}
+
+static void ath10k_pci_dump_registers(struct ath10k *ar,
+				      struct ath10k_fw_crash_data *crash_data)
 {
-	u32 reg_dump_values[REG_DUMP_COUNT_QCA988X] = {};
+	u32 i, reg_dump_values[REG_DUMP_COUNT_QCA988X] = {};
 	int ret;
-	u32 i;
 
-	ath10k_err("firmware crashed!\n");
-	ath10k_err("hardware name %s version 0x%x\n",
-		   ar->hw_params.name, ar->target_version);
-	ath10k_err("firmware version: %s\n", ar->hw->wiphy->fw_version);
+	lockdep_assert_held(&ar->data_lock);
 
 	ret = ath10k_pci_diag_read_hi(ar, &reg_dump_values[0],
 				      hi_failure_state,
@@ -897,6 +984,38 @@  static void ath10k_pci_hif_dump_area(struct ath10k *ar)
 			   reg_dump_values[i + 2],
 			   reg_dump_values[i + 3]);
 
+	memcpy(crash_data->reg_dump_values, reg_dump_values,
+	       sizeof(crash_data->reg_dump_values));
+}
+
+static void ath10k_pci_hif_dump_area(struct ath10k *ar)
+{
+	struct ath10k_fw_crash_data *crash_data;
+	char uuid[50];
+
+	spin_lock_bh(&ar->data_lock);
+
+	crash_data = ath10k_debug_get_new_fw_crash_data(ar);
+
+	if (crash_data)
+		scnprintf(uuid, sizeof(uuid), "%pU", &crash_data->uuid);
+	else
+		scnprintf(uuid, sizeof(uuid), "n/a");
+
+	ath10k_err("firmware crashed! (uuid %s)\n", uuid);
+	ath10k_err("hardware name %s version 0x%x\n",
+		   ar->hw_params.name, ar->target_version);
+	ath10k_err("firmware version: %s\n", ar->hw->wiphy->fw_version);
+
+	if (!crash_data)
+		goto exit;
+
+	ath10k_pci_dump_registers(ar, crash_data);
+	ath10k_pci_dump_dbglog(ar, crash_data);
+
+exit:
+	spin_unlock_bh(&ar->data_lock);
+
 	queue_work(ar->workqueue, &ar->restart_work);
 }
 
diff --git a/drivers/net/wireless/ath/ath10k/pci.h b/drivers/net/wireless/ath/ath10k/pci.h
index 940129209990..f72a7cdec4d4 100644
--- a/drivers/net/wireless/ath/ath10k/pci.h
+++ b/drivers/net/wireless/ath/ath10k/pci.h
@@ -23,9 +23,6 @@ 
 #include "hw.h"
 #include "ce.h"
 
-/* FW dump area */
-#define REG_DUMP_COUNT_QCA988X 60
-
 /*
  * maximum number of bytes that can be handled atomically by DiagRead/DiagWrite
  */