diff mbox

[v2,09/21] ath10k: print fw debug messages in hex.

Message ID 1462986153-16318-10-git-send-email-greearb@candelatech.com (mailing list archive)
State Deferred
Delegated to: Kalle Valo
Headers show

Commit Message

Ben Greear May 11, 2016, 5:02 p.m. UTC
From: Ben Greear <greearb@candelatech.com>

This allows user-space tools to decode debug-log
messages by parsing dmesg or /var/log/messages.

Signed-off-by: Ben Greear <greearb@candelatech.com>
---
 drivers/net/wireless/ath/ath10k/debug.c | 72 +++++++++++++++++++++++++++++++++
 drivers/net/wireless/ath/ath10k/debug.h |  5 +++
 drivers/net/wireless/ath/ath10k/pci.c   |  3 ++
 drivers/net/wireless/ath/ath10k/wmi.c   | 12 ++++++
 4 files changed, 92 insertions(+)

Comments

Kalle Valo Sept. 14, 2016, 2:18 p.m. UTC | #1
greearb@candelatech.com writes:

> From: Ben Greear <greearb@candelatech.com>
>
> This allows user-space tools to decode debug-log
> messages by parsing dmesg or /var/log/messages.
>
> Signed-off-by: Ben Greear <greearb@candelatech.com>

Don't tracing points already provide the same information?

> +void ath10k_dbg_print_fw_dbg_buffer(struct ath10k *ar, __le32 *ibuf, int len,
> +				    const char* lvl)
> +{
> +	/* Print out raw hex, external tools can decode if
> +	 * they care.
> +	 * TODO:  Add ar identifier to messages.
> +	 */
> +	int q = 0;
> +
> +	dev_printk(lvl, ar->dev, "ath10k_pci ATH10K_DBG_BUFFER:\n");
> +	while (q < len) {
> +		if (q + 8 <= len) {
> +			printk("%sath10k: [%04d]: %08X %08X %08X %08X %08X %08X %08X %08X\n",
> +			       lvl, q,
> +			       ibuf[q], ibuf[q+1], ibuf[q+2], ibuf[q+3],
> +			       ibuf[q+4], ibuf[q+5], ibuf[q+6], ibuf[q+7]);
> +			q += 8;
> +		}
> +		else if (q + 7 <= len) {
> +			printk("%sath10k: [%04d]: %08X %08X %08X %08X %08X %08X %08X\n",
> +			       lvl, q,
> +			       ibuf[q], ibuf[q+1], ibuf[q+2], ibuf[q+3],
> +			       ibuf[q+4], ibuf[q+5], ibuf[q+6]);
> +			q += 7;
> +		}
> +		else if (q + 6 <= len) {
> +			printk("%sath10k: [%04d]: %08X %08X %08X %08X %08X %08X\n",
> +			       lvl, q,
> +			       ibuf[q], ibuf[q+1], ibuf[q+2], ibuf[q+3],
> +			       ibuf[q+4], ibuf[q+5]);
> +			q += 6;
> +		}
> +		else if (q + 5 <= len) {
> +			printk("%sath10k: [%04d]: %08X %08X %08X %08X %08X\n",
> +			       lvl, q,
> +			       ibuf[q], ibuf[q+1], ibuf[q+2], ibuf[q+3],
> +			       ibuf[q+4]);
> +			q += 5;
> +		}
> +		else if (q + 4 <= len) {
> +			printk("%sath10k: [%04d]: %08X %08X %08X %08X\n",
> +			       lvl, q,
> +			       ibuf[q], ibuf[q+1], ibuf[q+2], ibuf[q+3]);
> +			q += 4;
> +		}
> +		else if (q + 3 <= len) {
> +			printk("%sath10k: [%04d]: %08X %08X %08X\n",
> +			       lvl, q,
> +			       ibuf[q], ibuf[q+1], ibuf[q+2]);
> +			q += 3;
> +		}
> +		else if (q + 2 <= len) {
> +			printk("%sath10k: [%04d]: %08X %08X\n",
> +			       lvl, q,
> +			       ibuf[q], ibuf[q+1]);
> +			q += 2;
> +		}
> +		else if (q + 1 <= len) {
> +			printk("%sath10k: [%04d]: %08X\n",
> +			       lvl, q,
> +			       ibuf[q]);
> +			q += 1;
> +		}
> +		else {
> +			break;
> +		}
> +	}/* while */
> +
> +	dev_printk(lvl, ar->dev, "ATH10K_END\n");
> +}

Isn't this almost the same as what ath10k_dbg_dump() does?
Ben Greear Sept. 14, 2016, 3:13 p.m. UTC | #2
On 09/14/2016 07:18 AM, Valo, Kalle wrote:
> greearb@candelatech.com writes:
>
>> From: Ben Greear <greearb@candelatech.com>
>>
>> This allows user-space tools to decode debug-log
>> messages by parsing dmesg or /var/log/messages.
>>
>> Signed-off-by: Ben Greear <greearb@candelatech.com>
>
> Don't tracing points already provide the same information?

Tracing tools are difficult to set up and may not be available on
random embedded devices.  And if we are dealing with bug reports from
the field, most users will not be able to set it up regardless.

There are similar ways to print out hex, but the logic below creates
specific and parseable logs in the 'dmesg' output and similar.

I have written a tool that can decode these messages into useful human-readable
text so that I can debug firmware issues both locally and from field reports.

Stock firmware generates similar logs and QCA could write their own decode logic
for their firmware versions.

Thanks,
Ben


>
>> +void ath10k_dbg_print_fw_dbg_buffer(struct ath10k *ar, __le32 *ibuf, int len,
>> +				    const char* lvl)
>> +{
>> +	/* Print out raw hex, external tools can decode if
>> +	 * they care.
>> +	 * TODO:  Add ar identifier to messages.
>> +	 */
>> +	int q = 0;
>> +
>> +	dev_printk(lvl, ar->dev, "ath10k_pci ATH10K_DBG_BUFFER:\n");
>> +	while (q < len) {
>> +		if (q + 8 <= len) {
>> +			printk("%sath10k: [%04d]: %08X %08X %08X %08X %08X %08X %08X %08X\n",
>> +			       lvl, q,
>> +			       ibuf[q], ibuf[q+1], ibuf[q+2], ibuf[q+3],
>> +			       ibuf[q+4], ibuf[q+5], ibuf[q+6], ibuf[q+7]);
>> +			q += 8;
>> +		}
>> +		else if (q + 7 <= len) {
>> +			printk("%sath10k: [%04d]: %08X %08X %08X %08X %08X %08X %08X\n",
>> +			       lvl, q,
>> +			       ibuf[q], ibuf[q+1], ibuf[q+2], ibuf[q+3],
>> +			       ibuf[q+4], ibuf[q+5], ibuf[q+6]);
>> +			q += 7;
>> +		}
>> +		else if (q + 6 <= len) {
>> +			printk("%sath10k: [%04d]: %08X %08X %08X %08X %08X %08X\n",
>> +			       lvl, q,
>> +			       ibuf[q], ibuf[q+1], ibuf[q+2], ibuf[q+3],
>> +			       ibuf[q+4], ibuf[q+5]);
>> +			q += 6;
>> +		}
>> +		else if (q + 5 <= len) {
>> +			printk("%sath10k: [%04d]: %08X %08X %08X %08X %08X\n",
>> +			       lvl, q,
>> +			       ibuf[q], ibuf[q+1], ibuf[q+2], ibuf[q+3],
>> +			       ibuf[q+4]);
>> +			q += 5;
>> +		}
>> +		else if (q + 4 <= len) {
>> +			printk("%sath10k: [%04d]: %08X %08X %08X %08X\n",
>> +			       lvl, q,
>> +			       ibuf[q], ibuf[q+1], ibuf[q+2], ibuf[q+3]);
>> +			q += 4;
>> +		}
>> +		else if (q + 3 <= len) {
>> +			printk("%sath10k: [%04d]: %08X %08X %08X\n",
>> +			       lvl, q,
>> +			       ibuf[q], ibuf[q+1], ibuf[q+2]);
>> +			q += 3;
>> +		}
>> +		else if (q + 2 <= len) {
>> +			printk("%sath10k: [%04d]: %08X %08X\n",
>> +			       lvl, q,
>> +			       ibuf[q], ibuf[q+1]);
>> +			q += 2;
>> +		}
>> +		else if (q + 1 <= len) {
>> +			printk("%sath10k: [%04d]: %08X\n",
>> +			       lvl, q,
>> +			       ibuf[q]);
>> +			q += 1;
>> +		}
>> +		else {
>> +			break;
>> +		}
>> +	}/* while */
>> +
>> +	dev_printk(lvl, ar->dev, "ATH10K_END\n");
>> +}
>
> Isn't this almost the same as what ath10k_dbg_dump() does?
>
Kalle Valo Sept. 15, 2016, 2:06 p.m. UTC | #3
Ben Greear <greearb@candelatech.com> writes:

> On 09/14/2016 07:18 AM, Valo, Kalle wrote:
>> greearb@candelatech.com writes:
>>
>>> From: Ben Greear <greearb@candelatech.com>
>>>
>>> This allows user-space tools to decode debug-log
>>> messages by parsing dmesg or /var/log/messages.
>>>
>>> Signed-off-by: Ben Greear <greearb@candelatech.com>
>>
>> Don't tracing points already provide the same information?
>
> Tracing tools are difficult to set up and may not be available on
> random embedded devices.  And if we are dealing with bug reports from
> the field, most users will not be able to set it up regardless.
>
> There are similar ways to print out hex, but the logic below creates
> specific and parseable logs in the 'dmesg' output and similar.
>
> I have written a tool that can decode these messages into useful human-readable
> text so that I can debug firmware issues both locally and from field reports.
>
> Stock firmware generates similar logs and QCA could write their own decode logic
> for their firmware versions.

Reinventing the wheel by using printk as the delivery mechanism doesn't
sound like a good idea. IIRC Emmanuel talked about some kind of firmware
debugging framework, he might have some ideas.
Ben Greear Sept. 15, 2016, 3:14 p.m. UTC | #4
On 09/15/2016 07:06 AM, Valo, Kalle wrote:
> Ben Greear <greearb@candelatech.com> writes:
>
>> On 09/14/2016 07:18 AM, Valo, Kalle wrote:
>>> greearb@candelatech.com writes:
>>>
>>>> From: Ben Greear <greearb@candelatech.com>
>>>>
>>>> This allows user-space tools to decode debug-log
>>>> messages by parsing dmesg or /var/log/messages.
>>>>
>>>> Signed-off-by: Ben Greear <greearb@candelatech.com>
>>>
>>> Don't tracing points already provide the same information?
>>
>> Tracing tools are difficult to set up and may not be available on
>> random embedded devices.  And if we are dealing with bug reports from
>> the field, most users will not be able to set it up regardless.
>>
>> There are similar ways to print out hex, but the logic below creates
>> specific and parseable logs in the 'dmesg' output and similar.
>>
>> I have written a tool that can decode these messages into useful human-readable
>> text so that I can debug firmware issues both locally and from field reports.
>>
>> Stock firmware generates similar logs and QCA could write their own decode logic
>> for their firmware versions.
>
> Reinventing the wheel by using printk as the delivery mechanism doesn't
> sound like a good idea. IIRC Emmanuel talked about some kind of firmware
> debugging framework, he might have some ideas.

Waiting for magical frameworks to fix problems is even worse.

It has been years since ath10k has been in the kernel.  There is basically
still no way to debug what the firmware is doing.

My patch gives you something that can work right now, with the standard 'dmesg'
framework found in virtually all kernels new and old, and it has been proven
to be useful in the field.  The messages are also nicely interleaved with the
rest of the mac80211 stack messages and any other driver messages, so you have
context.

If someone wants to add support for a framework later, then by all means, post
the patches when it is ready.

Thanks,
Ben
Grumbach, Emmanuel Sept. 15, 2016, 5:34 p.m. UTC | #5
On Thu, 2016-09-15 at 08:14 -0700, Ben Greear wrote:
> On 09/15/2016 07:06 AM, Valo, Kalle wrote:
> > Ben Greear <greearb@candelatech.com> writes:
> > 
> > > On 09/14/2016 07:18 AM, Valo, Kalle wrote:
> > > > greearb@candelatech.com writes:
> > > > 
> > > > > From: Ben Greear <greearb@candelatech.com>
> > > > > 
> > > > > This allows user-space tools to decode debug-log
> > > > > messages by parsing dmesg or /var/log/messages.
> > > > > 
> > > > > Signed-off-by: Ben Greear <greearb@candelatech.com>
> > > > 
> > > > Don't tracing points already provide the same information?
> > > 
> > > Tracing tools are difficult to set up and may not be available on
> > > random embedded devices.  And if we are dealing with bug reports
> > > from
> > > the field, most users will not be able to set it up regardless.
> > > 
> > > There are similar ways to print out hex, but the logic below
> > > creates
> > > specific and parseable logs in the 'dmesg' output and similar.
> > > 
> > > I have written a tool that can decode these messages into useful
> > > human-readable
> > > text so that I can debug firmware issues both locally and from
> > > field reports.
> > > 
> > > Stock firmware generates similar logs and QCA could write their
> > > own decode logic
> > > for their firmware versions.
> > 
> > Reinventing the wheel by using printk as the delivery mechanism
> > doesn't
> > sound like a good idea. IIRC Emmanuel talked about some kind of
> > firmware
> > debugging framework, he might have some ideas.
> 
> Waiting for magical frameworks to fix problems is even worse.
> 
It has been years since ath10k has been in the kernel.  There is
> basically
> still no way to debug what the firmware is doing.
> 

I know the feeling :) I was in the same situation before I added stuff
for iwlwifi.

> My patch gives you something that can work right now, with the
> standard 'dmesg'
> framework found in virtually all kernels new and old, and it has been
> proven
> to be useful in the field.  The messages are also nicely interleaved
> with the
> rest of the mac80211 stack messages and any other driver messages, so
> you have
> context.
> 
> If someone wants to add support for a framework later, then by all
> means, post
> the patches when it is ready.

From my experience, a strong and easy-to-use firmware debug
infrastructure is important because typically, the firmware is written
by other people who have different priorities (and are not always Linux
wizards) etc... Being able to give them good data is the only way to
have them fix their bugs :) For us, it was really a game changer. When
you work for a big corporate, having 2 groups work better together
always has a big impact. That's for the philosophical part :)

FWIW: what I did has nothing to do with FW 'live tracing', but with
firmware dumps. One part of our firmware dumps include tracing. We also
have "firmware prints", but we don't print them in the kernel log and
they are not part of the firmware dump thing. We rather record them in
tracepoints just like really *anything* that comes from the firmware.
Basically, we have 2 layers, the transport layer (PCIe) and the
operation_mode layer. The first just brings the data from the firmware
and in that layer we *blindly* record everything in tracepoints. In the
operation_mode layer, we look at the data itself. In case of debug
prints from the firmware, we simply discard them, because we don't
really care of the meaning. All we want is to have them go through the
PCIe layer so that they are recorded in the tracepoints.
When we finish recording the sequence we wanted with tracing (trace
-cmd), we parse the output and then, we parse the firmware prints.
IMHO, this is more reliable than kernel logs and you don't lose the
alignment with the driver traces as long as you have driver data in
tracepoints as well.

> 
> Thanks,
> Ben
> 
>
Ben Greear Sept. 15, 2016, 5:59 p.m. UTC | #6
On 09/15/2016 10:34 AM, Grumbach, Emmanuel wrote:
> On Thu, 2016-09-15 at 08:14 -0700, Ben Greear wrote:
>> On 09/15/2016 07:06 AM, Valo, Kalle wrote:
>>> Ben Greear <greearb@candelatech.com> writes:
>>>
>>>> On 09/14/2016 07:18 AM, Valo, Kalle wrote:
>>>>> greearb@candelatech.com writes:
>>>>>
>>>>>> From: Ben Greear <greearb@candelatech.com>
>>>>>>
>>>>>> This allows user-space tools to decode debug-log
>>>>>> messages by parsing dmesg or /var/log/messages.
>>>>>>
>>>>>> Signed-off-by: Ben Greear <greearb@candelatech.com>
>>>>>
>>>>> Don't tracing points already provide the same information?
>>>>
>>>> Tracing tools are difficult to set up and may not be available on
>>>> random embedded devices.  And if we are dealing with bug reports
>>>> from
>>>> the field, most users will not be able to set it up regardless.
>>>>
>>>> There are similar ways to print out hex, but the logic below
>>>> creates
>>>> specific and parseable logs in the 'dmesg' output and similar.
>>>>
>>>> I have written a tool that can decode these messages into useful
>>>> human-readable
>>>> text so that I can debug firmware issues both locally and from
>>>> field reports.
>>>>
>>>> Stock firmware generates similar logs and QCA could write their
>>>> own decode logic
>>>> for their firmware versions.
>>>
>>> Reinventing the wheel by using printk as the delivery mechanism
>>> doesn't
>>> sound like a good idea. IIRC Emmanuel talked about some kind of
>>> firmware
>>> debugging framework, he might have some ideas.
>>
>> Waiting for magical frameworks to fix problems is even worse.
>>
> It has been years since ath10k has been in the kernel.  There is
>> basically
>> still no way to debug what the firmware is doing.
>>
>
> I know the feeling :) I was in the same situation before I added stuff
> for iwlwifi.
>
>> My patch gives you something that can work right now, with the
>> standard 'dmesg'
>> framework found in virtually all kernels new and old, and it has been
>> proven
>> to be useful in the field.  The messages are also nicely interleaved
>> with the
>> rest of the mac80211 stack messages and any other driver messages, so
>> you have
>> context.
>>
>> If someone wants to add support for a framework later, then by all
>> means, post
>> the patches when it is ready.
>
> From my experience, a strong and easy-to-use firmware debug
> infrastructure is important because typically, the firmware is written
> by other people who have different priorities (and are not always Linux
> wizards) etc... Being able to give them good data is the only way to
> have them fix their bugs :) For us, it was really a game changer. When
> you work for a big corporate, having 2 groups work better together
> always has a big impact. That's for the philosophical part :)
>
> FWIW: what I did has nothing to do with FW 'live tracing', but with
> firmware dumps. One part of our firmware dumps include tracing. We also
> have "firmware prints", but we don't print them in the kernel log and
> they are not part of the firmware dump thing. We rather record them in
> tracepoints just like really *anything* that comes from the firmware.
> Basically, we have 2 layers, the transport layer (PCIe) and the
> operation_mode layer. The first just brings the data from the firmware
> and in that layer we *blindly* record everything in tracepoints. In the
> operation_mode layer, we look at the data itself. In case of debug
> prints from the firmware, we simply discard them, because we don't
> really care of the meaning. All we want is to have them go through the
> PCIe layer so that they are recorded in the tracepoints.
> When we finish recording the sequence we wanted with tracing (trace
> -cmd), we parse the output and then, we parse the firmware prints.
> IMHO, this is more reliable than kernel logs and you don't lose the
> alignment with the driver traces as long as you have driver data in
> tracepoints as well.

I have other patches that remember the last 100 or so firmware log messages from
the kernel and provide that in a binary dump image when firmware crashes.

This is indeed very useful.

But, when debugging non-crash occasions, it is still useful to see what
the firmware is doing.

For instance, maybe it is reporting lots of tx-hangs and/or low-level
resets.  This gives you a clue as to why a user might report 'my wifi sucks'.

Since I am both FW and driver team for my firmware variant,
and my approach has been working for me, then I feel it is certainly better than
the current state.  And just maybe the official upstream FW team could start
using something similar as well.  Currently, I don't see how they can ever make
much progress on firmware crashes reported in stock kernels.

Thanks,
Ben
Ben Greear Sept. 15, 2016, 6:08 p.m. UTC | #7
On 09/15/2016 10:59 AM, Ben Greear wrote:
> On 09/15/2016 10:34 AM, Grumbach, Emmanuel wrote:

> I have other patches that remember the last 100 or so firmware log messages from
> the kernel and provide that in a binary dump image when firmware crashes.
>
> This is indeed very useful.
>
> But, when debugging non-crash occasions, it is still useful to see what
> the firmware is doing.
>
> For instance, maybe it is reporting lots of tx-hangs and/or low-level
> resets.  This gives you a clue as to why a user might report 'my wifi sucks'.
>
> Since I am both FW and driver team for my firmware variant,
> and my approach has been working for me, then I feel it is certainly better than
> the current state.  And just maybe the official upstream FW team could start
> using something similar as well.  Currently, I don't see how they can ever make
> much progress on firmware crashes reported in stock kernels.

I forgot the other good reason about console logging this:

Sometimes ath10k firmware crash causes the entire kernel to crash, so there is no time to
grab the binary dump from a file system...but a serial console can help you
make progress...

Thanks,
Ben
Grumbach, Emmanuel Sept. 15, 2016, 8:22 p.m. UTC | #8
On Thu, 2016-09-15 at 10:59 -0700, Ben Greear wrote:
> On 09/15/2016 10:34 AM, Grumbach, Emmanuel wrote:
> > On Thu, 2016-09-15 at 08:14 -0700, Ben Greear wrote:
> > > On 09/15/2016 07:06 AM, Valo, Kalle wrote:
> > > > Ben Greear <greearb@candelatech.com> writes:
> > > > 
> > > > > On 09/14/2016 07:18 AM, Valo, Kalle wrote:
> > > > > > greearb@candelatech.com writes:
> > > > > > 
> > > > > > > From: Ben Greear <greearb@candelatech.com>
> > > > > > > 
> > > > > > > This allows user-space tools to decode debug-log
> > > > > > > messages by parsing dmesg or /var/log/messages.
> > > > > > > 
> > > > > > > Signed-off-by: Ben Greear <greearb@candelatech.com>
> > > > > > 
> > > > > > Don't tracing points already provide the same information?
> > > > > 
> > > > > Tracing tools are difficult to set up and may not be
> > > > > available on
> > > > > random embedded devices.  And if we are dealing with bug
> > > > > reports
> > > > > from
> > > > > the field, most users will not be able to set it up
> > > > > regardless.
> > > > > 
> > > > > There are similar ways to print out hex, but the logic below
> > > > > creates
> > > > > specific and parseable logs in the 'dmesg' output and
> > > > > similar.
> > > > > 
> > > > > I have written a tool that can decode these messages into
> > > > > useful
> > > > > human-readable
> > > > > text so that I can debug firmware issues both locally and
> > > > > from
> > > > > field reports.
> > > > > 
> > > > > Stock firmware generates similar logs and QCA could write
> > > > > their
> > > > > own decode logic
> > > > > for their firmware versions.
> > > > 
> > > > Reinventing the wheel by using printk as the delivery mechanism
> > > > doesn't
> > > > sound like a good idea. IIRC Emmanuel talked about some kind of
> > > > firmware
> > > > debugging framework, he might have some ideas.
> > > 
> > > Waiting for magical frameworks to fix problems is even worse.
> > > 
> > It has been years since ath10k has been in the kernel.  There is
> > > basically
> > > still no way to debug what the firmware is doing.
> > > 
> > 
> > I know the feeling :) I was in the same situation before I added
> > stuff
> > for iwlwifi.
> > 
> > > My patch gives you something that can work right now, with the
> > > standard 'dmesg'
> > > framework found in virtually all kernels new and old, and it has
> > > been
> > > proven
> > > to be useful in the field.  The messages are also nicely
> > > interleaved
> > > with the
> > > rest of the mac80211 stack messages and any other driver
> > > messages, so
> > > you have
> > > context.
> > > 
> > > If someone wants to add support for a framework later, then by
> > > all
> > > means, post
> > > the patches when it is ready.
> > 
> > From my experience, a strong and easy-to-use firmware debug
> > infrastructure is important because typically, the firmware is
> > written
> > by other people who have different priorities (and are not always
> > Linux
> > wizards) etc... Being able to give them good data is the only way
> > to
> > have them fix their bugs :) For us, it was really a game changer.
> > When
> > you work for a big corporate, having 2 groups work better together
> > always has a big impact. That's for the philosophical part :)
> > 
> > FWIW: what I did has nothing to do with FW 'live tracing', but with
> > firmware dumps. One part of our firmware dumps include tracing. We
> > also
> > have "firmware prints", but we don't print them in the kernel log
> > and
> > they are not part of the firmware dump thing. We rather record them
> > in
> > tracepoints just like really *anything* that comes from the
> > firmware.
> > Basically, we have 2 layers, the transport layer (PCIe) and the
> > operation_mode layer. The first just brings the data from the
> > firmware
> > and in that layer we *blindly* record everything in tracepoints. In
> > the
> > operation_mode layer, we look at the data itself. In case of debug
> > prints from the firmware, we simply discard them, because we don't
> > really care of the meaning. All we want is to have them go through
> > the
> > PCIe layer so that they are recorded in the tracepoints.
> > When we finish recording the sequence we wanted with tracing (trace
> > -cmd), we parse the output and then, we parse the firmware prints.
> > IMHO, this is more reliable than kernel logs and you don't lose the
> > alignment with the driver traces as long as you have driver data in
> > tracepoints as well.
> 
> I have other patches that remember the last 100 or so firmware log
> messages from
> the kernel and provide that in a binary dump image when firmware
> crashes.
> 
> This is indeed very useful.
> 
> But, when debugging non-crash occasions, it is still useful to see
> what
> the firmware is doing.
> 

For that, I have come up with the "triggers". Triggers are conditions
that can be detected by the driver and enabled by the user. So
basically, we can say: "Please dump the logs when you are deauth'ed by
the AP". Or when you get delBA, or when the stats that come up from the
firmware say such and such etc... There are hooks that I added in
mac80211 to let the driver know about events that are handled there
(MLME and friends). Then, even if your logs are stored in a cyclic
buffer, you don't miss them and you catch them at the right spot.
One of the most useful trigger we have is when a Tx packet is dropped.
You can take a look at struct iwl_fw_dbg_trigger_tlv in iwlwifi if you
want.

> For instance, maybe it is reporting lots of tx-hangs and/or low-level
> resets.  This gives you a clue as to why a user might report 'my wifi
> sucks'.
> > Since I am both FW and driver team for my firmware variant,
> and my approach has been working for me, then I feel it is certainly
> better than
> the current state.  And just maybe the official upstream FW team
> could start
> using something similar as well.  Currently, I don't see how they can
> ever make
> much progress on firmware crashes reported in stock kernels.
> > Thanks,
> Ben
diff mbox

Patch

diff --git a/drivers/net/wireless/ath/ath10k/debug.c b/drivers/net/wireless/ath/ath10k/debug.c
index c7bff1e..97de9f37 100644
--- a/drivers/net/wireless/ath/ath10k/debug.c
+++ b/drivers/net/wireless/ath/ath10k/debug.c
@@ -2717,3 +2717,75 @@  void ath10k_dbg_dump(struct ath10k *ar,
 EXPORT_SYMBOL(ath10k_dbg_dump);
 
 #endif /* CONFIG_ATH10K_DEBUG */
+
+void ath10k_dbg_print_fw_dbg_buffer(struct ath10k *ar, __le32 *ibuf, int len,
+				    const char* lvl)
+{
+	/* Print out raw hex, external tools can decode if
+	 * they care.
+	 * TODO:  Add ar identifier to messages.
+	 */
+	int q = 0;
+
+	dev_printk(lvl, ar->dev, "ath10k_pci ATH10K_DBG_BUFFER:\n");
+	while (q < len) {
+		if (q + 8 <= len) {
+			printk("%sath10k: [%04d]: %08X %08X %08X %08X %08X %08X %08X %08X\n",
+			       lvl, q,
+			       ibuf[q], ibuf[q+1], ibuf[q+2], ibuf[q+3],
+			       ibuf[q+4], ibuf[q+5], ibuf[q+6], ibuf[q+7]);
+			q += 8;
+		}
+		else if (q + 7 <= len) {
+			printk("%sath10k: [%04d]: %08X %08X %08X %08X %08X %08X %08X\n",
+			       lvl, q,
+			       ibuf[q], ibuf[q+1], ibuf[q+2], ibuf[q+3],
+			       ibuf[q+4], ibuf[q+5], ibuf[q+6]);
+			q += 7;
+		}
+		else if (q + 6 <= len) {
+			printk("%sath10k: [%04d]: %08X %08X %08X %08X %08X %08X\n",
+			       lvl, q,
+			       ibuf[q], ibuf[q+1], ibuf[q+2], ibuf[q+3],
+			       ibuf[q+4], ibuf[q+5]);
+			q += 6;
+		}
+		else if (q + 5 <= len) {
+			printk("%sath10k: [%04d]: %08X %08X %08X %08X %08X\n",
+			       lvl, q,
+			       ibuf[q], ibuf[q+1], ibuf[q+2], ibuf[q+3],
+			       ibuf[q+4]);
+			q += 5;
+		}
+		else if (q + 4 <= len) {
+			printk("%sath10k: [%04d]: %08X %08X %08X %08X\n",
+			       lvl, q,
+			       ibuf[q], ibuf[q+1], ibuf[q+2], ibuf[q+3]);
+			q += 4;
+		}
+		else if (q + 3 <= len) {
+			printk("%sath10k: [%04d]: %08X %08X %08X\n",
+			       lvl, q,
+			       ibuf[q], ibuf[q+1], ibuf[q+2]);
+			q += 3;
+		}
+		else if (q + 2 <= len) {
+			printk("%sath10k: [%04d]: %08X %08X\n",
+			       lvl, q,
+			       ibuf[q], ibuf[q+1]);
+			q += 2;
+		}
+		else if (q + 1 <= len) {
+			printk("%sath10k: [%04d]: %08X\n",
+			       lvl, q,
+			       ibuf[q]);
+			q += 1;
+		}
+		else {
+			break;
+		}
+	}/* while */
+
+	dev_printk(lvl, ar->dev, "ATH10K_END\n");
+}
+EXPORT_SYMBOL(ath10k_dbg_print_fw_dbg_buffer);
diff --git a/drivers/net/wireless/ath/ath10k/debug.h b/drivers/net/wireless/ath/ath10k/debug.h
index 3062948..641fce1 100644
--- a/drivers/net/wireless/ath/ath10k/debug.h
+++ b/drivers/net/wireless/ath/ath10k/debug.h
@@ -38,6 +38,7 @@  enum ath10k_debug_mask {
 	ATH10K_DBG_WMI_PRINT	= 0x00002000,
 	ATH10K_DBG_PCI_PS	= 0x00004000,
 	ATH10K_DBG_AHB		= 0x00008000,
+	ATH10K_DBG_FW		= 0x80000000,
 	ATH10K_DBG_ANY		= 0xffffffff,
 };
 
@@ -192,4 +193,8 @@  static inline void ath10k_dbg_dump(struct ath10k *ar,
 {
 }
 #endif /* CONFIG_ATH10K_DEBUG */
+
+void ath10k_dbg_print_fw_dbg_buffer(struct ath10k *ar, __le32 *buffer,
+				    int len, const char* lvl);
+
 #endif /* _DEBUG_H_ */
diff --git a/drivers/net/wireless/ath/ath10k/pci.c b/drivers/net/wireless/ath/ath10k/pci.c
index e6315ec..dbf0db8 100644
--- a/drivers/net/wireless/ath/ath10k/pci.c
+++ b/drivers/net/wireless/ath/ath10k/pci.c
@@ -1623,6 +1623,9 @@  static void ath10k_pci_dump_dbglog(struct ath10k *ar)
 		WARN_ON(len & 0x3);
 
 		ath10k_dbg_save_fw_dbg_buffer(ar, (__le32 *)(buffer), len >> 2);
+		ath10k_dbg_print_fw_dbg_buffer(ar, (__le32 *)(buffer),
+					       dbuf.length/sizeof(__le32),
+					       KERN_ERR);
 		kfree(buffer);
 
 next:
diff --git a/drivers/net/wireless/ath/ath10k/wmi.c b/drivers/net/wireless/ath/ath10k/wmi.c
index d9e4b77..6cfba41 100644
--- a/drivers/net/wireless/ath/ath10k/wmi.c
+++ b/drivers/net/wireless/ath/ath10k/wmi.c
@@ -2510,6 +2510,18 @@  int ath10k_wmi_event_debug_mesg(struct ath10k *ar, struct sk_buff *skb)
 				      (skb->len - 4)/sizeof(__le32));
 	spin_unlock_bh(&ar->data_lock);
 
+	if (ev->dropped_count)
+		ath10k_warn(ar, "WARNING: Dropped dbglog buffers: %d\n", __le32_to_cpu(ev->dropped_count));
+
+	if (ath10k_debug_mask & ATH10K_DBG_FW)
+		ath10k_dbg_print_fw_dbg_buffer(ar, ev->messages,
+					       (skb->len - 4)/sizeof(__le32),
+					       KERN_INFO);
+	else
+		ath10k_dbg_print_fw_dbg_buffer(ar, ev->messages,
+					       (skb->len - 4)/sizeof(__le32),
+					       KERN_DEBUG);
+
 	return 0;
 }