diff mbox series

[net-next,v3,08/12] net: ethernet: oa_tc6: implement transmit path to transfer tx ethernet frames

Message ID 20240306085017.21731-9-Parthiban.Veerasooran@microchip.com (mailing list archive)
State Changes Requested
Delegated to: Netdev Maintainers
Headers show
Series Add support for OPEN Alliance 10BASE-T1x MACPHY Serial Interface | expand

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for net-next
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 940 this patch: 940
netdev/build_tools success Errors and warnings before: 0 this patch: 0
netdev/cc_maintainers warning 1 maintainers not CCed: parthiban.veerasooran@microchip.com
netdev/build_clang success Errors and warnings before: 956 this patch: 956
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 956 this patch: 956
netdev/checkpatch warning WARNING: line length of 81 exceeds 80 columns WARNING: line length of 82 exceeds 80 columns
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
netdev/contest success net-next-2024-03-06--15-00 (tests: 891)

Commit Message

Parthiban Veerasooran March 6, 2024, 8:50 a.m. UTC
The transmit ethernet frame will be converted into multiple transmit data
chunks. Each transmit data chunk consists of a 4 bytes header followed by
a 64 bytes transmit data chunk payload. The 4 bytes data header occurs at
the beginning of each transmit data chunk on MOSI. The data header
contains the information needed to determine the validity and location of
the transmit frame data within the data chunk payload. The number of
transmit data chunks transmitted to mac-phy is limited to the number
transmit credits available in the mac-phy. Initially the transmit credits
will be updated from the buffer status register and then it will be
updated from the footer received on each spi data transfer. The received
footer will be examined for the transmit errors if any.

Signed-off-by: Parthiban Veerasooran <Parthiban.Veerasooran@microchip.com>
---
 drivers/net/ethernet/oa_tc6.c | 386 +++++++++++++++++++++++++++++++++-
 include/linux/oa_tc6.h        |   1 +
 2 files changed, 385 insertions(+), 2 deletions(-)

Comments

Andrew Lunn March 7, 2024, 5:08 p.m. UTC | #1
> @@ -55,6 +77,14 @@
>  						(OA_TC6_CTRL_MAX_REGISTERS *\
>  						OA_TC6_CTRL_REG_VALUE_SIZE) +\
>  						OA_TC6_CTRL_IGNORED_SIZE)
> +#define OA_TC6_CHUNK_PAYLOAD_SIZE		64
> +#define OA_TC6_DATA_HEADER_SIZE			4
> +#define OA_TC6_CHUNK_SIZE			(OA_TC6_DATA_HEADER_SIZE +\
> +						OA_TC6_CHUNK_PAYLOAD_SIZE)
> +#define OA_TC6_TX_SKB_QUEUE_SIZE		100

So you keep up to 100 packets in a queue. If use assume typical MTU
size packets, that is 1,238,400 bits. At 10Mbps, that is 120ms of
traffic. That is quite a lot of latency when a high priority packet is
added to the tail of the queue and needs to wait for all the other
packets to be sent first.

Chunks are 64 bytes. So in practice, you only ever need two
packets. You need to be able to fill a chunk with the final part of
one packet, and the beginning of the next. So i would try using a much
smaller queue size. That will allow Linux queue disciplines to give
you the high priority packets first which you send with low latency.

> +static void oa_tc6_add_tx_skb_to_spi_buf(struct oa_tc6 *tc6)
> +{
> +	enum oa_tc6_data_start_valid_info start_valid = OA_TC6_DATA_START_INVALID;
> +	enum oa_tc6_data_end_valid_info end_valid = OA_TC6_DATA_END_INVALID;
> +	__be32 *tx_buf = tc6->spi_data_tx_buf + tc6->spi_data_tx_buf_offset;
> +	u16 remaining_length = tc6->tx_skb->len - tc6->tx_skb_offset;
> +	u8 *tx_skb_data = tc6->tx_skb->data + tc6->tx_skb_offset;
> +	u8 end_byte_offset = 0;
> +	u16 length_to_copy;
> +
> +	/* Set start valid if the current tx chunk contains the start of the tx
> +	 * ethernet frame.
> +	 */
> +	if (!tc6->tx_skb_offset)
> +		start_valid = OA_TC6_DATA_START_VALID;
> +
> +	/* If the remaining tx skb length is more than the chunk payload size of
> +	 * 64 bytes then copy only 64 bytes and leave the ongoing tx skb for
> +	 * next tx chunk.
> +	 */
> +	length_to_copy = min_t(u16, remaining_length, OA_TC6_CHUNK_PAYLOAD_SIZE);
> +
> +	/* Copy the tx skb data to the tx chunk payload buffer */
> +	memcpy(tx_buf + 1, tx_skb_data, length_to_copy);
> +	tc6->tx_skb_offset += length_to_copy;

You probably need a call to skb_linearize() somewhere. You assume the
packet data is contiguous. It can in fact be split into multiple
segments. skb_linearize() will convert it to a single buffer.

> +static int oa_tc6_try_spi_transfer(struct oa_tc6 *tc6)
> +{
> +	int ret;
> +
> +	while (true) {
> +		u16 spi_length = 0;
> +
> +		tc6->spi_data_tx_buf_offset = 0;
> +
> +		if (tc6->tx_skb || !skb_queue_empty(&tc6->tx_skb_q))
> +			spi_length = oa_tc6_prepare_spi_tx_buf_for_tx_skbs(tc6);
> +
> +		if (spi_length == 0)
> +			break;
> +
> +		ret = oa_tc6_spi_transfer(tc6, OA_TC6_DATA_HEADER, spi_length);
> +		if (ret) {
> +			netdev_err(tc6->netdev,
> +				   "SPI data transfer failed. Restart the system: %d\n",
> +				   ret);

What does Restart the system mean?

> +static int oa_tc6_spi_thread_handler(void *data)
> +{
> +	struct oa_tc6 *tc6 = data;
> +	int ret;
> +
> +	while (likely(!kthread_should_stop())) {
> +		/* This kthread will be waken up if there is a tx skb */
> +		wait_event_interruptible(tc6->spi_wq,
> +					 !skb_queue_empty(&tc6->tx_skb_q) ||
> +					 kthread_should_stop());
> +		ret = oa_tc6_try_spi_transfer(tc6);

Shouldn't you check why you have been woken up? It seems more logical
to test here for kthread_should_stop() rather than have
oa_tc6_try_spi_transfer() handle there is not actually a packet to be
sent.

	Andrew
Parthiban Veerasooran March 19, 2024, 12:54 p.m. UTC | #2
Hi Andrew,

On 07/03/24 10:38 pm, Andrew Lunn wrote:
> EXTERNAL EMAIL: Do not click links or open attachments unless you know the content is safe
> 
>> @@ -55,6 +77,14 @@
>>                                                (OA_TC6_CTRL_MAX_REGISTERS *\
>>                                                OA_TC6_CTRL_REG_VALUE_SIZE) +\
>>                                                OA_TC6_CTRL_IGNORED_SIZE)
>> +#define OA_TC6_CHUNK_PAYLOAD_SIZE            64
>> +#define OA_TC6_DATA_HEADER_SIZE                      4
>> +#define OA_TC6_CHUNK_SIZE                    (OA_TC6_DATA_HEADER_SIZE +\
>> +                                             OA_TC6_CHUNK_PAYLOAD_SIZE)
>> +#define OA_TC6_TX_SKB_QUEUE_SIZE             100
> 
> So you keep up to 100 packets in a queue. If use assume typical MTU
> size packets, that is 1,238,400 bits. At 10Mbps, that is 120ms of
> traffic. That is quite a lot of latency when a high priority packet is
> added to the tail of the queue and needs to wait for all the other
> packets to be sent first.
> 
> Chunks are 64 bytes. So in practice, you only ever need two
> packets. You need to be able to fill a chunk with the final part of
> one packet, and the beginning of the next. So i would try using a much
> smaller queue size. That will allow Linux queue disciplines to give
> you the high priority packets first which you send with low latency.
Thanks for the detailed explanation. If I understand you correctly,

1. The tx skb queue size (OA_TC6_TX_SKB_QUEUE_SIZE) should be 2 to avoid 
the latency when a high priority packet added.

2. Need to implement the handling part of the below case,
In case if one packet ends in a chunk and that chunk still having some 
space left to accommodate some bytes from the next packet if available 
from network layer.

Will implement in the next version.
> 
>> +static void oa_tc6_add_tx_skb_to_spi_buf(struct oa_tc6 *tc6)
>> +{
>> +     enum oa_tc6_data_start_valid_info start_valid = OA_TC6_DATA_START_INVALID;
>> +     enum oa_tc6_data_end_valid_info end_valid = OA_TC6_DATA_END_INVALID;
>> +     __be32 *tx_buf = tc6->spi_data_tx_buf + tc6->spi_data_tx_buf_offset;
>> +     u16 remaining_length = tc6->tx_skb->len - tc6->tx_skb_offset;
>> +     u8 *tx_skb_data = tc6->tx_skb->data + tc6->tx_skb_offset;
>> +     u8 end_byte_offset = 0;
>> +     u16 length_to_copy;
>> +
>> +     /* Set start valid if the current tx chunk contains the start of the tx
>> +      * ethernet frame.
>> +      */
>> +     if (!tc6->tx_skb_offset)
>> +             start_valid = OA_TC6_DATA_START_VALID;
>> +
>> +     /* If the remaining tx skb length is more than the chunk payload size of
>> +      * 64 bytes then copy only 64 bytes and leave the ongoing tx skb for
>> +      * next tx chunk.
>> +      */
>> +     length_to_copy = min_t(u16, remaining_length, OA_TC6_CHUNK_PAYLOAD_SIZE);
>> +
>> +     /* Copy the tx skb data to the tx chunk payload buffer */
>> +     memcpy(tx_buf + 1, tx_skb_data, length_to_copy);
>> +     tc6->tx_skb_offset += length_to_copy;
> 
> You probably need a call to skb_linearize() somewhere. You assume the
> packet data is contiguous. It can in fact be split into multiple
> segments. skb_linearize() will convert it to a single buffer.
Ah ok. Then probably I have to add the below code in the 
oa_tc6_start_xmit() function before adding the skb into the transmit queue.

if (skb_linearize(skb)) {
	dev_kfree_skb_any(skb);
	tc6->netdev->stats.tx_dropped++;
	return NETDEV_TX_OK;
}

> 
>> +static int oa_tc6_try_spi_transfer(struct oa_tc6 *tc6)
>> +{
>> +     int ret;
>> +
>> +     while (true) {
>> +             u16 spi_length = 0;
>> +
>> +             tc6->spi_data_tx_buf_offset = 0;
>> +
>> +             if (tc6->tx_skb || !skb_queue_empty(&tc6->tx_skb_q))
>> +                     spi_length = oa_tc6_prepare_spi_tx_buf_for_tx_skbs(tc6);
>> +
>> +             if (spi_length == 0)
>> +                     break;
>> +
>> +             ret = oa_tc6_spi_transfer(tc6, OA_TC6_DATA_HEADER, spi_length);
>> +             if (ret) {
>> +                     netdev_err(tc6->netdev,
>> +                                "SPI data transfer failed. Restart the system: %d\n",
>> +                                ret);
> 
> What does Restart the system mean?
Hmm, actually if SPI transfer failed then it can be hardware failure or 
poor SPI connection. Now I realize that just restarting the system will 
not help. I will remove "Restart the system:" as it is not the correct info.
> 
>> +static int oa_tc6_spi_thread_handler(void *data)
>> +{
>> +     struct oa_tc6 *tc6 = data;
>> +     int ret;
>> +
>> +     while (likely(!kthread_should_stop())) {
>> +             /* This kthread will be waken up if there is a tx skb */
>> +             wait_event_interruptible(tc6->spi_wq,
>> +                                      !skb_queue_empty(&tc6->tx_skb_q) ||
>> +                                      kthread_should_stop());
>> +             ret = oa_tc6_try_spi_transfer(tc6);
> 
> Shouldn't you check why you have been woken up? It seems more logical
> to test here for kthread_should_stop() rather than have
> oa_tc6_try_spi_transfer() handle there is not actually a packet to be
> sent.
Ok, then I will add the below code before the oa_tc6_try_spi_transfer().

if (kthread_should_stop())
	break;

Best regards,
Parthiban V
> 
>          Andrew
>
Andrew Lunn March 19, 2024, 1:19 p.m. UTC | #3
On Tue, Mar 19, 2024 at 12:54:30PM +0000, Parthiban.Veerasooran@microchip.com wrote:
> Hi Andrew,
> 
> On 07/03/24 10:38 pm, Andrew Lunn wrote:
> > EXTERNAL EMAIL: Do not click links or open attachments unless you know the content is safe
> > 
> >> @@ -55,6 +77,14 @@
> >>                                                (OA_TC6_CTRL_MAX_REGISTERS *\
> >>                                                OA_TC6_CTRL_REG_VALUE_SIZE) +\
> >>                                                OA_TC6_CTRL_IGNORED_SIZE)
> >> +#define OA_TC6_CHUNK_PAYLOAD_SIZE            64
> >> +#define OA_TC6_DATA_HEADER_SIZE                      4
> >> +#define OA_TC6_CHUNK_SIZE                    (OA_TC6_DATA_HEADER_SIZE +\
> >> +                                             OA_TC6_CHUNK_PAYLOAD_SIZE)
> >> +#define OA_TC6_TX_SKB_QUEUE_SIZE             100
> > 
> > So you keep up to 100 packets in a queue. If use assume typical MTU
> > size packets, that is 1,238,400 bits. At 10Mbps, that is 120ms of
> > traffic. That is quite a lot of latency when a high priority packet is
> > added to the tail of the queue and needs to wait for all the other
> > packets to be sent first.
> > 
> > Chunks are 64 bytes. So in practice, you only ever need two
> > packets. You need to be able to fill a chunk with the final part of
> > one packet, and the beginning of the next. So i would try using a much
> > smaller queue size. That will allow Linux queue disciplines to give
> > you the high priority packets first which you send with low latency.
> Thanks for the detailed explanation. If I understand you correctly,
> 
> 1. The tx skb queue size (OA_TC6_TX_SKB_QUEUE_SIZE) should be 2 to avoid 
> the latency when a high priority packet added.
> 
> 2. Need to implement the handling part of the below case,
> In case if one packet ends in a chunk and that chunk still having some 
> space left to accommodate some bytes from the next packet if available 
> from network layer.

This second part is clearly an optimisation. If you have lots of full
MTU packets, 1514 bytes, they take around 24 chunks. Having the last
chunk only 1/2 full does not waste too much bandwidth. But if you are
carrying lots of small packets, say voice, 130 bytes, the wasted
bandwidth starts to add up. But is there a use case for 10Mbps of
small packets? I doubt it.

So if you don't have the ability to combine two packets into one
chunk, i would do that later. Lets get the basics merged first, it can
be optimised later.

	Andrew
Parthiban Veerasooran March 20, 2024, 10:43 a.m. UTC | #4
Hi Andrew,

On 19/03/24 6:49 pm, Andrew Lunn wrote:
> EXTERNAL EMAIL: Do not click links or open attachments unless you know the content is safe
> 
> On Tue, Mar 19, 2024 at 12:54:30PM +0000, Parthiban.Veerasooran@microchip.com wrote:
>> Hi Andrew,
>>
>> On 07/03/24 10:38 pm, Andrew Lunn wrote:
>>> EXTERNAL EMAIL: Do not click links or open attachments unless you know the content is safe
>>>
>>>> @@ -55,6 +77,14 @@
>>>>                                                 (OA_TC6_CTRL_MAX_REGISTERS *\
>>>>                                                 OA_TC6_CTRL_REG_VALUE_SIZE) +\
>>>>                                                 OA_TC6_CTRL_IGNORED_SIZE)
>>>> +#define OA_TC6_CHUNK_PAYLOAD_SIZE            64
>>>> +#define OA_TC6_DATA_HEADER_SIZE                      4
>>>> +#define OA_TC6_CHUNK_SIZE                    (OA_TC6_DATA_HEADER_SIZE +\
>>>> +                                             OA_TC6_CHUNK_PAYLOAD_SIZE)
>>>> +#define OA_TC6_TX_SKB_QUEUE_SIZE             100
>>>
>>> So you keep up to 100 packets in a queue. If use assume typical MTU
>>> size packets, that is 1,238,400 bits. At 10Mbps, that is 120ms of
>>> traffic. That is quite a lot of latency when a high priority packet is
>>> added to the tail of the queue and needs to wait for all the other
>>> packets to be sent first.
>>>
>>> Chunks are 64 bytes. So in practice, you only ever need two
>>> packets. You need to be able to fill a chunk with the final part of
>>> one packet, and the beginning of the next. So i would try using a much
>>> smaller queue size. That will allow Linux queue disciplines to give
>>> you the high priority packets first which you send with low latency.
>> Thanks for the detailed explanation. If I understand you correctly,
>>
>> 1. The tx skb queue size (OA_TC6_TX_SKB_QUEUE_SIZE) should be 2 to avoid
>> the latency when a high priority packet added.
>>
>> 2. Need to implement the handling part of the below case,
>> In case if one packet ends in a chunk and that chunk still having some
>> space left to accommodate some bytes from the next packet if available
>> from network layer.
> 
> This second part is clearly an optimisation. If you have lots of full
> MTU packets, 1514 bytes, they take around 24 chunks. Having the last
> chunk only 1/2 full does not waste too much bandwidth. But if you are
> carrying lots of small packets, say voice, 130 bytes, the wasted
> bandwidth starts to add up. But is there a use case for 10Mbps of
> small packets? I doubt it.
Yes, for sure there is a possibility to get into this scenario and the 
protocol also supports that. But as proposed by you below, let's 
implement it as part of optimization later.
> 
> So if you don't have the ability to combine two packets into one
> chunk, i would do that later. Lets get the basics merged first, it can
> be optimised later.
Yes, I agree with this proposal to get the basic version merged first.

Best regards,
Parthiban V
> 
>          Andrew
Selvamani Rajagopal March 21, 2024, 7:04 p.m. UTC | #5
> -----Original Message-----
> From: Parthiban.Veerasooran@microchip.com
> <Parthiban.Veerasooran@microchip.com>
> Sent: Wednesday, March 20, 2024 3:43 AM
> To: andrew@lunn.ch
> Cc: davem@davemloft.net; edumazet@google.com; kuba@kernel.org;
> pabeni@redhat.com; horms@kernel.org; saeedm@nvidia.com;
> anthony.l.nguyen@intel.com; netdev@vger.kernel.org; linux-
> kernel@vger.kernel.org; corbet@lwn.net; linux-doc@vger.kernel.org;
> robh+dt@kernel.org; krzysztof.kozlowski+dt@linaro.org;
> conor+dt@kernel.org; devicetree@vger.kernel.org;
> Horatiu.Vultur@microchip.com; ruanjinjie@huawei.com;
> Steen.Hegelund@microchip.com; vladimir.oltean@nxp.com;
> UNGLinuxDriver@microchip.com; Thorsten.Kummermehr@microchip.com;
> Piergiorgio Beruto <Pier.Beruto@onsemi.com>; Selvamani Rajagopal
> <Selvamani.Rajagopal@onsemi.com>; Nicolas.Ferre@microchip.com;
> benjamin.bigler@bernformulastudent.ch
> Subject: Re: [PATCH net-next v3 08/12] net: ethernet: oa_tc6: implement
> transmit path to transfer tx ethernet frames
> 
> [External Email]: This email arrived from an external source - Please exercise
> caution when opening any attachments or clicking on links.
> 
> Hi Andrew,
> 
> On 19/03/24 6:49 pm, Andrew Lunn wrote:
> > EXTERNAL EMAIL: Do not click links or open attachments unless you know
> > the content is safe
> >
> > On Tue, Mar 19, 2024 at 12:54:30PM +0000,
> Parthiban.Veerasooran@microchip.com wrote:
> >> Hi Andrew,
> >>
> >> On 07/03/24 10:38 pm, Andrew Lunn wrote:
> >>> EXTERNAL EMAIL: Do not click links or open attachments unless you
> >>> know the content is safe
> >>>
> >>>> @@ -55,6 +77,14 @@
> >>>>                                                 (OA_TC6_CTRL_MAX_REGISTERS *\
> >>>>                                                 OA_TC6_CTRL_REG_VALUE_SIZE) +\
> >>>>
> >>>> OA_TC6_CTRL_IGNORED_SIZE)
> >>>> +#define OA_TC6_CHUNK_PAYLOAD_SIZE            64
> >>>> +#define OA_TC6_DATA_HEADER_SIZE                      4
> >>>> +#define OA_TC6_CHUNK_SIZE                    (OA_TC6_DATA_HEADER_SIZE
> +\
> >>>> +                                             OA_TC6_CHUNK_PAYLOAD_SIZE)
> >>>> +#define OA_TC6_TX_SKB_QUEUE_SIZE             100
> >>>
> >>> So you keep up to 100 packets in a queue. If use assume typical MTU
> >>> size packets, that is 1,238,400 bits. At 10Mbps, that is 120ms of
> >>> traffic. That is quite a lot of latency when a high priority packet
> >>> is added to the tail of the queue and needs to wait for all the
> >>> other packets to be sent first.
> >>>
> >>> Chunks are 64 bytes. So in practice, you only ever need two packets.
> >>> You need to be able to fill a chunk with the final part of one
> >>> packet, and the beginning of the next. So i would try using a much
> >>> smaller queue size. That will allow Linux queue disciplines to give
> >>> you the high priority packets first which you send with low latency.
> >> Thanks for the detailed explanation. If I understand you correctly,
> >>
> >> 1. The tx skb queue size (OA_TC6_TX_SKB_QUEUE_SIZE) should be 2 to
> >> avoid the latency when a high priority packet added.
> >>
> >> 2. Need to implement the handling part of the below case, In case if
> >> one packet ends in a chunk and that chunk still having some space
> >> left to accommodate some bytes from the next packet if available from
> >> network layer.
> >
> > This second part is clearly an optimisation. If you have lots of full
> > MTU packets, 1514 bytes, they take around 24 chunks. Having the last
> > chunk only 1/2 full does not waste too much bandwidth. But if you are
> > carrying lots of small packets, say voice, 130 bytes, the wasted
> > bandwidth starts to add up. But is there a use case for 10Mbps of
> > small packets? I doubt it.
> Yes, for sure there is a possibility to get into this scenario and the protocol also
> supports that. But as proposed by you below, let's implement it as part of
> optimization later.
> >
> > So if you don't have the ability to combine two packets into one
> > chunk, i would do that later. Lets get the basics merged first, it can
> > be optimised later.
> Yes, I agree with this proposal to get the basic version merged first.

While latency is important, so is using the available bandwidth efficiently. Here is a suggestion.  We know that the tx credit available basically tells us,
how many chunks could be transmitted without overflow. Instead of stopping the netif queue based on number of skbs queued, why not stop the queue based on
number of bytes accumulated? Basically, at any given point of time, we enqueue the tx_skb_q until we are have enough bytes to cross the threshold of (tc6->tc_credit * OA_TC6_CHUNK_PAYLOAD_SIZE).
This way, during the next transmit, we could utilize the whole available credits. Bandwidth utilization between bigger frames and smaller frames would be not be vastly different.

> 
> Best regards,
> Parthiban V
> >
> >          Andrew
Andrew Lunn March 21, 2024, 7:42 p.m. UTC | #6
> > > This second part is clearly an optimisation. If you have lots of full
> > > MTU packets, 1514 bytes, they take around 24 chunks. Having the last
> > > chunk only 1/2 full does not waste too much bandwidth. But if you are
> > > carrying lots of small packets, say voice, 130 bytes, the wasted
> > > bandwidth starts to add up. But is there a use case for 10Mbps of
> > > small packets? I doubt it.
> > Yes, for sure there is a possibility to get into this scenario and the protocol also
> > supports that. But as proposed by you below, let's implement it as part of
> > optimization later.
> > >
> > > So if you don't have the ability to combine two packets into one
> > > chunk, i would do that later. Lets get the basics merged first, it can
> > > be optimised later.
> > Yes, I agree with this proposal to get the basic version merged first.
> 
> While latency is important, so is using the available bandwidth efficiently. Here is a suggestion.  We know that the tx credit available basically tells us,
> how many chunks could be transmitted without overflow. Instead of stopping the netif queue based on number of skbs queued, why not stop the queue based on
> number of bytes accumulated? Basically, at any given point of time, we enqueue the tx_skb_q until we are have enough bytes to cross the threshold of (tc6->tc_credit * OA_TC6_CHUNK_PAYLOAD_SIZE).
> This way, during the next transmit, we could utilize the whole available credits. Bandwidth utilization between bigger frames and smaller frames would be not be vastly different.

Please configure your email client to wrap emails at around 70
characters.

tc_credit is 5 bits. So it is a maximum of 32.

A 1514 frame takes around 24 chunks. So you only need two full size
frames to consume all your possible credit.

If you happen to have smaller voice packets, say 130 bytes, you need
three chunks to send it. So you might want to have 10 such packets on
hand in order to make use of all your credit. But if you have 10 voice
packets to send in a burst, your voice quality is going to be bad,
they should be 10ms to 20ms apart, not in a burst...

I don't like the original idea of having lots of packets in a transmit
queue. But having 1/2 dozen should not be an issue.

In general, we prefer things to be simple. We can then optimise later,
and use benchmarks to show the optimisations really do bring a benefit
to justify the added complexity.

   Andrew
Selvamani Rajagopal March 22, 2024, 6:31 p.m. UTC | #7
> -----Original Message-----
> From: Andrew Lunn <andrew@lunn.ch>
> Sent: Thursday, March 21, 2024 12:42 PM
> To: Selvamani Rajagopal <Selvamani.Rajagopal@onsemi.com>
> Cc: Parthiban.Veerasooran@microchip.com; davem@davemloft.net;
> edumazet@google.com; kuba@kernel.org; pabeni@redhat.com;
> horms@kernel.org; saeedm@nvidia.com; anthony.l.nguyen@intel.com;
> netdev@vger.kernel.org; linux-kernel@vger.kernel.org; corbet@lwn.net;
> linux-doc@vger.kernel.org; robh+dt@kernel.org;
> krzysztof.kozlowski+dt@linaro.org; conor+dt@kernel.org;
> devicetree@vger.kernel.org; Horatiu.Vultur@microchip.com;
> ruanjinjie@huawei.com; Steen.Hegelund@microchip.com;
> vladimir.oltean@nxp.com; UNGLinuxDriver@microchip.com;
> Thorsten.Kummermehr@microchip.com; Piergiorgio Beruto
> <Pier.Beruto@onsemi.com>; Nicolas.Ferre@microchip.com;
> benjamin.bigler@bernformulastudent.ch
> Subject: Re: [PATCH net-next v3 08/12] net: ethernet: oa_tc6:
> implement transmit path to transfer tx ethernet frames
> 
> [External Email]: This email arrived from an external source - Please
> exercise caution when opening any attachments or clicking on links.
> 
> > > > This second part is clearly an optimisation. If you have lots of
> > > > full MTU packets, 1514 bytes, they take around 24 chunks. Having
> > > > the last chunk only 1/2 full does not waste too much bandwidth.
> > > > But if you are carrying lots of small packets, say voice, 130
> > > > bytes, the wasted bandwidth starts to add up. But is there a use
> > > > case for 10Mbps of small packets? I doubt it.
> > > Yes, for sure there is a possibility to get into this scenario and
> > > the protocol also supports that. But as proposed by you below, let's
> > > implement it as part of optimization later.
> > > >
> > > > So if you don't have the ability to combine two packets into one
> > > > chunk, i would do that later. Lets get the basics merged first, it
> > > > can be optimised later.
> > > Yes, I agree with this proposal to get the basic version merged first.
> >
> > While latency is important, so is using the available bandwidth
> > efficiently. Here is a suggestion.  We know that the tx credit
> > available basically tells us, how many chunks could be transmitted
> without overflow. Instead of stopping the netif queue based on number
> of skbs queued, why not stop the queue based on number of bytes
> accumulated? Basically, at any given point of time, we enqueue the
> tx_skb_q until we are have enough bytes to cross the threshold of (tc6-
> >tc_credit * OA_TC6_CHUNK_PAYLOAD_SIZE).
> > This way, during the next transmit, we could utilize the whole available
> credits. Bandwidth utilization between bigger frames and smaller
> frames would be not be vastly different.
> 
> Please configure your email client to wrap emails at around 70
> characters.
> 
> tc_credit is 5 bits. So it is a maximum of 32.
> 
> A 1514 frame takes around 24 chunks. So you only need two full size
> frames to consume all your possible credit.
> 
> If you happen to have smaller voice packets, say 130 bytes, you need
> three chunks to send it. So you might want to have 10 such packets on
> hand in order to make use of all your credit. But if you have 10 voice
> packets to send in a burst, your voice quality is going to be bad, they
> should be 10ms to 20ms apart, not in a burst...
> 
> I don't like the original idea of having lots of packets in a transmit queue.
> But having 1/2 dozen should not be an issue.
> 
> In general, we prefer things to be simple. We can then optimise later,
> and use benchmarks to show the optimisations really do bring a benefit
> to justify the added complexity.

True. I should get some performance numbers to see where we are
with the current code. That would be time to look at the improvement. 

> 
>    Andrew
diff mbox series

Patch

diff --git a/drivers/net/ethernet/oa_tc6.c b/drivers/net/ethernet/oa_tc6.c
index 371687670409..44e8c86a5305 100644
--- a/drivers/net/ethernet/oa_tc6.c
+++ b/drivers/net/ethernet/oa_tc6.c
@@ -27,6 +27,13 @@ 
 /* Status Register #0 */
 #define OA_TC6_REG_STATUS0			0x0008
 #define STATUS0_RESETC				BIT(6)	/* Reset Complete */
+#define STATUS0_HEADER_ERROR			BIT(5)
+#define STATUS0_LOSS_OF_FRAME_ERROR		BIT(4)
+#define STATUS0_TX_PROTOCOL_ERROR		BIT(0)
+
+/* Buffer Status Register */
+#define OA_TC6_REG_BUFFER_STATUS		0x000B
+#define BUFFER_STATUS_TX_CREDITS_AVAILABLE	GENMASK(15, 8)
 
 /* Interrupt Mask Register #0 */
 #define OA_TC6_REG_INT_MASK0			0x000C
@@ -47,6 +54,21 @@ 
 #define OA_TC6_CTRL_HEADER_LENGTH		GENMASK(7, 1)
 #define OA_TC6_CTRL_HEADER_PARITY		BIT(0)
 
+/* Data header */
+#define OA_TC6_DATA_HEADER_DATA_NOT_CTRL	BIT(31)
+#define OA_TC6_DATA_HEADER_DATA_VALID		BIT(21)
+#define OA_TC6_DATA_HEADER_START_VALID		BIT(20)
+#define OA_TC6_DATA_HEADER_START_WORD_OFFSET	GENMASK(19, 16)
+#define OA_TC6_DATA_HEADER_END_VALID		BIT(14)
+#define OA_TC6_DATA_HEADER_END_BYTE_OFFSET	GENMASK(13, 8)
+#define OA_TC6_DATA_HEADER_PARITY		BIT(0)
+
+/* Data footer */
+#define OA_TC6_DATA_FOOTER_EXTENDED_STS		BIT(31)
+#define OA_TC6_DATA_FOOTER_RXD_HEADER_BAD	BIT(30)
+#define OA_TC6_DATA_FOOTER_CONFIG_SYNC		BIT(29)
+#define OA_TC6_DATA_FOOTER_TX_CREDITS		GENMASK(5, 1)
+
 #define OA_TC6_CTRL_HEADER_SIZE			4
 #define OA_TC6_CTRL_REG_VALUE_SIZE		4
 #define OA_TC6_CTRL_IGNORED_SIZE		4
@@ -55,6 +77,14 @@ 
 						(OA_TC6_CTRL_MAX_REGISTERS *\
 						OA_TC6_CTRL_REG_VALUE_SIZE) +\
 						OA_TC6_CTRL_IGNORED_SIZE)
+#define OA_TC6_CHUNK_PAYLOAD_SIZE		64
+#define OA_TC6_DATA_HEADER_SIZE			4
+#define OA_TC6_CHUNK_SIZE			(OA_TC6_DATA_HEADER_SIZE +\
+						OA_TC6_CHUNK_PAYLOAD_SIZE)
+#define OA_TC6_TX_SKB_QUEUE_SIZE		100
+#define OA_TC6_MAX_TX_CHUNKS			48
+#define OA_TC6_SPI_DATA_BUF_SIZE		(OA_TC6_MAX_TX_CHUNKS *\
+						OA_TC6_CHUNK_SIZE)
 #define STATUS0_RESETC_POLL_DELAY		5
 #define STATUS0_RESETC_POLL_TIMEOUT		100
 
@@ -68,10 +98,20 @@  struct oa_tc6 {
 	struct mutex spi_ctrl_lock; /* Protects spi control transfer */
 	void *spi_ctrl_tx_buf;
 	void *spi_ctrl_rx_buf;
+	void *spi_data_tx_buf;
+	void *spi_data_rx_buf;
+	struct sk_buff_head tx_skb_q;
+	struct sk_buff *tx_skb;
+	struct task_struct *spi_thread;
+	wait_queue_head_t spi_wq;
+	u16 tx_skb_offset;
+	u16 spi_data_tx_buf_offset;
+	u16 tx_credits;
 };
 
 enum oa_tc6_header_type {
 	OA_TC6_CTRL_HEADER,
+	OA_TC6_DATA_HEADER,
 };
 
 enum oa_tc6_register_op {
@@ -79,14 +119,34 @@  enum oa_tc6_register_op {
 	OA_TC6_CTRL_REG_WRITE = 1,
 };
 
+enum oa_tc6_data_valid_info {
+	OA_TC6_DATA_INVALID,
+	OA_TC6_DATA_VALID,
+};
+
+enum oa_tc6_data_start_valid_info {
+	OA_TC6_DATA_START_INVALID,
+	OA_TC6_DATA_START_VALID,
+};
+
+enum oa_tc6_data_end_valid_info {
+	OA_TC6_DATA_END_INVALID,
+	OA_TC6_DATA_END_VALID,
+};
+
 static int oa_tc6_spi_transfer(struct oa_tc6 *tc6,
 			       enum oa_tc6_header_type header_type, u16 length)
 {
 	struct spi_transfer xfer = { 0 };
 	struct spi_message msg;
 
-	xfer.tx_buf = tc6->spi_ctrl_tx_buf;
-	xfer.rx_buf = tc6->spi_ctrl_rx_buf;
+	if (header_type == OA_TC6_DATA_HEADER) {
+		xfer.tx_buf = tc6->spi_data_tx_buf;
+		xfer.rx_buf = tc6->spi_data_rx_buf;
+	} else {
+		xfer.tx_buf = tc6->spi_ctrl_tx_buf;
+		xfer.rx_buf = tc6->spi_ctrl_rx_buf;
+	}
 	xfer.len = length;
 
 	spi_message_init(&msg);
@@ -505,6 +565,296 @@  static int oa_tc6_enable_data_transfer(struct oa_tc6 *tc6)
 	return oa_tc6_write_register(tc6, OA_TC6_REG_CONFIG0, value);
 }
 
+static void oa_tc6_cleanup_ongoing_tx_skb(struct oa_tc6 *tc6)
+{
+	if (tc6->tx_skb) {
+		tc6->netdev->stats.tx_dropped++;
+		kfree_skb(tc6->tx_skb);
+		tc6->tx_skb = NULL;
+	}
+}
+
+static int oa_tc6_process_extended_status(struct oa_tc6 *tc6)
+{
+	u32 value;
+	int ret;
+
+	ret = oa_tc6_read_register(tc6, OA_TC6_REG_STATUS0, &value);
+	if (ret) {
+		netdev_err(tc6->netdev, "STATUS0 register read failed: %d\n",
+			   ret);
+		return -ENODEV;
+	}
+
+	/* Clear the error interrupts status */
+	ret = oa_tc6_write_register(tc6, OA_TC6_REG_STATUS0, value);
+	if (ret) {
+		netdev_err(tc6->netdev, "STATUS0 register write failed: %d\n",
+			   ret);
+		return -ENODEV;
+	}
+
+	if (FIELD_GET(STATUS0_TX_PROTOCOL_ERROR, value)) {
+		netdev_err(tc6->netdev, "Transmit protocol error\n");
+		return -ENODEV;
+	}
+	/* TODO: Currently loss of frame and header errors are treated as
+	 * non-recoverable errors. They will be handled in the next version.
+	 */
+	if (FIELD_GET(STATUS0_LOSS_OF_FRAME_ERROR, value)) {
+		netdev_err(tc6->netdev, "Loss of frame error\n");
+		return -ENODEV;
+	}
+	if (FIELD_GET(STATUS0_HEADER_ERROR, value)) {
+		netdev_err(tc6->netdev, "Header error\n");
+		return -ENODEV;
+	}
+
+	return 0;
+}
+
+static int oa_tc6_process_rx_chunk_footer(struct oa_tc6 *tc6, u32 footer)
+{
+	/* Process rx chunk footer for the following,
+	 * 1. tx credits
+	 * 2. errors if any from MAC-PHY
+	 */
+	tc6->tx_credits = FIELD_GET(OA_TC6_DATA_FOOTER_TX_CREDITS, footer);
+
+	if (FIELD_GET(OA_TC6_DATA_FOOTER_EXTENDED_STS, footer)) {
+		int ret = oa_tc6_process_extended_status(tc6);
+
+		if (ret)
+			return ret;
+	}
+
+	/* TODO: Currently received header bad and configuration unsync errors
+	 * are treated as non-recoverable errors. They will be handled in the
+	 * next version.
+	 */
+	if (FIELD_GET(OA_TC6_DATA_FOOTER_RXD_HEADER_BAD, footer)) {
+		netdev_err(tc6->netdev, "Rxd header bad error\n");
+		return -ENODEV;
+	}
+
+	if (!FIELD_GET(OA_TC6_DATA_FOOTER_CONFIG_SYNC, footer)) {
+		netdev_err(tc6->netdev, "Config unsync error\n");
+		return -ENODEV;
+	}
+
+	return 0;
+}
+
+static u32 oa_tc6_get_rx_chunk_footer(struct oa_tc6 *tc6, u16 footer_offset)
+{
+	u8 *rx_buf = tc6->spi_data_rx_buf;
+	__be32 footer;
+
+	footer = *((__be32 *)&rx_buf[footer_offset]);
+
+	return be32_to_cpu(footer);
+}
+
+static int oa_tc6_process_spi_data_rx_buf(struct oa_tc6 *tc6, u16 length)
+{
+	u16 no_of_rx_chunks = length / OA_TC6_CHUNK_SIZE;
+	u32 footer;
+	int ret;
+
+	/* All the rx chunks in the receive SPI data buffer are examined here */
+	for (int i = 0; i < no_of_rx_chunks; i++) {
+		/* Last 4 bytes in each received chunk consist footer info */
+		footer = oa_tc6_get_rx_chunk_footer(tc6, i * OA_TC6_CHUNK_SIZE +
+						    OA_TC6_CHUNK_PAYLOAD_SIZE);
+
+		ret = oa_tc6_process_rx_chunk_footer(tc6, footer);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+static __be32 oa_tc6_prepare_data_header(bool data_valid, bool start_valid,
+					 bool end_valid, u8 end_byte_offset)
+{
+	u32 header = FIELD_PREP(OA_TC6_DATA_HEADER_DATA_NOT_CTRL,
+				OA_TC6_DATA_HEADER) |
+		     FIELD_PREP(OA_TC6_DATA_HEADER_DATA_VALID, data_valid) |
+		     FIELD_PREP(OA_TC6_DATA_HEADER_START_VALID, start_valid) |
+		     FIELD_PREP(OA_TC6_DATA_HEADER_END_VALID, end_valid) |
+		     FIELD_PREP(OA_TC6_DATA_HEADER_END_BYTE_OFFSET,
+				end_byte_offset);
+
+	header |= FIELD_PREP(OA_TC6_DATA_HEADER_PARITY,
+			     oa_tc6_get_parity(header));
+
+	return cpu_to_be32(header);
+}
+
+static void oa_tc6_add_tx_skb_to_spi_buf(struct oa_tc6 *tc6)
+{
+	enum oa_tc6_data_start_valid_info start_valid = OA_TC6_DATA_START_INVALID;
+	enum oa_tc6_data_end_valid_info end_valid = OA_TC6_DATA_END_INVALID;
+	__be32 *tx_buf = tc6->spi_data_tx_buf + tc6->spi_data_tx_buf_offset;
+	u16 remaining_length = tc6->tx_skb->len - tc6->tx_skb_offset;
+	u8 *tx_skb_data = tc6->tx_skb->data + tc6->tx_skb_offset;
+	u8 end_byte_offset = 0;
+	u16 length_to_copy;
+
+	/* Set start valid if the current tx chunk contains the start of the tx
+	 * ethernet frame.
+	 */
+	if (!tc6->tx_skb_offset)
+		start_valid = OA_TC6_DATA_START_VALID;
+
+	/* If the remaining tx skb length is more than the chunk payload size of
+	 * 64 bytes then copy only 64 bytes and leave the ongoing tx skb for
+	 * next tx chunk.
+	 */
+	length_to_copy = min_t(u16, remaining_length, OA_TC6_CHUNK_PAYLOAD_SIZE);
+
+	/* Copy the tx skb data to the tx chunk payload buffer */
+	memcpy(tx_buf + 1, tx_skb_data, length_to_copy);
+	tc6->tx_skb_offset += length_to_copy;
+
+	/* Set end valid if the current tx chunk contains the end of the tx
+	 * ethernet frame.
+	 */
+	if (tc6->tx_skb->len == tc6->tx_skb_offset) {
+		end_valid = OA_TC6_DATA_END_VALID;
+		end_byte_offset = length_to_copy - 1;
+		tc6->tx_skb_offset = 0;
+		tc6->netdev->stats.tx_bytes += tc6->tx_skb->len;
+		tc6->netdev->stats.tx_packets++;
+		kfree_skb(tc6->tx_skb);
+		tc6->tx_skb = NULL;
+	}
+
+	*tx_buf = oa_tc6_prepare_data_header(OA_TC6_DATA_VALID, start_valid,
+					     end_valid, end_byte_offset);
+	tc6->spi_data_tx_buf_offset += OA_TC6_CHUNK_SIZE;
+}
+
+static u16 oa_tc6_prepare_spi_tx_buf_for_tx_skbs(struct oa_tc6 *tc6)
+{
+	u16 used_tx_credits;
+
+	/* Get tx skbs and convert them into tx chunks based on the tx credits
+	 * available.
+	 */
+	for (used_tx_credits = 0; used_tx_credits < tc6->tx_credits;
+	     used_tx_credits++) {
+		if (!tc6->tx_skb)
+			tc6->tx_skb = skb_dequeue(&tc6->tx_skb_q);
+		if (!tc6->tx_skb)
+			break;
+		oa_tc6_add_tx_skb_to_spi_buf(tc6);
+	}
+
+	return used_tx_credits * OA_TC6_CHUNK_SIZE;
+}
+
+static int oa_tc6_try_spi_transfer(struct oa_tc6 *tc6)
+{
+	int ret;
+
+	while (true) {
+		u16 spi_length = 0;
+
+		tc6->spi_data_tx_buf_offset = 0;
+
+		if (tc6->tx_skb || !skb_queue_empty(&tc6->tx_skb_q))
+			spi_length = oa_tc6_prepare_spi_tx_buf_for_tx_skbs(tc6);
+
+		if (spi_length == 0)
+			break;
+
+		ret = oa_tc6_spi_transfer(tc6, OA_TC6_DATA_HEADER, spi_length);
+		if (ret) {
+			netdev_err(tc6->netdev,
+				   "SPI data transfer failed. Restart the system: %d\n",
+				   ret);
+			return ret;
+		}
+
+		ret = oa_tc6_process_spi_data_rx_buf(tc6, spi_length);
+		if (ret) {
+			oa_tc6_cleanup_ongoing_tx_skb(tc6);
+			netdev_err(tc6->netdev,
+				   "Device error. Restart the system\n");
+			return ret;
+		}
+
+		if (skb_queue_len(&tc6->tx_skb_q) < OA_TC6_TX_SKB_QUEUE_SIZE &&
+		    netif_queue_stopped(tc6->netdev))
+			netif_start_queue(tc6->netdev);
+	}
+
+	return 0;
+}
+
+static int oa_tc6_spi_thread_handler(void *data)
+{
+	struct oa_tc6 *tc6 = data;
+	int ret;
+
+	while (likely(!kthread_should_stop())) {
+		/* This kthread will be waken up if there is a tx skb */
+		wait_event_interruptible(tc6->spi_wq,
+					 !skb_queue_empty(&tc6->tx_skb_q) ||
+					 kthread_should_stop());
+		ret = oa_tc6_try_spi_transfer(tc6);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+static int oa_tc6_update_buffer_status_from_register(struct oa_tc6 *tc6)
+{
+	u32 value;
+	int ret;
+
+	/* Initially tx credits to be updated from the register as there is no
+	 * data transfer performed yet. Later it will be updated from the rx
+	 * footer.
+	 */
+	ret = oa_tc6_read_register(tc6, OA_TC6_REG_BUFFER_STATUS, &value);
+	if (ret)
+		return ret;
+
+	tc6->tx_credits = FIELD_GET(BUFFER_STATUS_TX_CREDITS_AVAILABLE, value);
+
+	return 0;
+}
+
+/**
+ * oa_tc6_start_xmit - function for sending the tx skb which consists ethernet
+ * frame.
+ * @tc6: oa_tc6 struct.
+ * @skb: socket buffer in which the ethernet frame is stored.
+ *
+ * Returns NETDEV_TX_OK if the transmit ethernet frame skb added in the tx_skb_q
+ * otherwise returns NETDEV_TX_BUSY.
+ */
+netdev_tx_t oa_tc6_start_xmit(struct oa_tc6 *tc6, struct sk_buff *skb)
+{
+	if (skb_queue_len(&tc6->tx_skb_q) > OA_TC6_TX_SKB_QUEUE_SIZE) {
+		netif_stop_queue(tc6->netdev);
+		return NETDEV_TX_BUSY;
+	}
+
+	skb_queue_tail(&tc6->tx_skb_q, skb);
+
+	/* Wake spi kthread to perform spi transfer */
+	wake_up_interruptible(&tc6->spi_wq);
+
+	return NETDEV_TX_OK;
+}
+EXPORT_SYMBOL_GPL(oa_tc6_start_xmit);
+
 /**
  * oa_tc6_init - allocates and initializes oa_tc6 structure.
  * @spi: device with which data will be exchanged.
@@ -541,6 +891,16 @@  struct oa_tc6 *oa_tc6_init(struct spi_device *spi, struct net_device *netdev)
 	if (!tc6->spi_ctrl_rx_buf)
 		return NULL;
 
+	tc6->spi_data_tx_buf = devm_kzalloc(&tc6->spi->dev,
+					    OA_TC6_SPI_DATA_BUF_SIZE, GFP_KERNEL);
+	if (!tc6->spi_data_tx_buf)
+		return NULL;
+
+	tc6->spi_data_rx_buf = devm_kzalloc(&tc6->spi->dev,
+					    OA_TC6_SPI_DATA_BUF_SIZE, GFP_KERNEL);
+	if (!tc6->spi_data_rx_buf)
+		return NULL;
+
 	ret = oa_tc6_sw_reset_macphy(tc6);
 	if (ret) {
 		dev_err(&tc6->spi->dev,
@@ -569,6 +929,25 @@  struct oa_tc6 *oa_tc6_init(struct spi_device *spi, struct net_device *netdev)
 		goto phy_exit;
 	}
 
+	ret = oa_tc6_update_buffer_status_from_register(tc6);
+	if (ret) {
+		dev_err(&tc6->spi->dev,
+			"Failed to update buffer status: %d\n", ret);
+		goto phy_exit;
+	}
+
+	skb_queue_head_init(&tc6->tx_skb_q);
+	init_waitqueue_head(&tc6->spi_wq);
+
+	tc6->spi_thread = kthread_run(oa_tc6_spi_thread_handler, tc6,
+				      "oa-tc6-spi-thread");
+	if (IS_ERR(tc6->spi_thread)) {
+		dev_err(&tc6->spi->dev, "Failed to create SPI thread\n");
+		goto phy_exit;
+	}
+
+	sched_set_fifo(tc6->spi_thread);
+
 	return tc6;
 
 phy_exit:
@@ -584,6 +963,9 @@  EXPORT_SYMBOL_GPL(oa_tc6_init);
 void oa_tc6_exit(struct oa_tc6 *tc6)
 {
 	oa_tc6_phy_exit(tc6);
+	kthread_stop(tc6->spi_thread);
+	dev_kfree_skb_any(tc6->tx_skb);
+	skb_queue_purge(&tc6->tx_skb_q);
 }
 EXPORT_SYMBOL_GPL(oa_tc6_exit);
 
diff --git a/include/linux/oa_tc6.h b/include/linux/oa_tc6.h
index 606ba9f1e663..5c7811ac9cbe 100644
--- a/include/linux/oa_tc6.h
+++ b/include/linux/oa_tc6.h
@@ -20,3 +20,4 @@  int oa_tc6_write_registers(struct oa_tc6 *tc6, u32 address, u32 value[],
 int oa_tc6_read_register(struct oa_tc6 *tc6, u32 address, u32 *value);
 int oa_tc6_read_registers(struct oa_tc6 *tc6, u32 address, u32 value[],
 			  u8 length);
+netdev_tx_t oa_tc6_start_xmit(struct oa_tc6 *tc6, struct sk_buff *skb);