diff mbox

[PATCH/RFC,net-next] ravb: RX checksum offload

Message ID 1505221489-12870-1-git-send-email-horms+renesas@verge.net.au (mailing list archive)
State RFC
Delegated to: Simon Horman
Headers show

Commit Message

Simon Horman Sept. 12, 2017, 1:04 p.m. UTC
Add support for RX checksum offload. This is enabled by default and
may be disabled and re-enabled using ethtool:

 # ethtool -K eth0 rx off
 # ethtool -K eth0 rx on

The RAVB provides a simple checksumming scheme which appears to be
completely compatible with CHECKSUM_COMPLETE: a 1's complement sum of
all packet data after the L2 header is appended to packet data; this may
be trivially read by the driver and used to update the skb accordingly.

In terms of performance throughput is close to gigabit line-rate both with
and without RX checksum offload enabled. Perf output, however, appears to
indicate that significantly less time is spent in do_csum(). This is as
expected.

Test results with RX checksum offload enabled:
 # /usr/bin/perf_3.16 record -o /run/perf.data -a netperf -t TCP_MAERTS -H 10.4.3.162
 MIGRATED TCP MAERTS TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 10.4.3.162 () port 0 AF_INET : demo
 enable_enobufs failed: getprotobyname
 Recv   Send    Send
 Socket Socket  Message  Elapsed
 Size   Size    Size     Time     Throughput
 bytes  bytes   bytes    secs.    10^6bits/sec

  87380  16384  16384    10.00     938.78
 [ perf record: Woken up 14 times to write data ]
 [ perf record: Captured and wrote 3.524 MB /run/perf.data (~153957 samples) ]

 Summary of output of perf report:
    19.49%      ksoftirqd/0  [kernel.kallsyms]  [k] _raw_spin_unlock_irqrestore
     9.88%      ksoftirqd/0  [kernel.kallsyms]  [k] __pi_memcpy
     7.33%      ksoftirqd/0  [kernel.kallsyms]  [k] skb_put
     7.00%      ksoftirqd/0  [kernel.kallsyms]  [k] ravb_poll
     3.89%      ksoftirqd/0  [kernel.kallsyms]  [k] dev_gro_receive
     3.65%          netperf  [kernel.kallsyms]  [k] __arch_copy_to_user
     3.43%          swapper  [kernel.kallsyms]  [k] arch_cpu_idle
     2.77%          swapper  [kernel.kallsyms]  [k] tick_nohz_idle_enter
     1.85%      ksoftirqd/0  [kernel.kallsyms]  [k] __netdev_alloc_skb
     1.80%          swapper  [kernel.kallsyms]  [k] _raw_spin_unlock_irq
     1.64%      ksoftirqd/0  [kernel.kallsyms]  [k] __slab_alloc.isra.79
     1.62%      ksoftirqd/0  [kernel.kallsyms]  [k] __pi___inval_cache_range

Test results without RX checksum offload enabled:
 # /usr/bin/perf_3.16 record -o /run/perf.data -a netperf -t TCP_MAERTS -H 10.4.3.162
 MIGRATED TCP MAERTS TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 10.4.3.162 () port 0 AF_INET : demo
 enable_enobufs failed: getprotobyname
 Recv   Send    Send
 Socket Socket  Message  Elapsed
 Size   Size    Size     Time     Throughput
 bytes  bytes   bytes    secs.    10^6bits/sec

  87380  16384  16384    10.00     941.09
 [ perf record: Woken up 14 times to write data ]
 [ perf record: Captured and wrote 3.411 MB /run/perf.data (~149040 samples) ]

 Summary of output of perf report:
   17.50%    ksoftirqd/0  [kernel.kallsyms]  [k] _raw_spin_unlock_irqrestore
    10.60%    ksoftirqd/0  [kernel.kallsyms]  [k] __pi_memcpy
     7.91%    ksoftirqd/0  [kernel.kallsyms]  [k] skb_put
     6.95%    ksoftirqd/0  [kernel.kallsyms]  [k] do_csum
     6.22%    ksoftirqd/0  [kernel.kallsyms]  [k] ravb_poll
     3.84%    ksoftirqd/0  [kernel.kallsyms]  [k] dev_gro_receive
     2.53%        netperf  [kernel.kallsyms]  [k] __arch_copy_to_user
     2.53%        swapper  [kernel.kallsyms]  [k] arch_cpu_idle
     2.27%        swapper  [kernel.kallsyms]  [k] tick_nohz_idle_enter
     1.90%    ksoftirqd/0  [kernel.kallsyms]  [k] __pi___inval_cache_range
     1.90%    ksoftirqd/0  [kernel.kallsyms]  [k] __netdev_alloc_skb
     1.52%    ksoftirqd/0  [kernel.kallsyms]  [k] __slab_alloc.isra.79

Above results collected on an R-Car Gen 3 Salvator-X/r8a7796 ES1.0.
Also tested on a R-Car Gen 3 Salvator-X/r8a7795 ES1.0.

By inspection this also appears to be compatible with the ravb found
on R-Car Gen 2 SoCs, however, this patch is currently untested on such
hardware.

Signed-off-by: Simon Horman <horms+renesas@verge.net.au>
---
 drivers/net/ethernet/renesas/ravb_main.c | 58 +++++++++++++++++++++++++++++++-
 1 file changed, 57 insertions(+), 1 deletion(-)

Comments

Sergei Shtylyov Sept. 13, 2017, 5:54 p.m. UTC | #1
Hello!

On 09/12/2017 04:04 PM, Simon Horman wrote:

> Add support for RX checksum offload. This is enabled by default and
> may be disabled and re-enabled using ethtool:
> 
>   # ethtool -K eth0 rx off
>   # ethtool -K eth0 rx on
> 
> The RAVB provides a simple checksumming scheme which appears to be
> completely compatible with CHECKSUM_COMPLETE: a 1's complement sum of

    Hm, the gen2/3 manuals say calculation doesn't involve bit inversion...

> all packet data after the L2 header is appended to packet data; this may
> be trivially read by the driver and used to update the skb accordingly.
 >
> In terms of performance throughput is close to gigabit line-rate both with
> and without RX checksum offload enabled. Perf output, however, appears to
> indicate that significantly less time is spent in do_csum(). This is as
> expected.

[...]

> By inspection this also appears to be compatible with the ravb found
> on R-Car Gen 2 SoCs, however, this patch is currently untested on such
> hardware.

    I probably won't be able to test it on gen2 too...

> Signed-off-by: Simon Horman <horms+renesas@verge.net.au>

    I'm generally OK with the patch but have some questions/comments below...

> ---
>   drivers/net/ethernet/renesas/ravb_main.c | 58 +++++++++++++++++++++++++++++++-
>   1 file changed, 57 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c
> index fdf30bfa403b..7c6438cd7de7 100644
> --- a/drivers/net/ethernet/renesas/ravb_main.c
> +++ b/drivers/net/ethernet/renesas/ravb_main.c
[...]
> @@ -1842,6 +1859,41 @@ static int ravb_do_ioctl(struct net_device *ndev, struct ifreq *req, int cmd)
>   	return phy_mii_ioctl(phydev, req, cmd);
>   }
>   
> +static void ravb_set_rx_csum(struct net_device *ndev, bool enable)
> +{
> +	struct ravb_private *priv = netdev_priv(ndev);
> +	unsigned long flags;
> +
> +	spin_lock_irqsave(&priv->lock, flags);
> +
> +	/* Disable TX and RX */
> +	ravb_rcv_snd_disable(ndev);
> +
> +	/* Modify RX Checksum setting */
> +	if (enable)
> +		ravb_modify(ndev, ECMR, 0, ECMR_RCSC);

    Please use ECMR_RCSC as the 3rd argument too to conform the common driver 
style.

> +	else
> +		ravb_modify(ndev, ECMR, ECMR_RCSC, 0);

    This *if* can easily be folded into a single ravb_modify() call...

[...]
> @@ -2004,6 +2057,9 @@ static int ravb_probe(struct platform_device *pdev)
>   	if (!ndev)
>   		return -ENOMEM;
>   
> +	ndev->features |= NETIF_F_RXCSUM;
> +	ndev->hw_features |= ndev->features;

    Hum, both fields are 0 before this? Then why not use '=' instead of '|='?
Even if not, why not just use the same value as both the rvalues?

[...]

MBR, Sergei
Simon Horman Sept. 28, 2017, 10:49 a.m. UTC | #2
On Wed, Sep 13, 2017 at 08:54:00PM +0300, Sergei Shtylyov wrote:
> Hello!
> 
> On 09/12/2017 04:04 PM, Simon Horman wrote:
> 
> >Add support for RX checksum offload. This is enabled by default and
> >may be disabled and re-enabled using ethtool:
> >
> >  # ethtool -K eth0 rx off
> >  # ethtool -K eth0 rx on
> >
> >The RAVB provides a simple checksumming scheme which appears to be
> >completely compatible with CHECKSUM_COMPLETE: a 1's complement sum of
> 
>    Hm, the gen2/3 manuals say calculation doesn't involve bit inversion...

Yes, I believe that matches my observation of the values supplied by
the hardware. Empirically this appears to be what the kernel expects.

> >all packet data after the L2 header is appended to packet data; this may
> >be trivially read by the driver and used to update the skb accordingly.
> >
> >In terms of performance throughput is close to gigabit line-rate both with
> >and without RX checksum offload enabled. Perf output, however, appears to
> >indicate that significantly less time is spent in do_csum(). This is as
> >expected.
> 
> [...]
> 
> >By inspection this also appears to be compatible with the ravb found
> >on R-Car Gen 2 SoCs, however, this patch is currently untested on such
> >hardware.
> 
>    I probably won't be able to test it on gen2 too...
> 
> >Signed-off-by: Simon Horman <horms+renesas@verge.net.au>
> 
>    I'm generally OK with the patch but have some questions/comments below...

Thanks, I will try to address them.

> >---
> >  drivers/net/ethernet/renesas/ravb_main.c | 58 +++++++++++++++++++++++++++++++-
> >  1 file changed, 57 insertions(+), 1 deletion(-)
> >
> >diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c
> >index fdf30bfa403b..7c6438cd7de7 100644
> >--- a/drivers/net/ethernet/renesas/ravb_main.c
> >+++ b/drivers/net/ethernet/renesas/ravb_main.c
> [...]
> >@@ -1842,6 +1859,41 @@ static int ravb_do_ioctl(struct net_device *ndev, struct ifreq *req, int cmd)
> >  	return phy_mii_ioctl(phydev, req, cmd);
> >  }
> >+static void ravb_set_rx_csum(struct net_device *ndev, bool enable)
> >+{
> >+	struct ravb_private *priv = netdev_priv(ndev);
> >+	unsigned long flags;
> >+
> >+	spin_lock_irqsave(&priv->lock, flags);
> >+
> >+	/* Disable TX and RX */
> >+	ravb_rcv_snd_disable(ndev);
> >+
> >+	/* Modify RX Checksum setting */
> >+	if (enable)
> >+		ravb_modify(ndev, ECMR, 0, ECMR_RCSC);
> 
>    Please use ECMR_RCSC as the 3rd argument too to conform the common driver
> style.
> 
> >+	else
> >+		ravb_modify(ndev, ECMR, ECMR_RCSC, 0);
> 
>    This *if* can easily be folded into a single ravb_modify() call...

Thanks, something like this?

	ravb_modify(ndev, ECMR, ECMR_RCSC, enable ? ECMR_RCSC : 0);

> [...]
> >@@ -2004,6 +2057,9 @@ static int ravb_probe(struct platform_device *pdev)
> >  	if (!ndev)
> >  		return -ENOMEM;
> >+	ndev->features |= NETIF_F_RXCSUM;
> >+	ndev->hw_features |= ndev->features;
> 
>    Hum, both fields are 0 before this? Then why not use '=' instead of '|='?
> Even if not, why not just use the same value as both the rvalues?

I don't feel strongly about this, how about?

	ndev->features = NETIF_F_RXCSUM;
	ndev->hw_features = NETIF_F_RXCSUM;
Sergei Shtylyov Sept. 28, 2017, 7:56 p.m. UTC | #3
Hello!

On 09/28/2017 01:49 PM, Simon Horman wrote:

>>> Add support for RX checksum offload. This is enabled by default and
>>> may be disabled and re-enabled using ethtool:
>>>
>>>   # ethtool -K eth0 rx off
>>>   # ethtool -K eth0 rx on
>>>
>>> The RAVB provides a simple checksumming scheme which appears to be
>>> completely compatible with CHECKSUM_COMPLETE: a 1's complement sum of
>>
>>     Hm, the gen2/3 manuals say calculation doesn't involve bit inversion...
> 
> Yes, I believe that matches my observation of the values supplied by
> the hardware. Empirically this appears to be what the kernel expects.

    Then why you talk of 1's complement here?

>>> all packet data after the L2 header is appended to packet data; this may
>>> be trivially read by the driver and used to update the skb accordingly.
>>>
>>> In terms of performance throughput is close to gigabit line-rate both with
>>> and without RX checksum offload enabled. Perf output, however, appears to
>>> indicate that significantly less time is spent in do_csum(). This is as
>>> expected.
>>
>> [...]
>>
>>> By inspection this also appears to be compatible with the ravb found
>>> on R-Car Gen 2 SoCs, however, this patch is currently untested on such
>>> hardware.
>>
>>     I probably won't be able to test it on gen2 too...
>>
>>> Signed-off-by: Simon Horman <horms+renesas@verge.net.au>
>>
>>     I'm generally OK with the patch but have some questions/comments below...
> 
> Thanks, I will try to address them.
> 
>>> ---
>>>   drivers/net/ethernet/renesas/ravb_main.c | 58 +++++++++++++++++++++++++++++++-
>>>   1 file changed, 57 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c
>>> index fdf30bfa403b..7c6438cd7de7 100644
>>> --- a/drivers/net/ethernet/renesas/ravb_main.c
>>> +++ b/drivers/net/ethernet/renesas/ravb_main.c
>> [...]
>>> @@ -1842,6 +1859,41 @@ static int ravb_do_ioctl(struct net_device *ndev, struct ifreq *req, int cmd)
>>>   	return phy_mii_ioctl(phydev, req, cmd);
>>>   }
>>> +static void ravb_set_rx_csum(struct net_device *ndev, bool enable)
>>> +{
>>> +	struct ravb_private *priv = netdev_priv(ndev);
>>> +	unsigned long flags;
>>> +
>>> +	spin_lock_irqsave(&priv->lock, flags);
>>> +
>>> +	/* Disable TX and RX */
>>> +	ravb_rcv_snd_disable(ndev);
>>> +
>>> +	/* Modify RX Checksum setting */
>>> +	if (enable)
>>> +		ravb_modify(ndev, ECMR, 0, ECMR_RCSC);
>>
>>     Please use ECMR_RCSC as the 3rd argument too to conform the common driver
>> style.
>>
>>> +	else
>>> +		ravb_modify(ndev, ECMR, ECMR_RCSC, 0);
>>
>>     This *if* can easily be folded into a single ravb_modify() call...
> 
> Thanks, something like this?
> 
> 	ravb_modify(ndev, ECMR, ECMR_RCSC, enable ? ECMR_RCSC : 0);

    Yes, exactly! :-)

>> [...]
>>> @@ -2004,6 +2057,9 @@ static int ravb_probe(struct platform_device *pdev)
>>>   	if (!ndev)
>>>   		return -ENOMEM;
>>> +	ndev->features |= NETIF_F_RXCSUM;
>>> +	ndev->hw_features |= ndev->features;
>>
>>     Hum, both fields are 0 before this? Then why not use '=' instead of '|='?
>> Even if not, why not just use the same value as both the rvalues?
> 
> I don't feel strongly about this, how about?
> 
> 	ndev->features = NETIF_F_RXCSUM;
> 	ndev->hw_features = NETIF_F_RXCSUM;

    Yes, I think it should work...

MBR, Sergei
diff mbox

Patch

diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c
index fdf30bfa403b..7c6438cd7de7 100644
--- a/drivers/net/ethernet/renesas/ravb_main.c
+++ b/drivers/net/ethernet/renesas/ravb_main.c
@@ -403,8 +403,9 @@  static void ravb_emac_init(struct net_device *ndev)
 	/* Receive frame limit set register */
 	ravb_write(ndev, ndev->mtu + ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN, RFLR);
 
-	/* PAUSE prohibition */
+	/* EMAC Mode: PAUSE prohibition; Duplex; RX Checksum; TX; RX */
 	ravb_write(ndev, ECMR_ZPF | (priv->duplex ? ECMR_DM : 0) |
+		   (ndev->features & NETIF_F_RXCSUM ? ECMR_RCSC : 0) |
 		   ECMR_TE | ECMR_RE, ECMR);
 
 	ravb_set_rate(ndev);
@@ -520,6 +521,19 @@  static void ravb_get_tx_tstamp(struct net_device *ndev)
 	}
 }
 
+static void ravb_rx_csum(struct sk_buff *skb)
+{
+	u8 *hw_csum;
+
+	/* The hardware checksum is 2 bytes appended to packet data */
+	if (unlikely(skb->len < 2))
+		return;
+	hw_csum = skb_tail_pointer(skb) - 2;
+	skb->csum = csum_unfold((__force __sum16)get_unaligned_le16(hw_csum));
+	skb->ip_summed = CHECKSUM_COMPLETE;
+	skb_trim(skb, skb->len - 2);
+}
+
 /* Packet receive function for Ethernet AVB */
 static bool ravb_rx(struct net_device *ndev, int *quota, int q)
 {
@@ -587,8 +601,11 @@  static bool ravb_rx(struct net_device *ndev, int *quota, int q)
 				ts.tv_nsec = le32_to_cpu(desc->ts_n);
 				shhwtstamps->hwtstamp = timespec64_to_ktime(ts);
 			}
+
 			skb_put(skb, pkt_len);
 			skb->protocol = eth_type_trans(skb, ndev);
+			if (ndev->features & NETIF_F_RXCSUM)
+				ravb_rx_csum(skb);
 			napi_gro_receive(&priv->napi[q], skb);
 			stats->rx_packets++;
 			stats->rx_bytes += pkt_len;
@@ -1842,6 +1859,41 @@  static int ravb_do_ioctl(struct net_device *ndev, struct ifreq *req, int cmd)
 	return phy_mii_ioctl(phydev, req, cmd);
 }
 
+static void ravb_set_rx_csum(struct net_device *ndev, bool enable)
+{
+	struct ravb_private *priv = netdev_priv(ndev);
+	unsigned long flags;
+
+	spin_lock_irqsave(&priv->lock, flags);
+
+	/* Disable TX and RX */
+	ravb_rcv_snd_disable(ndev);
+
+	/* Modify RX Checksum setting */
+	if (enable)
+		ravb_modify(ndev, ECMR, 0, ECMR_RCSC);
+	else
+		ravb_modify(ndev, ECMR, ECMR_RCSC, 0);
+
+	/* Enable TX and RX */
+	ravb_rcv_snd_enable(ndev);
+
+	spin_unlock_irqrestore(&priv->lock, flags);
+}
+
+static int ravb_set_features(struct net_device *ndev,
+			     netdev_features_t features)
+{
+	netdev_features_t changed = ndev->features ^ features;
+
+	if (changed & NETIF_F_RXCSUM)
+		ravb_set_rx_csum(ndev, features & NETIF_F_RXCSUM);
+
+	ndev->features = features;
+
+	return 0;
+}
+
 static const struct net_device_ops ravb_netdev_ops = {
 	.ndo_open		= ravb_open,
 	.ndo_stop		= ravb_close,
@@ -1853,6 +1905,7 @@  static const struct net_device_ops ravb_netdev_ops = {
 	.ndo_do_ioctl		= ravb_do_ioctl,
 	.ndo_validate_addr	= eth_validate_addr,
 	.ndo_set_mac_address	= eth_mac_addr,
+	.ndo_set_features	= ravb_set_features,
 };
 
 /* MDIO bus init function */
@@ -2004,6 +2057,9 @@  static int ravb_probe(struct platform_device *pdev)
 	if (!ndev)
 		return -ENOMEM;
 
+	ndev->features |= NETIF_F_RXCSUM;
+	ndev->hw_features |= ndev->features;
+
 	pm_runtime_enable(&pdev->dev);
 	pm_runtime_get_sync(&pdev->dev);