diff mbox series

[net-next,v3,2/2] net: renesas: rswitch: Use hardware pause features

Message ID 20230607015641.1724057-3-yoshihiro.shimoda.uh@renesas.com (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Headers show
Series net: renesas: rswitch: Improve perfromance of TX/RX | expand

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for net-next
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 8 this patch: 8
netdev/cc_maintainers warning 1 maintainers not CCed: alexanderduyck@fb.com
netdev/build_clang success Errors and warnings before: 8 this patch: 8
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 8 this patch: 8
netdev/checkpatch warning WARNING: line length of 87 exceeds 80 columns WARNING: line length of 98 exceeds 80 columns
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Yoshihiro Shimoda June 7, 2023, 1:56 a.m. UTC
Since this driver used the "global rate limiter" feature of GWCA,
the TX perfromance of each port was reduced when multiple ports
transmitted frames simultaneously. To improve perfromance, remove
the use of the "global rate limiter" feature and use "hardware pause"
features of the following:
 - "per priority pause" of GWCA
 - "global pause" of COMA

Note that these features are not related to the ethernet PAUSE frame.

Signed-off-by: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
---
 drivers/net/ethernet/renesas/rswitch.c | 36 ++++++++++----------------
 drivers/net/ethernet/renesas/rswitch.h |  7 +++++
 2 files changed, 21 insertions(+), 22 deletions(-)

Comments

Fijalkowski, Maciej June 7, 2023, 11:09 a.m. UTC | #1
On Wed, Jun 07, 2023 at 10:56:41AM +0900, Yoshihiro Shimoda wrote:
> Since this driver used the "global rate limiter" feature of GWCA,
> the TX perfromance of each port was reduced when multiple ports
> transmitted frames simultaneously. To improve perfromance, remove
> the use of the "global rate limiter" feature and use "hardware pause"
> features of the following:
>  - "per priority pause" of GWCA
>  - "global pause" of COMA
> 
> Note that these features are not related to the ethernet PAUSE frame.
> 
> Signed-off-by: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>

Reviewed-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>

> ---
>  drivers/net/ethernet/renesas/rswitch.c | 36 ++++++++++----------------
>  drivers/net/ethernet/renesas/rswitch.h |  7 +++++
>  2 files changed, 21 insertions(+), 22 deletions(-)
> 
> diff --git a/drivers/net/ethernet/renesas/rswitch.c b/drivers/net/ethernet/renesas/rswitch.c
> index 7bb0a6d594a0..84f62c77eb8f 100644
> --- a/drivers/net/ethernet/renesas/rswitch.c
> +++ b/drivers/net/ethernet/renesas/rswitch.c
> @@ -90,6 +90,11 @@ static int rswitch_bpool_config(struct rswitch_private *priv)
>  	return rswitch_reg_wait(priv->addr, CABPIRM, CABPIRM_BPR, CABPIRM_BPR);
>  }
>  
> +static void rswitch_coma_init(struct rswitch_private *priv)
> +{
> +	iowrite32(CABPPFLC_INIT_VALUE, priv->addr + CABPPFLC0);
> +}
> +
>  /* R-Switch-2 block (TOP) */
>  static void rswitch_top_init(struct rswitch_private *priv)
>  {
> @@ -156,24 +161,6 @@ static int rswitch_gwca_axi_ram_reset(struct rswitch_private *priv)
>  	return rswitch_reg_wait(priv->addr, GWARIRM, GWARIRM_ARR, GWARIRM_ARR);
>  }
>  
> -static void rswitch_gwca_set_rate_limit(struct rswitch_private *priv, int rate)
> -{
> -	u32 gwgrlulc, gwgrlc;
> -
> -	switch (rate) {
> -	case 1000:
> -		gwgrlulc = 0x0000005f;
> -		gwgrlc = 0x00010260;
> -		break;
> -	default:
> -		dev_err(&priv->pdev->dev, "%s: This rate is not supported (%d)\n", __func__, rate);
> -		return;
> -	}
> -
> -	iowrite32(gwgrlulc, priv->addr + GWGRLULC);
> -	iowrite32(gwgrlc, priv->addr + GWGRLC);
> -}
> -
>  static bool rswitch_is_any_data_irq(struct rswitch_private *priv, u32 *dis, bool tx)
>  {
>  	u32 *mask = tx ? priv->gwca.tx_irq_bits : priv->gwca.rx_irq_bits;
> @@ -402,7 +389,7 @@ static int rswitch_gwca_queue_format(struct net_device *ndev,
>  	linkfix->die_dt = DT_LINKFIX;
>  	rswitch_desc_set_dptr(linkfix, gq->ring_dma);
>  
> -	iowrite32(GWDCC_BALR | (gq->dir_tx ? GWDCC_DQT : 0) | GWDCC_EDE,
> +	iowrite32(GWDCC_BALR | (gq->dir_tx ? GWDCC_DCP(GWCA_IPV_NUM) | GWDCC_DQT : 0) | GWDCC_EDE,
>  		  priv->addr + GWDCC_OFFS(gq->index));
>  
>  	return 0;
> @@ -500,7 +487,8 @@ static int rswitch_gwca_queue_ext_ts_format(struct net_device *ndev,
>  	linkfix->die_dt = DT_LINKFIX;
>  	rswitch_desc_set_dptr(linkfix, gq->ring_dma);
>  
> -	iowrite32(GWDCC_BALR | (gq->dir_tx ? GWDCC_DQT : 0) | GWDCC_ETS | GWDCC_EDE,
> +	iowrite32(GWDCC_BALR | (gq->dir_tx ? GWDCC_DCP(GWCA_IPV_NUM) | GWDCC_DQT : 0) |
> +		  GWDCC_ETS | GWDCC_EDE,
>  		  priv->addr + GWDCC_OFFS(gq->index));
>  
>  	return 0;
> @@ -649,7 +637,8 @@ static int rswitch_gwca_hw_init(struct rswitch_private *priv)
>  	iowrite32(lower_32_bits(priv->gwca.ts_queue.ring_dma), priv->addr + GWTDCAC10);
>  	iowrite32(upper_32_bits(priv->gwca.ts_queue.ring_dma), priv->addr + GWTDCAC00);
>  	iowrite32(GWCA_TS_IRQ_BIT, priv->addr + GWTSDCC0);
> -	rswitch_gwca_set_rate_limit(priv, priv->gwca.speed);
> +
> +	iowrite32(GWTPC_PPPL(GWCA_IPV_NUM), priv->addr + GWTPC0);
>  
>  	for (i = 0; i < RSWITCH_NUM_PORTS; i++) {
>  		err = rswitch_rxdmac_init(priv, i);
> @@ -1502,7 +1491,8 @@ static netdev_tx_t rswitch_start_xmit(struct sk_buff *skb, struct net_device *nd
>  	rswitch_desc_set_dptr(&desc->desc, dma_addr);
>  	desc->desc.info_ds = cpu_to_le16(skb->len);
>  
> -	desc->info1 = cpu_to_le64(INFO1_DV(BIT(rdev->etha->index)) | INFO1_FMT);
> +	desc->info1 = cpu_to_le64(INFO1_DV(BIT(rdev->etha->index)) |
> +				  INFO1_IPV(GWCA_IPV_NUM) | INFO1_FMT);
>  	if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) {
>  		struct rswitch_gwca_ts_info *ts_info;
>  
> @@ -1772,6 +1762,8 @@ static int rswitch_init(struct rswitch_private *priv)
>  	if (err < 0)
>  		return err;
>  
> +	rswitch_coma_init(priv);
> +
>  	err = rswitch_gwca_linkfix_alloc(priv);
>  	if (err < 0)
>  		return -ENOMEM;
> diff --git a/drivers/net/ethernet/renesas/rswitch.h b/drivers/net/ethernet/renesas/rswitch.h
> index b3e0411b408e..bb9ed971a97c 100644
> --- a/drivers/net/ethernet/renesas/rswitch.h
> +++ b/drivers/net/ethernet/renesas/rswitch.h
> @@ -48,6 +48,7 @@
>  #define GWCA_NUM_IRQS		8
>  #define GWCA_INDEX		0
>  #define AGENT_INDEX_GWCA	3
> +#define GWCA_IPV_NUM		0
>  #define GWRO			RSWITCH_GWCA0_OFFSET
>  
>  #define GWCA_TS_IRQ_RESOURCE_NAME	"gwca0_rxts0"
> @@ -768,11 +769,14 @@ enum rswitch_gwca_mode {
>  #define GWARIRM_ARR		BIT(1)
>  
>  #define GWDCC_BALR		BIT(24)
> +#define GWDCC_DCP_MASK		GENMASK(18, 16)
> +#define GWDCC_DCP(prio)		FIELD_PREP(GWDCC_DCP_MASK, (prio))
>  #define GWDCC_DQT		BIT(11)
>  #define GWDCC_ETS		BIT(9)
>  #define GWDCC_EDE		BIT(8)
>  
>  #define GWTRC(queue)		(GWTRC0 + (queue) / 32 * 4)
> +#define GWTPC_PPPL(ipv)		BIT(ipv)
>  #define GWDCC_OFFS(queue)	(GWDCC0 + (queue) * 4)
>  
>  #define GWDIS(i)		(GWDIS0 + (i) * 0x10)
> @@ -789,6 +793,8 @@ enum rswitch_gwca_mode {
>  #define CABPIRM_BPIOG		BIT(0)
>  #define CABPIRM_BPR		BIT(1)
>  
> +#define CABPPFLC_INIT_VALUE	0x00800080
> +
>  /* MFWD */
>  #define FWPC0_LTHTA		BIT(0)
>  #define FWPC0_IP4UE		BIT(3)
> @@ -863,6 +869,7 @@ enum DIE_DT {
>  
>  /* For transmission */
>  #define INFO1_TSUN(val)		((u64)(val) << 8ULL)
> +#define INFO1_IPV(prio)		((u64)(prio) << 28ULL)
>  #define INFO1_CSD0(index)	((u64)(index) << 32ULL)
>  #define INFO1_CSD1(index)	((u64)(index) << 40ULL)
>  #define INFO1_DV(port_vector)	((u64)(port_vector) << 48ULL)
> -- 
> 2.25.1
>
Simon Horman June 7, 2023, 3:23 p.m. UTC | #2
On Wed, Jun 07, 2023 at 10:56:41AM +0900, Yoshihiro Shimoda wrote:
> Since this driver used the "global rate limiter" feature of GWCA,
> the TX perfromance of each port was reduced when multiple ports

Hi Shimoda-san,

a very minor nit, in case you have to do a v4 for some other reason:

        perfromance -> performance

> transmitted frames simultaneously. To improve perfromance, remove

Here too.

> the use of the "global rate limiter" feature and use "hardware pause"
> features of the following:
>  - "per priority pause" of GWCA
>  - "global pause" of COMA
> 
> Note that these features are not related to the ethernet PAUSE frame.
> 
> Signed-off-by: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>

...
Yoshihiro Shimoda June 7, 2023, 11:57 p.m. UTC | #3
Hi Simon-san,

> From: Simon Horman, Sent: Thursday, June 8, 2023 12:24 AM
> 
> On Wed, Jun 07, 2023 at 10:56:41AM +0900, Yoshihiro Shimoda wrote:
> > Since this driver used the "global rate limiter" feature of GWCA,
> > the TX perfromance of each port was reduced when multiple ports
> 
> Hi Shimoda-san,
> 
> a very minor nit, in case you have to do a v4 for some other reason:
> 
>         perfromance -> performance

Thank you for your review! I'll fix it on v4.

> > transmitted frames simultaneously. To improve perfromance, remove
> 
> Here too.

I'll fix this too.

Best regards,
Yoshihiro Shimoda

> > the use of the "global rate limiter" feature and use "hardware pause"
> > features of the following:
> >  - "per priority pause" of GWCA
> >  - "global pause" of COMA
> >
> > Note that these features are not related to the ethernet PAUSE frame.
> >
> > Signed-off-by: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
> 
> ...
diff mbox series

Patch

diff --git a/drivers/net/ethernet/renesas/rswitch.c b/drivers/net/ethernet/renesas/rswitch.c
index 7bb0a6d594a0..84f62c77eb8f 100644
--- a/drivers/net/ethernet/renesas/rswitch.c
+++ b/drivers/net/ethernet/renesas/rswitch.c
@@ -90,6 +90,11 @@  static int rswitch_bpool_config(struct rswitch_private *priv)
 	return rswitch_reg_wait(priv->addr, CABPIRM, CABPIRM_BPR, CABPIRM_BPR);
 }
 
+static void rswitch_coma_init(struct rswitch_private *priv)
+{
+	iowrite32(CABPPFLC_INIT_VALUE, priv->addr + CABPPFLC0);
+}
+
 /* R-Switch-2 block (TOP) */
 static void rswitch_top_init(struct rswitch_private *priv)
 {
@@ -156,24 +161,6 @@  static int rswitch_gwca_axi_ram_reset(struct rswitch_private *priv)
 	return rswitch_reg_wait(priv->addr, GWARIRM, GWARIRM_ARR, GWARIRM_ARR);
 }
 
-static void rswitch_gwca_set_rate_limit(struct rswitch_private *priv, int rate)
-{
-	u32 gwgrlulc, gwgrlc;
-
-	switch (rate) {
-	case 1000:
-		gwgrlulc = 0x0000005f;
-		gwgrlc = 0x00010260;
-		break;
-	default:
-		dev_err(&priv->pdev->dev, "%s: This rate is not supported (%d)\n", __func__, rate);
-		return;
-	}
-
-	iowrite32(gwgrlulc, priv->addr + GWGRLULC);
-	iowrite32(gwgrlc, priv->addr + GWGRLC);
-}
-
 static bool rswitch_is_any_data_irq(struct rswitch_private *priv, u32 *dis, bool tx)
 {
 	u32 *mask = tx ? priv->gwca.tx_irq_bits : priv->gwca.rx_irq_bits;
@@ -402,7 +389,7 @@  static int rswitch_gwca_queue_format(struct net_device *ndev,
 	linkfix->die_dt = DT_LINKFIX;
 	rswitch_desc_set_dptr(linkfix, gq->ring_dma);
 
-	iowrite32(GWDCC_BALR | (gq->dir_tx ? GWDCC_DQT : 0) | GWDCC_EDE,
+	iowrite32(GWDCC_BALR | (gq->dir_tx ? GWDCC_DCP(GWCA_IPV_NUM) | GWDCC_DQT : 0) | GWDCC_EDE,
 		  priv->addr + GWDCC_OFFS(gq->index));
 
 	return 0;
@@ -500,7 +487,8 @@  static int rswitch_gwca_queue_ext_ts_format(struct net_device *ndev,
 	linkfix->die_dt = DT_LINKFIX;
 	rswitch_desc_set_dptr(linkfix, gq->ring_dma);
 
-	iowrite32(GWDCC_BALR | (gq->dir_tx ? GWDCC_DQT : 0) | GWDCC_ETS | GWDCC_EDE,
+	iowrite32(GWDCC_BALR | (gq->dir_tx ? GWDCC_DCP(GWCA_IPV_NUM) | GWDCC_DQT : 0) |
+		  GWDCC_ETS | GWDCC_EDE,
 		  priv->addr + GWDCC_OFFS(gq->index));
 
 	return 0;
@@ -649,7 +637,8 @@  static int rswitch_gwca_hw_init(struct rswitch_private *priv)
 	iowrite32(lower_32_bits(priv->gwca.ts_queue.ring_dma), priv->addr + GWTDCAC10);
 	iowrite32(upper_32_bits(priv->gwca.ts_queue.ring_dma), priv->addr + GWTDCAC00);
 	iowrite32(GWCA_TS_IRQ_BIT, priv->addr + GWTSDCC0);
-	rswitch_gwca_set_rate_limit(priv, priv->gwca.speed);
+
+	iowrite32(GWTPC_PPPL(GWCA_IPV_NUM), priv->addr + GWTPC0);
 
 	for (i = 0; i < RSWITCH_NUM_PORTS; i++) {
 		err = rswitch_rxdmac_init(priv, i);
@@ -1502,7 +1491,8 @@  static netdev_tx_t rswitch_start_xmit(struct sk_buff *skb, struct net_device *nd
 	rswitch_desc_set_dptr(&desc->desc, dma_addr);
 	desc->desc.info_ds = cpu_to_le16(skb->len);
 
-	desc->info1 = cpu_to_le64(INFO1_DV(BIT(rdev->etha->index)) | INFO1_FMT);
+	desc->info1 = cpu_to_le64(INFO1_DV(BIT(rdev->etha->index)) |
+				  INFO1_IPV(GWCA_IPV_NUM) | INFO1_FMT);
 	if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) {
 		struct rswitch_gwca_ts_info *ts_info;
 
@@ -1772,6 +1762,8 @@  static int rswitch_init(struct rswitch_private *priv)
 	if (err < 0)
 		return err;
 
+	rswitch_coma_init(priv);
+
 	err = rswitch_gwca_linkfix_alloc(priv);
 	if (err < 0)
 		return -ENOMEM;
diff --git a/drivers/net/ethernet/renesas/rswitch.h b/drivers/net/ethernet/renesas/rswitch.h
index b3e0411b408e..bb9ed971a97c 100644
--- a/drivers/net/ethernet/renesas/rswitch.h
+++ b/drivers/net/ethernet/renesas/rswitch.h
@@ -48,6 +48,7 @@ 
 #define GWCA_NUM_IRQS		8
 #define GWCA_INDEX		0
 #define AGENT_INDEX_GWCA	3
+#define GWCA_IPV_NUM		0
 #define GWRO			RSWITCH_GWCA0_OFFSET
 
 #define GWCA_TS_IRQ_RESOURCE_NAME	"gwca0_rxts0"
@@ -768,11 +769,14 @@  enum rswitch_gwca_mode {
 #define GWARIRM_ARR		BIT(1)
 
 #define GWDCC_BALR		BIT(24)
+#define GWDCC_DCP_MASK		GENMASK(18, 16)
+#define GWDCC_DCP(prio)		FIELD_PREP(GWDCC_DCP_MASK, (prio))
 #define GWDCC_DQT		BIT(11)
 #define GWDCC_ETS		BIT(9)
 #define GWDCC_EDE		BIT(8)
 
 #define GWTRC(queue)		(GWTRC0 + (queue) / 32 * 4)
+#define GWTPC_PPPL(ipv)		BIT(ipv)
 #define GWDCC_OFFS(queue)	(GWDCC0 + (queue) * 4)
 
 #define GWDIS(i)		(GWDIS0 + (i) * 0x10)
@@ -789,6 +793,8 @@  enum rswitch_gwca_mode {
 #define CABPIRM_BPIOG		BIT(0)
 #define CABPIRM_BPR		BIT(1)
 
+#define CABPPFLC_INIT_VALUE	0x00800080
+
 /* MFWD */
 #define FWPC0_LTHTA		BIT(0)
 #define FWPC0_IP4UE		BIT(3)
@@ -863,6 +869,7 @@  enum DIE_DT {
 
 /* For transmission */
 #define INFO1_TSUN(val)		((u64)(val) << 8ULL)
+#define INFO1_IPV(prio)		((u64)(prio) << 28ULL)
 #define INFO1_CSD0(index)	((u64)(index) << 32ULL)
 #define INFO1_CSD1(index)	((u64)(index) << 40ULL)
 #define INFO1_DV(port_vector)	((u64)(port_vector) << 48ULL)