diff mbox series

[net-next,5/5] net: renesas: rswitch: Use per-queue rate limiter

Message ID 20230529080840.1156458-6-yoshihiro.shimoda.uh@renesas.com (mailing list archive)
State Under Review
Delegated to: Geert Uytterhoeven
Headers show
Series net: renesas: rswitch: Improve performance of TX | expand

Commit Message

Yoshihiro Shimoda May 29, 2023, 8:08 a.m. UTC
Use per-queue rate limiter instead of global rate limiter. Otherwise
TX performance will be low when we use multiple ports at the same time.

Signed-off-by: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
---
 drivers/net/ethernet/renesas/rswitch.c | 51 +++++++++++++++++---------
 drivers/net/ethernet/renesas/rswitch.h | 15 +++++++-
 2 files changed, 47 insertions(+), 19 deletions(-)

Comments

kernel test robot May 29, 2023, 8:37 p.m. UTC | #1
Hi Yoshihiro,

kernel test robot noticed the following build errors:

[auto build test ERROR on net-next/main]

url:    https://github.com/intel-lab-lkp/linux/commits/Yoshihiro-Shimoda/dt-bindings-net-r8a779f0-ether-switch-Add-ACLK/20230529-161010
base:   net-next/main
patch link:    https://lore.kernel.org/r/20230529080840.1156458-6-yoshihiro.shimoda.uh%40renesas.com
patch subject: [PATCH net-next 5/5] net: renesas: rswitch: Use per-queue rate limiter
config: mips-allyesconfig (https://download.01.org/0day-ci/archive/20230530/202305300454.Kyjag1oy-lkp@intel.com/config)
compiler: mips-linux-gcc (GCC) 12.1.0
reproduce (this is a W=1 build):
        mkdir -p ~/bin
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/intel-lab-lkp/linux/commit/64502944471d5b6fac76f49c06c29edfbbe43935
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review Yoshihiro-Shimoda/dt-bindings-net-r8a779f0-ether-switch-Add-ACLK/20230529-161010
        git checkout 64502944471d5b6fac76f49c06c29edfbbe43935
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 ~/bin/make.cross W=1 O=build_dir ARCH=mips olddefconfig
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 ~/bin/make.cross W=1 O=build_dir ARCH=mips SHELL=/bin/bash

If you fix the issue, kindly add following tag where applicable
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202305300454.Kyjag1oy-lkp@intel.com/

All errors (new ones prefixed by >>):

   arch/mips/kernel/head.o: in function `kernel_entry':
   (.ref.text+0xac): relocation truncated to fit: R_MIPS_26 against `start_kernel'
   init/main.o: in function `set_reset_devices':
   main.c:(.init.text+0x20): relocation truncated to fit: R_MIPS_26 against `_mcount'
   main.c:(.init.text+0x30): relocation truncated to fit: R_MIPS_26 against `__sanitizer_cov_trace_pc'
   init/main.o: in function `debug_kernel':
   main.c:(.init.text+0xa4): relocation truncated to fit: R_MIPS_26 against `_mcount'
   main.c:(.init.text+0xb4): relocation truncated to fit: R_MIPS_26 against `__sanitizer_cov_trace_pc'
   init/main.o: in function `quiet_kernel':
   main.c:(.init.text+0x128): relocation truncated to fit: R_MIPS_26 against `_mcount'
   main.c:(.init.text+0x138): relocation truncated to fit: R_MIPS_26 against `__sanitizer_cov_trace_pc'
   init/main.o: in function `warn_bootconfig':
   main.c:(.init.text+0x1ac): relocation truncated to fit: R_MIPS_26 against `_mcount'
   main.c:(.init.text+0x1bc): relocation truncated to fit: R_MIPS_26 against `__sanitizer_cov_trace_pc'
   init/main.o: in function `init_setup':
   main.c:(.init.text+0x234): relocation truncated to fit: R_MIPS_26 against `_mcount'
   main.c:(.init.text+0x254): additional relocation overflows omitted from the output
   mips-linux-ld: drivers/net/ethernet/renesas/rswitch.o: in function `rswitch_open':
>> rswitch.c:(.text.rswitch_open+0x1a4): undefined reference to `__udivdi3'
kernel test robot May 29, 2023, 11:45 p.m. UTC | #2
Hi Yoshihiro,

kernel test robot noticed the following build errors:

[auto build test ERROR on net-next/main]

url:    https://github.com/intel-lab-lkp/linux/commits/Yoshihiro-Shimoda/dt-bindings-net-r8a779f0-ether-switch-Add-ACLK/20230529-161010
base:   net-next/main
patch link:    https://lore.kernel.org/r/20230529080840.1156458-6-yoshihiro.shimoda.uh%40renesas.com
patch subject: [PATCH net-next 5/5] net: renesas: rswitch: Use per-queue rate limiter
config: powerpc-allmodconfig (https://download.01.org/0day-ci/archive/20230530/202305300718.pdF0iaxU-lkp@intel.com/config)
compiler: powerpc-linux-gcc (GCC) 12.1.0
reproduce (this is a W=1 build):
        mkdir -p ~/bin
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/intel-lab-lkp/linux/commit/64502944471d5b6fac76f49c06c29edfbbe43935
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review Yoshihiro-Shimoda/dt-bindings-net-r8a779f0-ether-switch-Add-ACLK/20230529-161010
        git checkout 64502944471d5b6fac76f49c06c29edfbbe43935
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 ~/bin/make.cross W=1 O=build_dir ARCH=powerpc olddefconfig
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 ~/bin/make.cross W=1 O=build_dir ARCH=powerpc SHELL=/bin/bash

If you fix the issue, kindly add following tag where applicable
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202305300718.pdF0iaxU-lkp@intel.com/

All errors (new ones prefixed by >>, old ones prefixed by <<):

>> ERROR: modpost: "__udivdi3" [drivers/net/ethernet/renesas/rswitch_drv.ko] undefined!
Geert Uytterhoeven May 30, 2023, 7:33 a.m. UTC | #3
Hi Shimoda-san,

On Mon, May 29, 2023 at 10:08 AM Yoshihiro Shimoda
<yoshihiro.shimoda.uh@renesas.com> wrote:
> Use per-queue rate limiter instead of global rate limiter. Otherwise
> TX performance will be low when we use multiple ports at the same time.
>
> Signed-off-by: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>

Thanks for your patch!

> --- a/drivers/net/ethernet/renesas/rswitch.c
> +++ b/drivers/net/ethernet/renesas/rswitch.c
> @@ -156,22 +156,31 @@ static int rswitch_gwca_axi_ram_reset(struct rswitch_private *priv)
>         return rswitch_reg_wait(priv->addr, GWARIRM, GWARIRM_ARR, GWARIRM_ARR);
>  }
>
> -static void rswitch_gwca_set_rate_limit(struct rswitch_private *priv, int rate)
> +static void rswitch_gwca_set_rate_limit(struct rswitch_private *priv,
> +                                       struct rswitch_gwca_queue *txq)
>  {
> -       u32 gwgrlulc, gwgrlc;
> +       u64 period_ps;
> +       unsigned long rate;
> +       u32 gwrlc;
>
> -       switch (rate) {
> -       case 1000:
> -               gwgrlulc = 0x0000005f;
> -               gwgrlc = 0x00010260;
> -               break;
> -       default:
> -               dev_err(&priv->pdev->dev, "%s: This rate is not supported (%d)\n", __func__, rate);
> -               return;
> -       }
> +       rate = clk_get_rate(priv->aclk);
> +       if (!rate)
> +               rate = RSWITCH_ACLK_DEFAULT;
> +
> +       period_ps = div64_u64(1000000000000ULL, rate);

div64_ul, as rate is unsigned long.

> +
> +       /* GWRLC value = 256 * ACLK_period[ns] * maxBandwidth[Gbps] */
> +       gwrlc = 256 * period_ps * txq->speed / 1000000;

This contains an open-coded 64-by-32 division, causing link failures
on 32-bit platforms, so you should use div_u64() instead.  However,
because of the premultiplication by speed, which is 32-bit, you can
use the mul_u64_u32_div() helper.

Combining this with the calculation of period_ps above, you can simplify
this to:

    gwrlc = div64_ul(256000000ULL * txq->speed, rate);

> +
> +       /* To avoid overflow internally, the value should be 97% */
> +       gwrlc = gwrlc * 97 / 100;
>
> -       iowrite32(gwgrlulc, priv->addr + GWGRLULC);
> -       iowrite32(gwgrlc, priv->addr + GWGRLC);
> +       dev_dbg(&priv->pdev->dev,
> +               "%s: index = %d, speed = %d, rate = %ld, gwrlc = %08x\n",
> +               __func__, txq->index_trim, txq->speed, rate, gwrlc);
> +
> +       iowrite32(GWRLULC_NOT_REQUIRED, priv->addr + GWRLULC(txq->index_trim));
> +       iowrite32(gwrlc | GWRLC_RLE, priv->addr + GWRLC(txq->index_trim));
>  }
>

Gr{oetje,eeting}s,

                        Geert
Yoshihiro Shimoda May 30, 2023, 11:45 a.m. UTC | #4
Hi Geert-san,

> From: Geert Uytterhoeven, Sent: Tuesday, May 30, 2023 4:33 PM
> 
> Hi Shimoda-san,
> 
> On Mon, May 29, 2023 at 10:08 AM Yoshihiro Shimoda
> <yoshihiro.shimoda.uh@renesas.com> wrote:
> > Use per-queue rate limiter instead of global rate limiter. Otherwise
> > TX performance will be low when we use multiple ports at the same time.
> >
> > Signed-off-by: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
> 
> Thanks for your patch!

Thank you for your review!

> > --- a/drivers/net/ethernet/renesas/rswitch.c
> > +++ b/drivers/net/ethernet/renesas/rswitch.c
> > @@ -156,22 +156,31 @@ static int rswitch_gwca_axi_ram_reset(struct rswitch_private *priv)
> >         return rswitch_reg_wait(priv->addr, GWARIRM, GWARIRM_ARR, GWARIRM_ARR);
> >  }
> >
> > -static void rswitch_gwca_set_rate_limit(struct rswitch_private *priv, int rate)
> > +static void rswitch_gwca_set_rate_limit(struct rswitch_private *priv,
> > +                                       struct rswitch_gwca_queue *txq)
> >  {
> > -       u32 gwgrlulc, gwgrlc;
> > +       u64 period_ps;
> > +       unsigned long rate;
> > +       u32 gwrlc;
> >
> > -       switch (rate) {
> > -       case 1000:
> > -               gwgrlulc = 0x0000005f;
> > -               gwgrlc = 0x00010260;
> > -               break;
> > -       default:
> > -               dev_err(&priv->pdev->dev, "%s: This rate is not supported (%d)\n", __func__, rate);
> > -               return;
> > -       }
> > +       rate = clk_get_rate(priv->aclk);
> > +       if (!rate)
> > +               rate = RSWITCH_ACLK_DEFAULT;
> > +
> > +       period_ps = div64_u64(1000000000000ULL, rate);
> 
> div64_ul, as rate is unsigned long.

I see.

> > +
> > +       /* GWRLC value = 256 * ACLK_period[ns] * maxBandwidth[Gbps] */
> > +       gwrlc = 256 * period_ps * txq->speed / 1000000;
> 
> This contains an open-coded 64-by-32 division, causing link failures
> on 32-bit platforms, so you should use div_u64() instead.  However,
> because of the premultiplication by speed, which is 32-bit, you can
> use the mul_u64_u32_div() helper.

Thank you for your comment! After I got emails from kernel test robot,
I realized that I should use such a macro here.

> Combining this with the calculation of period_ps above, you can simplify
> this to:
> 
>     gwrlc = div64_ul(256000000ULL * txq->speed, rate);

Thank you for your suggestion! I'll fix it on v2.

Best regards,
Yoshihiro Shimoda

> > +
> > +       /* To avoid overflow internally, the value should be 97% */
> > +       gwrlc = gwrlc * 97 / 100;
> >
> > -       iowrite32(gwgrlulc, priv->addr + GWGRLULC);
> > -       iowrite32(gwgrlc, priv->addr + GWGRLC);
> > +       dev_dbg(&priv->pdev->dev,
> > +               "%s: index = %d, speed = %d, rate = %ld, gwrlc = %08x\n",
> > +               __func__, txq->index_trim, txq->speed, rate, gwrlc);
> > +
> > +       iowrite32(GWRLULC_NOT_REQUIRED, priv->addr + GWRLULC(txq->index_trim));
> > +       iowrite32(gwrlc | GWRLC_RLE, priv->addr + GWRLC(txq->index_trim));
> >  }
> >
> 
> Gr{oetje,eeting}s,
> 
>                         Geert
> 
> --
> Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org
> 
> In personal conversations with technical people, I call myself a hacker. But
> when I'm talking to journalists I just say "programmer" or something like that.
>                                 -- Linus Torvalds
Simon Horman May 30, 2023, 7:29 p.m. UTC | #5
On Mon, May 29, 2023 at 05:08:40PM +0900, Yoshihiro Shimoda wrote:
> Use per-queue rate limiter instead of global rate limiter. Otherwise
> TX performance will be low when we use multiple ports at the same time.
> 
> Signed-off-by: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
> ---
>  drivers/net/ethernet/renesas/rswitch.c | 51 +++++++++++++++++---------
>  drivers/net/ethernet/renesas/rswitch.h | 15 +++++++-
>  2 files changed, 47 insertions(+), 19 deletions(-)
> 
> diff --git a/drivers/net/ethernet/renesas/rswitch.c b/drivers/net/ethernet/renesas/rswitch.c
> index 4ae34b0206cd..a7195625a2c7 100644
> --- a/drivers/net/ethernet/renesas/rswitch.c
> +++ b/drivers/net/ethernet/renesas/rswitch.c
> @@ -156,22 +156,31 @@ static int rswitch_gwca_axi_ram_reset(struct rswitch_private *priv)
>  	return rswitch_reg_wait(priv->addr, GWARIRM, GWARIRM_ARR, GWARIRM_ARR);
>  }
>  
> -static void rswitch_gwca_set_rate_limit(struct rswitch_private *priv, int rate)
> +static void rswitch_gwca_set_rate_limit(struct rswitch_private *priv,
> +					struct rswitch_gwca_queue *txq)
>  {
> -	u32 gwgrlulc, gwgrlc;
> +	u64 period_ps;
> +	unsigned long rate;
> +	u32 gwrlc;

Hi Shimoda-san,

a minor not from my side: please use reverse xmas tree order - longest line
to shortest - for local variable declarations in networking code.

	unsigned long rate;
	u64 period_ps;
	u32 gwrlc;
Yoshihiro Shimoda June 6, 2023, 8:59 a.m. UTC | #6
Hi Simon-san,

> From: Simon Horman, Sent: Wednesday, May 31, 2023 4:29 AM
> 
> On Mon, May 29, 2023 at 05:08:40PM +0900, Yoshihiro Shimoda wrote:
> > Use per-queue rate limiter instead of global rate limiter. Otherwise
> > TX performance will be low when we use multiple ports at the same time.
> >
> > Signed-off-by: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
> > ---
> >  drivers/net/ethernet/renesas/rswitch.c | 51 +++++++++++++++++---------
> >  drivers/net/ethernet/renesas/rswitch.h | 15 +++++++-
> >  2 files changed, 47 insertions(+), 19 deletions(-)
> >
> > diff --git a/drivers/net/ethernet/renesas/rswitch.c b/drivers/net/ethernet/renesas/rswitch.c
> > index 4ae34b0206cd..a7195625a2c7 100644
> > --- a/drivers/net/ethernet/renesas/rswitch.c
> > +++ b/drivers/net/ethernet/renesas/rswitch.c
> > @@ -156,22 +156,31 @@ static int rswitch_gwca_axi_ram_reset(struct rswitch_private *priv)
> >  	return rswitch_reg_wait(priv->addr, GWARIRM, GWARIRM_ARR, GWARIRM_ARR);
> >  }
> >
> > -static void rswitch_gwca_set_rate_limit(struct rswitch_private *priv, int rate)
> > +static void rswitch_gwca_set_rate_limit(struct rswitch_private *priv,
> > +					struct rswitch_gwca_queue *txq)
> >  {
> > -	u32 gwgrlulc, gwgrlc;
> > +	u64 period_ps;
> > +	unsigned long rate;
> > +	u32 gwrlc;
> 
> Hi Shimoda-san,
> 
> a minor not from my side: please use reverse xmas tree order - longest line
> to shortest - for local variable declarations in networking code.
> 
> 	unsigned long rate;
> 	u64 period_ps;
> 	u32 gwrlc;

Thank you for your comment! I completely forgot that network driver should use
reverse xmas tree order...

JFYI, I found another way to improve the performance. So, I dropped this patch on v2 [1].

[1] https://lore.kernel.org/all/20230606085558.1708766-1-yoshihiro.shimoda.uh@renesas.com/

Best regards,
Yoshihiro Shimoda

> --
> pw-bot: cr
diff mbox series

Patch

diff --git a/drivers/net/ethernet/renesas/rswitch.c b/drivers/net/ethernet/renesas/rswitch.c
index 4ae34b0206cd..a7195625a2c7 100644
--- a/drivers/net/ethernet/renesas/rswitch.c
+++ b/drivers/net/ethernet/renesas/rswitch.c
@@ -156,22 +156,31 @@  static int rswitch_gwca_axi_ram_reset(struct rswitch_private *priv)
 	return rswitch_reg_wait(priv->addr, GWARIRM, GWARIRM_ARR, GWARIRM_ARR);
 }
 
-static void rswitch_gwca_set_rate_limit(struct rswitch_private *priv, int rate)
+static void rswitch_gwca_set_rate_limit(struct rswitch_private *priv,
+					struct rswitch_gwca_queue *txq)
 {
-	u32 gwgrlulc, gwgrlc;
+	u64 period_ps;
+	unsigned long rate;
+	u32 gwrlc;
 
-	switch (rate) {
-	case 1000:
-		gwgrlulc = 0x0000005f;
-		gwgrlc = 0x00010260;
-		break;
-	default:
-		dev_err(&priv->pdev->dev, "%s: This rate is not supported (%d)\n", __func__, rate);
-		return;
-	}
+	rate = clk_get_rate(priv->aclk);
+	if (!rate)
+		rate = RSWITCH_ACLK_DEFAULT;
+
+	period_ps = div64_u64(1000000000000ULL, rate);
+
+	/* GWRLC value = 256 * ACLK_period[ns] * maxBandwidth[Gbps] */
+	gwrlc = 256 * period_ps * txq->speed / 1000000;
+
+	/* To avoid overflow internally, the value should be 97% */
+	gwrlc = gwrlc * 97 / 100;
 
-	iowrite32(gwgrlulc, priv->addr + GWGRLULC);
-	iowrite32(gwgrlc, priv->addr + GWGRLC);
+	dev_dbg(&priv->pdev->dev,
+		"%s: index = %d, speed = %d, rate = %ld, gwrlc = %08x\n",
+		__func__, txq->index_trim, txq->speed, rate, gwrlc);
+
+	iowrite32(GWRLULC_NOT_REQUIRED, priv->addr + GWRLULC(txq->index_trim));
+	iowrite32(gwrlc | GWRLC_RLE, priv->addr + GWRLC(txq->index_trim));
 }
 
 static bool rswitch_is_any_data_irq(struct rswitch_private *priv, u32 *dis, bool tx)
@@ -548,6 +557,10 @@  static struct rswitch_gwca_queue *rswitch_gwca_get(struct rswitch_private *priv,
 	memset(gq, 0, sizeof(*gq));
 	gq->index = index;
 
+	/* The first "Rate limiter" queue is located at GWCA_AXI_CHAIN_N - 1 */
+	if (dir_tx)
+		gq->index_trim = GWCA_AXI_CHAIN_N - index - 1;
+
 	return gq;
 }
 
@@ -651,7 +664,6 @@  static int rswitch_gwca_hw_init(struct rswitch_private *priv)
 	iowrite32(lower_32_bits(priv->gwca.ts_queue.ring_dma), priv->addr + GWTDCAC10);
 	iowrite32(upper_32_bits(priv->gwca.ts_queue.ring_dma), priv->addr + GWTDCAC00);
 	iowrite32(GWCA_TS_IRQ_BIT, priv->addr + GWTSDCC0);
-	rswitch_gwca_set_rate_limit(priv, priv->gwca.speed);
 
 	for (i = 0; i < RSWITCH_NUM_PORTS; i++) {
 		err = rswitch_rxdmac_init(priv, i);
@@ -1441,6 +1453,8 @@  static int rswitch_open(struct net_device *ndev)
 	napi_enable(&rdev->napi);
 	netif_start_queue(ndev);
 
+	rswitch_gwca_set_rate_limit(rdev->priv, rdev->tx_queue);
+
 	rswitch_enadis_data_irq(rdev->priv, rdev->tx_queue->index, true);
 	rswitch_enadis_data_irq(rdev->priv, rdev->rx_queue->index, true);
 
@@ -1723,9 +1737,6 @@  static int rswitch_device_alloc(struct rswitch_private *priv, int index)
 	if (err < 0)
 		goto out_get_params;
 
-	if (rdev->priv->gwca.speed < rdev->etha->speed)
-		rdev->priv->gwca.speed = rdev->etha->speed;
-
 	err = rswitch_rxdmac_alloc(ndev);
 	if (err < 0)
 		goto out_rxdmac;
@@ -1734,6 +1745,8 @@  static int rswitch_device_alloc(struct rswitch_private *priv, int index)
 	if (err < 0)
 		goto out_txdmac;
 
+	rdev->tx_queue->speed = rdev->etha->speed;
+
 	return 0;
 
 out_txdmac:
@@ -1903,6 +1916,10 @@  static int renesas_eth_sw_probe(struct platform_device *pdev)
 	pm_runtime_enable(&pdev->dev);
 	pm_runtime_get_sync(&pdev->dev);
 
+	priv->aclk = devm_clk_get_optional(&pdev->dev, "aclk");
+	if (IS_ERR(priv->aclk))
+		return PTR_ERR(priv->aclk);
+
 	ret = rswitch_init(priv);
 	if (ret < 0) {
 		pm_runtime_put(&pdev->dev);
diff --git a/drivers/net/ethernet/renesas/rswitch.h b/drivers/net/ethernet/renesas/rswitch.h
index 7ba45ddab42a..741f45266c29 100644
--- a/drivers/net/ethernet/renesas/rswitch.h
+++ b/drivers/net/ethernet/renesas/rswitch.h
@@ -7,9 +7,11 @@ 
 #ifndef __RSWITCH_H__
 #define __RSWITCH_H__
 
+#include <linux/clk.h>
 #include <linux/platform_device.h>
 #include "rcar_gen4_ptp.h"
 
+#define RSWITCH_ACLK_DEFAULT	400000000UL
 #define RSWITCH_NUM_PORTS	3
 #define rswitch_for_each_enabled_port(priv, i)		\
 	for (i = 0; i < RSWITCH_NUM_PORTS; i++)		\
@@ -676,7 +678,7 @@  enum rswitch_reg {
 	GWIDACAM10	= GWRO + 0x0984,
 	GWGRLC		= GWRO + 0x0a00,
 	GWGRLULC	= GWRO + 0x0a04,
-	GWRLIVC0	= GWRO + 0x0a80,
+	GWRLC0		= GWRO + 0x0a80,
 	GWRLULC0	= GWRO + 0x0a84,
 	GWIDPC		= GWRO + 0x0b00,
 	GWIDC0		= GWRO + 0x0c00,
@@ -778,6 +780,11 @@  enum rswitch_gwca_mode {
 #define GWTRC(queue)		(GWTRC0 + (queue) / 32 * 4)
 #define GWDCC_OFFS(queue)	(GWDCC0 + (queue) * 4)
 
+#define GWRLC(trim)		(GWRLC0 + (trim) * 8)
+#define GWRLC_RLE		BIT(16)
+#define GWRLULC(trim)		(GWRLULC0 + (trim) * 8)
+#define GWRLULC_NOT_REQUIRED	0x00800000
+
 #define GWDIS(i)		(GWDIS0 + (i) * 0x10)
 #define GWDIE(i)		(GWDIE0 + (i) * 0x10)
 #define GWDID(i)		(GWDID0 + (i) * 0x10)
@@ -942,6 +949,10 @@  struct rswitch_gwca_queue {
 	bool dir_tx;
 	struct sk_buff **skbs;
 	struct net_device *ndev;	/* queue to ndev for irq */
+
+	/* For tx_ring */
+	int index_trim;
+	int speed;
 };
 
 struct rswitch_gwca_ts_info {
@@ -963,7 +974,6 @@  struct rswitch_gwca {
 	DECLARE_BITMAP(used, GWCA_AXI_CHAIN_N);
 	u32 tx_irq_bits[GWCA_NUM_IRQ_REGS];
 	u32 rx_irq_bits[GWCA_NUM_IRQ_REGS];
-	int speed;
 };
 
 #define NUM_QUEUES_PER_NDEV	2
@@ -997,6 +1007,7 @@  struct rswitch_private {
 	struct platform_device *pdev;
 	void __iomem *addr;
 	struct rcar_gen4_ptp_private *ptp_priv;
+	struct clk *aclk;
 
 	struct rswitch_device *rdev[RSWITCH_NUM_PORTS];
 	DECLARE_BITMAP(opened_ports, RSWITCH_NUM_PORTS);