
[net] net: mana: Fix perf regression: remove rx_cqes, tx_cqes counters

Message ID 1684963320-25282-1-git-send-email-haiyangz@microsoft.com (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Headers show
Series [net] net: mana: Fix perf regression: remove rx_cqes, tx_cqes counters

Checks

Context Check Description
netdev/series_format success Single patches do not need cover letters
netdev/tree_selection success Clearly marked for net
netdev/fixes_present success Fixes tag present in non-next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 8 this patch: 8
netdev/cc_maintainers fail 1 blamed authors not CCed: shradhagupta@linux.microsoft.com; 2 maintainers not CCed: tglx@linutronix.de shradhagupta@linux.microsoft.com
netdev/build_clang success Errors and warnings before: 8 this patch: 8
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success Fixes tag looks correct
netdev/build_allmodconfig_warn success Errors and warnings before: 8 this patch: 8
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 60 lines checked
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Haiyang Zhang May 24, 2023, 9:22 p.m. UTC
The apc->eth_stats.rx_cqes counter is one per NIC (vport), and it sits
on the frequent, parallel code path of all queues. Reading and writing
this single shared variable from many threads on different CPUs causes
heavy cache-line bouncing and memory traffic, hence the perf
regression. The value is also inaccurate because of the high volume of
concurrent, unsynchronized updates.

Since the error path of mana_poll_rx_cq() already has warnings, keeping
the counter and converting it to a per-queue variable is not necessary.
So just remove this counter from the high-frequency code path.

Also remove the tx_cqes counter for the same reason. We have warnings
and other counters for errors on that path, and do not need to count
every normal CQE processed.

Cc: stable@vger.kernel.org
Fixes: bd7fc6e1957c ("net: mana: Add new MANA VF performance counters for easier troubleshooting")
Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
---
 drivers/net/ethernet/microsoft/mana/mana_en.c      | 10 ----------
 drivers/net/ethernet/microsoft/mana/mana_ethtool.c |  2 --
 include/net/mana/mana.h                            |  2 --
 3 files changed, 14 deletions(-)
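
The regression described above is the classic cost of a single statistics
field shared by every queue: each poll of mana_poll_rx_cq()/mana_poll_tx_cq()
wrote apc->eth_stats.rx_cqes or tx_cqes, so all CPUs servicing queues kept
invalidating the same cache line, and the plain (non-atomic) updates also
raced with each other. The stand-alone user-space sketch below is not driver
code; it only reproduces that effect by comparing one shared counter with
per-thread, cache-line-padded counters. The thread count, iteration count and
64-byte padding are illustrative assumptions, not values from the driver.

/*
 * Stand-alone user-space sketch (NOT driver code) of the effect fixed by
 * this patch: many CPUs writing one shared counter vs. each writer using
 * its own cache-line-padded slot.
 * Build: gcc -O2 -pthread contention_demo.c -o contention_demo
 */
#include <pthread.h>
#include <stdint.h>
#include <stdio.h>
#include <time.h>

#define NTHREADS 8
#define ITERS    (20 * 1000 * 1000L)

static volatile uint64_t shared_counter;	/* one counter per "NIC" */

struct slot {
	volatile uint64_t cnt;
	char pad[64 - sizeof(uint64_t)];	/* keep each slot on its own cache line */
};
static struct slot per_queue[NTHREADS];		/* one counter per "queue" */

static void *hammer_shared(void *arg)
{
	(void)arg;
	for (long i = 0; i < ITERS; i++)
		shared_counter++;		/* all CPUs bounce the same line; also racy */
	return NULL;
}

static void *hammer_per_queue(void *arg)
{
	struct slot *s = arg;

	for (long i = 0; i < ITERS; i++)
		s->cnt++;			/* each CPU owns its line */
	return NULL;
}

static double run(void *(*fn)(void *))
{
	pthread_t t[NTHREADS];
	struct timespec a, b;

	clock_gettime(CLOCK_MONOTONIC, &a);
	for (int i = 0; i < NTHREADS; i++)
		pthread_create(&t[i], NULL, fn, &per_queue[i]);
	for (int i = 0; i < NTHREADS; i++)
		pthread_join(t[i], NULL);
	clock_gettime(CLOCK_MONOTONIC, &b);
	return (b.tv_sec - a.tv_sec) + (b.tv_nsec - a.tv_nsec) / 1e9;
}

int main(void)
{
	printf("shared counter:     %.2fs\n", run(hammer_shared));
	printf("per-queue counters: %.2fs\n", run(hammer_per_queue));

	/* The shared total is typically also wrong (lost updates), echoing the
	 * "not accurate due to the high volume concurrent r/w" point above. */
	printf("shared total: %llu (expected %llu)\n",
	       (unsigned long long)shared_counter,
	       (unsigned long long)(NTHREADS * ITERS));
	return 0;
}

On most multi-core machines the shared-counter run is markedly slower and its
final total falls short of the expected value, which mirrors both halves of
the commit message: the perf hit and the inaccuracy.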

Comments

Horatiu Vultur May 25, 2023, 6:48 a.m. UTC | #1
The 05/24/2023 14:22, Haiyang Zhang wrote:

Hi Haiyang,

> 
> The apc->eth_stats.rx_cqes is one per NIC (vport), and it's on the
> frequent and parallel code path of all queues. So, r/w into this
> single shared variable by many threads on different CPUs creates a
> lot caching and memory overhead, hence perf regression. And, it's
> not accurate due to the high volume concurrent r/w.

Do you have any numbers to show the improvement of this change?

> 
> Since the error path of mana_poll_rx_cq() already has warnings, so
> keeping the counter and convert it to a per-queue variable is not
> necessary. So, just remove this counter from this high frequency
> code path.
> 
> Also, remove the tx_cqes counter for the same reason. We have
> warnings & other counters for errors on that path, and don't need
> to count every normal cqe processing.

Will you not have problems with the counter 'apc->eth_stats.tx_cqe_err'?
It is not in the hot path but you will have concurrent access to it.

> 
> Cc: stable@vger.kernel.org
> Fixes: bd7fc6e1957c ("net: mana: Add new MANA VF performance counters for easier troubleshooting")
> Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
> ---
>  drivers/net/ethernet/microsoft/mana/mana_en.c      | 10 ----------
>  drivers/net/ethernet/microsoft/mana/mana_ethtool.c |  2 --
>  include/net/mana/mana.h                            |  2 --
>  3 files changed, 14 deletions(-)
> 
> diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
> index 06d6292e09b3..d907727c7b7a 100644
> --- a/drivers/net/ethernet/microsoft/mana/mana_en.c
> +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
> @@ -1279,8 +1279,6 @@ static void mana_poll_tx_cq(struct mana_cq *cq)
>         if (comp_read < 1)
>                 return;
> 
> -       apc->eth_stats.tx_cqes = comp_read;
> -
>         for (i = 0; i < comp_read; i++) {
>                 struct mana_tx_comp_oob *cqe_oob;
> 
> @@ -1363,8 +1361,6 @@ static void mana_poll_tx_cq(struct mana_cq *cq)
>                 WARN_ON_ONCE(1);
> 
>         cq->work_done = pkt_transmitted;
> -
> -       apc->eth_stats.tx_cqes -= pkt_transmitted;
>  }
> 
>  static void mana_post_pkt_rxq(struct mana_rxq *rxq)
> @@ -1626,15 +1622,11 @@ static void mana_poll_rx_cq(struct mana_cq *cq)
>  {
>         struct gdma_comp *comp = cq->gdma_comp_buf;
>         struct mana_rxq *rxq = cq->rxq;
> -       struct mana_port_context *apc;
>         int comp_read, i;
> 
> -       apc = netdev_priv(rxq->ndev);
> -
>         comp_read = mana_gd_poll_cq(cq->gdma_cq, comp, CQE_POLLING_BUFFER);
>         WARN_ON_ONCE(comp_read > CQE_POLLING_BUFFER);
> 
> -       apc->eth_stats.rx_cqes = comp_read;
>         rxq->xdp_flush = false;
> 
>         for (i = 0; i < comp_read; i++) {
> @@ -1646,8 +1638,6 @@ static void mana_poll_rx_cq(struct mana_cq *cq)
>                         return;
> 
>                 mana_process_rx_cqe(rxq, cq, &comp[i]);
> -
> -               apc->eth_stats.rx_cqes--;
>         }
> 
>         if (rxq->xdp_flush)
> diff --git a/drivers/net/ethernet/microsoft/mana/mana_ethtool.c b/drivers/net/ethernet/microsoft/mana/mana_ethtool.c
> index a64c81410dc1..0dc78679f620 100644
> --- a/drivers/net/ethernet/microsoft/mana/mana_ethtool.c
> +++ b/drivers/net/ethernet/microsoft/mana/mana_ethtool.c
> @@ -13,11 +13,9 @@ static const struct {
>  } mana_eth_stats[] = {
>         {"stop_queue", offsetof(struct mana_ethtool_stats, stop_queue)},
>         {"wake_queue", offsetof(struct mana_ethtool_stats, wake_queue)},
> -       {"tx_cqes", offsetof(struct mana_ethtool_stats, tx_cqes)},
>         {"tx_cq_err", offsetof(struct mana_ethtool_stats, tx_cqe_err)},
>         {"tx_cqe_unknown_type", offsetof(struct mana_ethtool_stats,
>                                         tx_cqe_unknown_type)},
> -       {"rx_cqes", offsetof(struct mana_ethtool_stats, rx_cqes)},
>         {"rx_coalesced_err", offsetof(struct mana_ethtool_stats,
>                                         rx_coalesced_err)},
>         {"rx_cqe_unknown_type", offsetof(struct mana_ethtool_stats,
> diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h
> index cd386aa7c7cc..9eef19972845 100644
> --- a/include/net/mana/mana.h
> +++ b/include/net/mana/mana.h
> @@ -347,10 +347,8 @@ struct mana_tx_qp {
>  struct mana_ethtool_stats {
>         u64 stop_queue;
>         u64 wake_queue;
> -       u64 tx_cqes;
>         u64 tx_cqe_err;
>         u64 tx_cqe_unknown_type;
> -       u64 rx_cqes;
>         u64 rx_coalesced_err;
>         u64 rx_cqe_unknown_type;
>  };
> --
> 2.25.1
> 
>
Haiyang Zhang May 25, 2023, 2:34 p.m. UTC | #2
> -----Original Message-----
> From: Horatiu Vultur <horatiu.vultur@microchip.com>
> Sent: Thursday, May 25, 2023 2:49 AM
> To: Haiyang Zhang <haiyangz@microsoft.com>
> Cc: linux-hyperv@vger.kernel.org; netdev@vger.kernel.org; Dexuan Cui
> <decui@microsoft.com>; KY Srinivasan <kys@microsoft.com>; Paul Rosswurm
> <paulros@microsoft.com>; olaf@aepfle.de; vkuznets@redhat.com;
> davem@davemloft.net; wei.liu@kernel.org; edumazet@google.com;
> kuba@kernel.org; pabeni@redhat.com; leon@kernel.org; Long Li
> <longli@microsoft.com>; ssengar@linux.microsoft.com; linux-
> rdma@vger.kernel.org; daniel@iogearbox.net; john.fastabend@gmail.com;
> bpf@vger.kernel.org; ast@kernel.org; Ajay Sharma
> <sharmaajay@microsoft.com>; hawk@kernel.org; linux-
> kernel@vger.kernel.org; stable@vger.kernel.org
> Subject: Re: [PATCH net] net: mana: Fix perf regression: remove rx_cqes,
> tx_cqes counters
> 
> The 05/24/2023 14:22, Haiyang Zhang wrote:
> 
> Hi Haiyang,
> 
> >
> > The apc->eth_stats.rx_cqes is one per NIC (vport), and it's on the
> > frequent and parallel code path of all queues. So, r/w into this
> > single shared variable by many threads on different CPUs creates a
> > lot caching and memory overhead, hence perf regression. And, it's
> > not accurate due to the high volume concurrent r/w.
> 
> Do you have any numbers to show the improvement of this change?

The numbers are not published. The perf regression of the previous 
patch is very significant, and this patch eliminates the regression.

> 
> >
> > Since the error path of mana_poll_rx_cq() already has warnings, so
> > keeping the counter and convert it to a per-queue variable is not
> > necessary. So, just remove this counter from this high frequency
> > code path.
> >
> > Also, remove the tx_cqes counter for the same reason. We have
> > warnings & other counters for errors on that path, and don't need
> > to count every normal cqe processing.
> 
> Will you not have problems with the counter 'apc->eth_stats.tx_cqe_err'?
> It is not in the hot path but you will have concurrent access to it.

Yes, but that error happens rarely, so a shared variable is good enough. I am
not changing it in this patch.

Thanks,
- Haiyang
Horatiu Vultur May 26, 2023, 6:45 a.m. UTC | #3
The 05/25/2023 14:34, Haiyang Zhang wrote:
> 
> > -----Original Message-----
> > From: Horatiu Vultur <horatiu.vultur@microchip.com>
> > Sent: Thursday, May 25, 2023 2:49 AM
> > To: Haiyang Zhang <haiyangz@microsoft.com>
> > Cc: linux-hyperv@vger.kernel.org; netdev@vger.kernel.org; Dexuan Cui
> > <decui@microsoft.com>; KY Srinivasan <kys@microsoft.com>; Paul Rosswurm
> > <paulros@microsoft.com>; olaf@aepfle.de; vkuznets@redhat.com;
> > davem@davemloft.net; wei.liu@kernel.org; edumazet@google.com;
> > kuba@kernel.org; pabeni@redhat.com; leon@kernel.org; Long Li
> > <longli@microsoft.com>; ssengar@linux.microsoft.com; linux-
> > rdma@vger.kernel.org; daniel@iogearbox.net; john.fastabend@gmail.com;
> > bpf@vger.kernel.org; ast@kernel.org; Ajay Sharma
> > <sharmaajay@microsoft.com>; hawk@kernel.org; linux-
> > kernel@vger.kernel.org; stable@vger.kernel.org
> > Subject: Re: [PATCH net] net: mana: Fix perf regression: remove rx_cqes,
> > tx_cqes counters
> >
> > The 05/24/2023 14:22, Haiyang Zhang wrote:
> >
> > Hi Haiyang,
> >
> > >
> > > The apc->eth_stats.rx_cqes is one per NIC (vport), and it's on the
> > > frequent and parallel code path of all queues. So, r/w into this
> > > single shared variable by many threads on different CPUs creates a
> > > lot caching and memory overhead, hence perf regression. And, it's
> > > not accurate due to the high volume concurrent r/w.
> >
> > Do you have any numbers to show the improvement of this change?
> 
> The numbers are not published. The perf regression of the previous
> patch is very significant, and this patch eliminates the regression.
> 
> >
> > >
> > > Since the error path of mana_poll_rx_cq() already has warnings, so
> > > keeping the counter and convert it to a per-queue variable is not
> > > necessary. So, just remove this counter from this high frequency
> > > code path.
> > >
> > > Also, remove the tx_cqes counter for the same reason. We have
> > > warnings & other counters for errors on that path, and don't need
> > > to count every normal cqe processing.
> >
> > Will you not have problems with the counter 'apc->eth_stats.tx_cqe_err'?
> > It is not in the hot path but you will have concurrent access to it.
> 
> Yes, but that error happens rarely, so a shared variable is good enough. So, I
> don't change it in this patch.

OK, I understand.
Maybe this can be fixed in a different patch at a later point. Thanks.

Reviewed-by: Horatiu Vultur <horatiu.vultur@microchip.com>

> 
> Thanks,
> - Haiyang
>
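
Horatiu's remark that tx_cqe_err could be "fixed in a different patch at a
later point" refers to the same shared-counter concern, just on a cold error
path. Purely as an illustration of what such a follow-up could look like
(every struct and field name below is invented for this sketch and does not
match the real mana layout), the usual shape is a queue-local counter that is
folded into one total only when ethtool asks for it:

/*
 * Hypothetical sketch only: a per-queue error counter summed at ethtool
 * time. All names are invented for illustration and do not match the
 * actual mana structures.
 */
#include <stdint.h>
#include <stdio.h>

#define MAX_QUEUES 64

struct txq_stats {
	uint64_t cqe_err;		/* written only from this queue's poll context */
};

struct port_ctx {
	unsigned int num_queues;
	struct txq_stats txq[MAX_QUEUES];
};

/* Error path: bump only the queue-local counter, no shared cache line. */
static inline void txq_count_cqe_err(struct port_ctx *apc, unsigned int q)
{
	apc->txq[q].cqe_err++;
}

/* ethtool path (cold): fold per-queue values into one reported total. */
static inline uint64_t txq_total_cqe_err(const struct port_ctx *apc)
{
	uint64_t sum = 0;

	for (unsigned int q = 0; q < apc->num_queues; q++)
		sum += apc->txq[q].cqe_err;
	return sum;
}

int main(void)
{
	struct port_ctx apc = { .num_queues = 4 };

	txq_count_cqe_err(&apc, 2);
	txq_count_cqe_err(&apc, 2);
	printf("total tx_cqe_err: %llu\n",
	       (unsigned long long)txq_total_cqe_err(&apc));
	return 0;
}

Because the error in question is rare, the thread above concludes the shared
field is acceptable for now, which is why this patch leaves tx_cqe_err
untouched.
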
Jiri Pirko May 26, 2023, 10:44 a.m. UTC | #4
Wed, May 24, 2023 at 11:22:00PM CEST, haiyangz@microsoft.com wrote:
>The apc->eth_stats.rx_cqes is one per NIC (vport), and it's on the
>frequent and parallel code path of all queues. So, r/w into this
>single shared variable by many threads on different CPUs creates a
>lot caching and memory overhead, hence perf regression. And, it's
>not accurate due to the high volume concurrent r/w.
>
>Since the error path of mana_poll_rx_cq() already has warnings, so
>keeping the counter and convert it to a per-queue variable is not
>necessary. So, just remove this counter from this high frequency
>code path.
>
>Also, remove the tx_cqes counter for the same reason. We have
>warnings & other counters for errors on that path, and don't need
>to count every normal cqe processing.
>
>Cc: stable@vger.kernel.org
>Fixes: bd7fc6e1957c ("net: mana: Add new MANA VF performance counters for easier troubleshooting")
>Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>

Reviewed-by: Jiri Pirko <jiri@nvidia.com>

Patch

diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
index 06d6292e09b3..d907727c7b7a 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_en.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
@@ -1279,8 +1279,6 @@  static void mana_poll_tx_cq(struct mana_cq *cq)
 	if (comp_read < 1)
 		return;
 
-	apc->eth_stats.tx_cqes = comp_read;
-
 	for (i = 0; i < comp_read; i++) {
 		struct mana_tx_comp_oob *cqe_oob;
 
@@ -1363,8 +1361,6 @@  static void mana_poll_tx_cq(struct mana_cq *cq)
 		WARN_ON_ONCE(1);
 
 	cq->work_done = pkt_transmitted;
-
-	apc->eth_stats.tx_cqes -= pkt_transmitted;
 }
 
 static void mana_post_pkt_rxq(struct mana_rxq *rxq)
@@ -1626,15 +1622,11 @@  static void mana_poll_rx_cq(struct mana_cq *cq)
 {
 	struct gdma_comp *comp = cq->gdma_comp_buf;
 	struct mana_rxq *rxq = cq->rxq;
-	struct mana_port_context *apc;
 	int comp_read, i;
 
-	apc = netdev_priv(rxq->ndev);
-
 	comp_read = mana_gd_poll_cq(cq->gdma_cq, comp, CQE_POLLING_BUFFER);
 	WARN_ON_ONCE(comp_read > CQE_POLLING_BUFFER);
 
-	apc->eth_stats.rx_cqes = comp_read;
 	rxq->xdp_flush = false;
 
 	for (i = 0; i < comp_read; i++) {
@@ -1646,8 +1638,6 @@  static void mana_poll_rx_cq(struct mana_cq *cq)
 			return;
 
 		mana_process_rx_cqe(rxq, cq, &comp[i]);
-
-		apc->eth_stats.rx_cqes--;
 	}
 
 	if (rxq->xdp_flush)
diff --git a/drivers/net/ethernet/microsoft/mana/mana_ethtool.c b/drivers/net/ethernet/microsoft/mana/mana_ethtool.c
index a64c81410dc1..0dc78679f620 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_ethtool.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_ethtool.c
@@ -13,11 +13,9 @@  static const struct {
 } mana_eth_stats[] = {
 	{"stop_queue", offsetof(struct mana_ethtool_stats, stop_queue)},
 	{"wake_queue", offsetof(struct mana_ethtool_stats, wake_queue)},
-	{"tx_cqes", offsetof(struct mana_ethtool_stats, tx_cqes)},
 	{"tx_cq_err", offsetof(struct mana_ethtool_stats, tx_cqe_err)},
 	{"tx_cqe_unknown_type", offsetof(struct mana_ethtool_stats,
 					tx_cqe_unknown_type)},
-	{"rx_cqes", offsetof(struct mana_ethtool_stats, rx_cqes)},
 	{"rx_coalesced_err", offsetof(struct mana_ethtool_stats,
 					rx_coalesced_err)},
 	{"rx_cqe_unknown_type", offsetof(struct mana_ethtool_stats,
diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h
index cd386aa7c7cc..9eef19972845 100644
--- a/include/net/mana/mana.h
+++ b/include/net/mana/mana.h
@@ -347,10 +347,8 @@  struct mana_tx_qp {
 struct mana_ethtool_stats {
 	u64 stop_queue;
 	u64 wake_queue;
-	u64 tx_cqes;
 	u64 tx_cqe_err;
 	u64 tx_cqe_unknown_type;
-	u64 rx_cqes;
 	u64 rx_coalesced_err;
 	u64 rx_cqe_unknown_type;
 };
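
For context on the ethtool hunk: each surviving entry of mana_eth_stats[]
pairs a user-visible string with an offsetof() into struct
mana_ethtool_stats, so dropping a counter only requires deleting its table
row and struct field. The stand-alone sketch below (simplified names, not the
driver's actual ethtool callbacks) shows how such a string-plus-offset table
is typically consumed when statistics are reported: one u64 is copied per
entry from the stats base pointer plus the recorded offset.

/*
 * Stand-alone sketch of the string + offsetof() stats-table pattern seen
 * in mana_ethtool.c above. Names are simplified stand-ins, not the
 * driver's actual ethtool callbacks.
 */
#include <stdint.h>
#include <stddef.h>
#include <stdio.h>
#include <string.h>

struct demo_eth_stats {
	uint64_t stop_queue;
	uint64_t wake_queue;
	uint64_t tx_cqe_err;
	uint64_t rx_coalesced_err;
};

static const struct {
	char name[32];
	size_t offset;			/* offset of the field in the struct */
} demo_stats[] = {
	{ "stop_queue",       offsetof(struct demo_eth_stats, stop_queue) },
	{ "wake_queue",       offsetof(struct demo_eth_stats, wake_queue) },
	{ "tx_cq_err",        offsetof(struct demo_eth_stats, tx_cqe_err) },
	{ "rx_coalesced_err", offsetof(struct demo_eth_stats, rx_coalesced_err) },
};

/* What a get_ethtool_stats-style callback boils down to: copy one u64
 * per table entry, located via base pointer + recorded offset. */
static void fill_stats(const struct demo_eth_stats *st, uint64_t *out)
{
	const char *base = (const char *)st;

	for (size_t i = 0; i < sizeof(demo_stats) / sizeof(demo_stats[0]); i++)
		memcpy(&out[i], base + demo_stats[i].offset, sizeof(uint64_t));
}

int main(void)
{
	struct demo_eth_stats st = { .stop_queue = 3, .wake_queue = 3 };
	uint64_t out[4];

	fill_stats(&st, out);
	for (size_t i = 0; i < 4; i++)
		printf("%-18s %llu\n", demo_stats[i].name,
		       (unsigned long long)out[i]);
	return 0;
}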