diff mbox series

[01/13] RDMA: Split the alloc_hw_stats() ops to port and device variants

Message ID 1-v1-34c90fa45f1c+3c7b0-port_sysfs_jgg@nvidia.com (mailing list archive)
State Changes Requested
Headers show
Series Reorganize sysfs file creation for struct ib_devices | expand

Commit Message

Jason Gunthorpe May 17, 2021, 4:47 p.m. UTC
This is being used to implement both the port and device global stats,
which is causing some confusion in the drivers. For instance EFA and i40iw
both seem to be misusing the device stats.

Split it into two ops so drivers that don't support one or the other can
leave the op NULL'd, making the calling code a little simpler to
understand.

Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/core/counters.c          |  4 +-
 drivers/infiniband/core/device.c            |  3 +-
 drivers/infiniband/core/nldev.c             |  2 +-
 drivers/infiniband/core/sysfs.c             | 15 +++-
 drivers/infiniband/hw/bnxt_re/hw_counters.c |  7 +-
 drivers/infiniband/hw/bnxt_re/hw_counters.h |  4 +-
 drivers/infiniband/hw/bnxt_re/main.c        |  2 +-
 drivers/infiniband/hw/cxgb4/provider.c      |  9 +--
 drivers/infiniband/hw/efa/efa.h             |  3 +-
 drivers/infiniband/hw/efa/efa_main.c        |  3 +-
 drivers/infiniband/hw/efa/efa_verbs.c       | 11 ++-
 drivers/infiniband/hw/hfi1/verbs.c          | 86 ++++++++++-----------
 drivers/infiniband/hw/i40iw/i40iw_verbs.c   | 19 ++++-
 drivers/infiniband/hw/mlx4/main.c           | 25 ++++--
 drivers/infiniband/hw/mlx5/counters.c       | 42 +++++++---
 drivers/infiniband/sw/rxe/rxe_hw_counters.c |  7 +-
 drivers/infiniband/sw/rxe/rxe_hw_counters.h |  4 +-
 drivers/infiniband/sw/rxe/rxe_verbs.c       |  2 +-
 include/rdma/ib_verbs.h                     | 13 ++--
 19 files changed, 158 insertions(+), 103 deletions(-)

Comments

Saleem, Shiraz May 17, 2021, 11:06 p.m. UTC | #1
> Subject: [PATCH 01/13] RDMA: Split the alloc_hw_stats() ops to port and device
> variants
> 
> This is being used to implement both the port and device global stats, which is
> causing some confusion in the drivers. For instance EFA and i40iw both seem to
> be misusing the device stats.
> 
> Split it into two ops so drivers that don't support one or the other can leave the op
> NULL'd, making the calling code a little simpler to understand.
> 
> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
> ---
>  drivers/infiniband/core/counters.c          |  4 +-
>  drivers/infiniband/core/device.c            |  3 +-
>  drivers/infiniband/core/nldev.c             |  2 +-
>  drivers/infiniband/core/sysfs.c             | 15 +++-
>  drivers/infiniband/hw/bnxt_re/hw_counters.c |  7 +-
> drivers/infiniband/hw/bnxt_re/hw_counters.h |  4 +-
>  drivers/infiniband/hw/bnxt_re/main.c        |  2 +-
>  drivers/infiniband/hw/cxgb4/provider.c      |  9 +--
>  drivers/infiniband/hw/efa/efa.h             |  3 +-
>  drivers/infiniband/hw/efa/efa_main.c        |  3 +-
>  drivers/infiniband/hw/efa/efa_verbs.c       | 11 ++-
>  drivers/infiniband/hw/hfi1/verbs.c          | 86 ++++++++++-----------
>  drivers/infiniband/hw/i40iw/i40iw_verbs.c   | 19 ++++-
>  drivers/infiniband/hw/mlx4/main.c           | 25 ++++--
>  drivers/infiniband/hw/mlx5/counters.c       | 42 +++++++---
>  drivers/infiniband/sw/rxe/rxe_hw_counters.c |  7 +-
> drivers/infiniband/sw/rxe/rxe_hw_counters.h |  4 +-
>  drivers/infiniband/sw/rxe/rxe_verbs.c       |  2 +-
>  include/rdma/ib_verbs.h                     | 13 ++--
>  19 files changed, 158 insertions(+), 103 deletions(-)
> 

[...]

>  /**
> - * i40iw_alloc_hw_stats - Allocate a hw stats structure
> + * i40iw_alloc_hw_port_stats - Allocate a hw stats structure
>   * @ibdev: device pointer from stack
>   * @port_num: port number
>   */
> -static struct rdma_hw_stats *i40iw_alloc_hw_stats(struct ib_device *ibdev,
> -						  u32 port_num)
> +static struct rdma_hw_stats *i40iw_alloc_hw_port_stats(struct ib_device *ibdev,
> +						       u32 port_num)
>  {
>  	struct i40iw_device *iwdev = to_iwdev(ibdev);
>  	struct i40iw_sc_dev *dev = &iwdev->sc_dev; @@ -2468,6 +2468,16 @@
> static struct rdma_hw_stats *i40iw_alloc_hw_stats(struct ib_device *ibdev,
>  					  lifespan);
>  }
> 
> +static struct rdma_hw_stats *
> +i40iw_alloc_hw_device_stats(struct ib_device *ibdev) {
> +	/*
> +	 * It is probably a bug that i40iw reports its port stats as device
> +	 * stats
> +	 */

The number of physical ports per ib device is 1.

> +	return i40iw_alloc_hw_port_stats(ibdev, 0); }
> +
>  /**
Jason Gunthorpe May 17, 2021, 11:10 p.m. UTC | #2
On Mon, May 17, 2021 at 11:06:29PM +0000, Saleem, Shiraz wrote:
> > Subject: [PATCH 01/13] RDMA: Split the alloc_hw_stats() ops to port and device
> > variants
> > 
> > This is being used to implement both the port and device global stats, which is
> > causing some confusion in the drivers. For instance EFA and i40iw both seem to
> > be misusing the device stats.
> > 
> > Split it into two ops so drivers that don't support one or the other can leave the op
> > NULL'd, making the calling code a little simpler to understand.
> > 
> > Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
> >  drivers/infiniband/core/counters.c          |  4 +-
> >  drivers/infiniband/core/device.c            |  3 +-
> >  drivers/infiniband/core/nldev.c             |  2 +-
> >  drivers/infiniband/core/sysfs.c             | 15 +++-
> >  drivers/infiniband/hw/bnxt_re/hw_counters.c |  7 +-
> > drivers/infiniband/hw/bnxt_re/hw_counters.h |  4 +-
> >  drivers/infiniband/hw/bnxt_re/main.c        |  2 +-
> >  drivers/infiniband/hw/cxgb4/provider.c      |  9 +--
> >  drivers/infiniband/hw/efa/efa.h             |  3 +-
> >  drivers/infiniband/hw/efa/efa_main.c        |  3 +-
> >  drivers/infiniband/hw/efa/efa_verbs.c       | 11 ++-
> >  drivers/infiniband/hw/hfi1/verbs.c          | 86 ++++++++++-----------
> >  drivers/infiniband/hw/i40iw/i40iw_verbs.c   | 19 ++++-
> >  drivers/infiniband/hw/mlx4/main.c           | 25 ++++--
> >  drivers/infiniband/hw/mlx5/counters.c       | 42 +++++++---
> >  drivers/infiniband/sw/rxe/rxe_hw_counters.c |  7 +-
> > drivers/infiniband/sw/rxe/rxe_hw_counters.h |  4 +-
> >  drivers/infiniband/sw/rxe/rxe_verbs.c       |  2 +-
> >  include/rdma/ib_verbs.h                     | 13 ++--
> >  19 files changed, 158 insertions(+), 103 deletions(-)
> > 
> 
> [...]
> 
> >  /**
> > - * i40iw_alloc_hw_stats - Allocate a hw stats structure
> > + * i40iw_alloc_hw_port_stats - Allocate a hw stats structure
> >   * @ibdev: device pointer from stack
> >   * @port_num: port number
> >   */
> > -static struct rdma_hw_stats *i40iw_alloc_hw_stats(struct ib_device *ibdev,
> > -						  u32 port_num)
> > +static struct rdma_hw_stats *i40iw_alloc_hw_port_stats(struct ib_device *ibdev,
> > +						       u32 port_num)
> >  {
> >  	struct i40iw_device *iwdev = to_iwdev(ibdev);
> >  	struct i40iw_sc_dev *dev = &iwdev->sc_dev; @@ -2468,6 +2468,16 @@
> > static struct rdma_hw_stats *i40iw_alloc_hw_stats(struct ib_device *ibdev,
> >  					  lifespan);
> >  }
> > 
> > +static struct rdma_hw_stats *
> > +i40iw_alloc_hw_device_stats(struct ib_device *ibdev) {
> > +	/*
> > +	 * It is probably a bug that i40iw reports its port stats as device
> > +	 * stats
> > +	 */
> 
> The number of physical ports per ib device is 1.

Does something skip the port stats in this case? I don't see anything
like that?

What does the sysfs look like? Aren't there duplicated HW stats?

Jason
Saleem, Shiraz May 18, 2021, 12:18 a.m. UTC | #3
> Subject: Re: [PATCH 01/13] RDMA: Split the alloc_hw_stats() ops to port and
> device variants
> 
> On Mon, May 17, 2021 at 11:06:29PM +0000, Saleem, Shiraz wrote:
> > > Subject: [PATCH 01/13] RDMA: Split the alloc_hw_stats() ops to port
> > > and device variants
> > >
> > > This is being used to implement both the port and device global
> > > stats, which is causing some confusion in the drivers. For instance
> > > EFA and i40iw both seem to be misusing the device stats.
> > >
> > > Split it into two ops so drivers that don't support one or the other
> > > can leave the op NULL'd, making the calling code a little simpler to
> understand.
> > >
> > > Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
> > >  drivers/infiniband/core/counters.c          |  4 +-
> > >  drivers/infiniband/core/device.c            |  3 +-
> > >  drivers/infiniband/core/nldev.c             |  2 +-
> > >  drivers/infiniband/core/sysfs.c             | 15 +++-
> > >  drivers/infiniband/hw/bnxt_re/hw_counters.c |  7 +-
> > > drivers/infiniband/hw/bnxt_re/hw_counters.h |  4 +-
> > >  drivers/infiniband/hw/bnxt_re/main.c        |  2 +-
> > >  drivers/infiniband/hw/cxgb4/provider.c      |  9 +--
> > >  drivers/infiniband/hw/efa/efa.h             |  3 +-
> > >  drivers/infiniband/hw/efa/efa_main.c        |  3 +-
> > >  drivers/infiniband/hw/efa/efa_verbs.c       | 11 ++-
> > >  drivers/infiniband/hw/hfi1/verbs.c          | 86 ++++++++++-----------
> > >  drivers/infiniband/hw/i40iw/i40iw_verbs.c   | 19 ++++-
> > >  drivers/infiniband/hw/mlx4/main.c           | 25 ++++--
> > >  drivers/infiniband/hw/mlx5/counters.c       | 42 +++++++---
> > >  drivers/infiniband/sw/rxe/rxe_hw_counters.c |  7 +-
> > > drivers/infiniband/sw/rxe/rxe_hw_counters.h |  4 +-
> > >  drivers/infiniband/sw/rxe/rxe_verbs.c       |  2 +-
> > >  include/rdma/ib_verbs.h                     | 13 ++--
> > >  19 files changed, 158 insertions(+), 103 deletions(-)
> > >
> >
> > [...]
> >
> > >  /**
> > > - * i40iw_alloc_hw_stats - Allocate a hw stats structure
> > > + * i40iw_alloc_hw_port_stats - Allocate a hw stats structure
> > >   * @ibdev: device pointer from stack
> > >   * @port_num: port number
> > >   */
> > > -static struct rdma_hw_stats *i40iw_alloc_hw_stats(struct ib_device *ibdev,
> > > -						  u32 port_num)
> > > +static struct rdma_hw_stats *i40iw_alloc_hw_port_stats(struct ib_device
> *ibdev,
> > > +						       u32 port_num)
> > >  {
> > >  	struct i40iw_device *iwdev = to_iwdev(ibdev);
> > >  	struct i40iw_sc_dev *dev = &iwdev->sc_dev; @@ -2468,6 +2468,16 @@
> > > static struct rdma_hw_stats *i40iw_alloc_hw_stats(struct ib_device *ibdev,
> > >  					  lifespan);
> > >  }
> > >
> > > +static struct rdma_hw_stats *
> > > +i40iw_alloc_hw_device_stats(struct ib_device *ibdev) {
> > > +	/*
> > > +	 * It is probably a bug that i40iw reports its port stats as device
> > > +	 * stats
> > > +	 */
> >
> > The number of physical ports per ib device is 1.
> 
> Does something skip the port stats in this case? I don't see anything like that?

No I don't see any check perse to prevent port stats from getting initialized.
> 
> What does the sysfs look like? Aren't there duplicated HW stats?
> 

Yeah it is duplicated. So we are saying for phys_port_cnt = 1, we want the stats to show up in only place?


# cd /sys/class/infiniband/iwp3s0f0/hw_counters; for f in *; do echo -n "$f: "; cat "$f"; done; cd -
cnpHandled: 0
cnpIgnored: 0
cnpSent: 0
ip4InDiscards: 0
ip4InMcastOctets: 0
ip4InMcastPkts: 0
ip4InOctets: 5884668
ip4InPkts: 87399
ip4InReasmRqd: 0
ip4InTruncatedPkts: 0
ip4OutMcastOctets: 0
ip4OutMcastPkts: 0
ip4OutNoRoutes: 0
ip4OutOctets: 7224092
ip4OutPkts: 101959
ip4OutSegRqd: 0
ip6InDiscards: 0
ip6InMcastOctets: 0
ip6InMcastPkts: 0
ip6InOctets: 0
ip6InPkts: 0
ip6InReasmRqd: 0
ip6InTruncatedPkts: 0
ip6OutMcastOctets: 0
ip6OutMcastPkts: 0
ip6OutNoRoutes: 0
ip6OutOctets: 0
ip6OutPkts: 0
ip6OutSegRqd: 0
iwInRdmaReads: 3
iwInRdmaSends: 29127
iwInRdmaWrites: 0
iwOutRdmaReads: 14564
iwOutRdmaSends: 29126
iwOutRdmaWrites: 14562
iwRdmaBnd: 0
iwRdmaInv: 0
lifespan: 10
RxECNMrkd: 0
RxUDP: 9
rxVlanErrors: 0
tcpInOptErrors: 0
tcpInProtoErrors: 0
tcpInSegs: 87399
tcpOutSegs: 101959
tcpRetransSegs: 0
TxUDP: 0
/sys/class/infiniband/iwp3s0f0/hw_counters


# cd /sys/class/infiniband/iwp3s0f0/ports/1/hw_counters; for f in *; do echo -n "$f: "; cat "$f"; done; cd -
cnpHandled: 0
cnpIgnored: 0
cnpSent: 0
ip4InDiscards: 0
ip4InMcastOctets: 0
ip4InMcastPkts: 0
ip4InOctets: 5884668
ip4InPkts: 87399
ip4InReasmRqd: 0
ip4InTruncatedPkts: 0
ip4OutMcastOctets: 0
ip4OutMcastPkts: 0
ip4OutNoRoutes: 0
ip4OutOctets: 7224092
ip4OutPkts: 101959
ip4OutSegRqd: 0
ip6InDiscards: 0
ip6InMcastOctets: 0
ip6InMcastPkts: 0
ip6InOctets: 0
ip6InPkts: 0
ip6InReasmRqd: 0
ip6InTruncatedPkts: 0
ip6OutMcastOctets: 0
ip6OutMcastPkts: 0
ip6OutNoRoutes: 0
ip6OutOctets: 0
ip6OutPkts: 0
ip6OutSegRqd: 0
iwInRdmaReads: 3
iwInRdmaSends: 29127
iwInRdmaWrites: 0
iwOutRdmaReads: 14564
iwOutRdmaSends: 29126
iwOutRdmaWrites: 14562
iwRdmaBnd: 0
iwRdmaInv: 0
lifespan: 10
RxECNMrkd: 0
RxUDP: 9
rxVlanErrors: 0
tcpInOptErrors: 0
tcpInProtoErrors: 0
tcpInSegs: 87399
tcpOutSegs: 101959
tcpRetransSegs: 0
TxUDP: 0
/sys/class/infiniband/iwp3s0f0/hw_counters
Jason Gunthorpe May 18, 2021, 12:20 a.m. UTC | #4
On Tue, May 18, 2021 at 12:18:13AM +0000, Saleem, Shiraz wrote:

> > What does the sysfs look like? Aren't there duplicated HW stats?
> 
> 
> Yeah it is duplicated. So we are saying for phys_port_cnt = 1, we
> want the stats to show up in only place?

Yes.

Imagine you had a multi port device, and assign the stats
appropriately.

I didn't see anything in the list that made me think "device stat" but
I don't know what several of these do

If you can confirm that these are all port stats I can delete the
device stats in this series.

Jason
Mark Zhang May 18, 2021, 3:49 a.m. UTC | #5
On 5/18/2021 12:47 AM, Jason Gunthorpe wrote:
> External email: Use caution opening links or attachments
> 
> 
> This is being used to implement both the port and device global stats,
> which is causing some confusion in the drivers. For instance EFA and i40iw
> both seem to be misusing the device stats.
> 
> Split it into two ops so drivers that don't support one or the other can
> leave the op NULL'd, making the calling code a little simpler to
> understand.
> 
> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
> ---
>   drivers/infiniband/core/counters.c          |  4 +-
>   drivers/infiniband/core/device.c            |  3 +-
>   drivers/infiniband/core/nldev.c             |  2 +-
>   drivers/infiniband/core/sysfs.c             | 15 +++-
>   drivers/infiniband/hw/bnxt_re/hw_counters.c |  7 +-
>   drivers/infiniband/hw/bnxt_re/hw_counters.h |  4 +-
>   drivers/infiniband/hw/bnxt_re/main.c        |  2 +-
>   drivers/infiniband/hw/cxgb4/provider.c      |  9 +--
>   drivers/infiniband/hw/efa/efa.h             |  3 +-
>   drivers/infiniband/hw/efa/efa_main.c        |  3 +-
>   drivers/infiniband/hw/efa/efa_verbs.c       | 11 ++-
>   drivers/infiniband/hw/hfi1/verbs.c          | 86 ++++++++++-----------
>   drivers/infiniband/hw/i40iw/i40iw_verbs.c   | 19 ++++-
>   drivers/infiniband/hw/mlx4/main.c           | 25 ++++--
>   drivers/infiniband/hw/mlx5/counters.c       | 42 +++++++---
>   drivers/infiniband/sw/rxe/rxe_hw_counters.c |  7 +-
>   drivers/infiniband/sw/rxe/rxe_hw_counters.h |  4 +-
>   drivers/infiniband/sw/rxe/rxe_verbs.c       |  2 +-
>   include/rdma/ib_verbs.h                     | 13 ++--
>   19 files changed, 158 insertions(+), 103 deletions(-)
> 

[...]

> diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c
> index 05b702de00e89b..29082d8d45fc4f 100644
> --- a/drivers/infiniband/core/sysfs.c
> +++ b/drivers/infiniband/core/sysfs.c
> @@ -981,8 +981,15 @@ static void setup_hw_stats(struct ib_device *device, struct ib_port *port,
>          struct rdma_hw_stats *stats;
>          int i, ret;
> 
> -       stats = device->ops.alloc_hw_stats(device, port_num);
> -
> +       if (port_num) {
> +               if (!device->ops.alloc_hw_port_stats)
> +                       return;

Do we need this "if (!device->ops.alloc_hw_port_stats)" here?

> +               stats = device->ops.alloc_hw_port_stats(device, port_num);
> +       } else {
> +               if (!device->ops.alloc_hw_device_stats)
> +                       return;

And here?

> +               stats = device->ops.alloc_hw_device_stats(device);
> +       }
>          if (!stats)
>                  return;
> 
> @@ -1165,7 +1172,7 @@ static int add_port(struct ib_core_device *coredev, int port_num)
>           * port, so holder should be device. Therefore skip per port conunter
>           * initialization.
>           */
> -       if (device->ops.alloc_hw_stats && port_num && is_full_dev)
> +       if (device->ops.alloc_hw_port_stats && port_num && is_full_dev)
>                  setup_hw_stats(device, p, port_num);
> 
>          list_add_tail(&p->kobj.entry, &coredev->port_list);
> @@ -1409,7 +1416,7 @@ int ib_device_register_sysfs(struct ib_device *device)
>          if (ret)
>                  return ret;
> 
> -       if (device->ops.alloc_hw_stats)
> +       if (device->ops.alloc_hw_device_stats)
>                  setup_hw_stats(device, NULL, 0);
> 
>          return 0;

[...]

> diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c
> index 3f1893e180ddf3..c0f01799f4a0b9 100644
> --- a/drivers/infiniband/hw/cxgb4/provider.c
> +++ b/drivers/infiniband/hw/cxgb4/provider.c
> @@ -377,14 +377,11 @@ static const char * const names[] = {
>          [IP6OUTRSTS] = "ip6OutRsts"
>   };
> 
> -static struct rdma_hw_stats *c4iw_alloc_stats(struct ib_device *ibdev,
> -                                             u32 port_num)
> +static struct rdma_hw_stats *c4iw_alloc_port_stats(struct ib_device *ibdev,
> +                                                  u32 port_num)
>   {
>          BUILD_BUG_ON(ARRAY_SIZE(names) != NR_COUNTERS);
> 
> -       if (port_num != 0)
> -               return NULL;
> -

I'm not familiar with this driver, but if port_num must be 0 here, does 
it mean this is per-device not per-port?
Saleem, Shiraz May 18, 2021, 9:58 p.m. UTC | #6
> Subject: Re: [PATCH 01/13] RDMA: Split the alloc_hw_stats() ops to port and
> device variants
> 
> On Tue, May 18, 2021 at 12:18:13AM +0000, Saleem, Shiraz wrote:
> 
> > > What does the sysfs look like? Aren't there duplicated HW stats?
> >
> >
> > Yeah it is duplicated. So we are saying for phys_port_cnt = 1, we want
> > the stats to show up in only place?
> 
> Yes.
> 
> Imagine you had a multi port device, and assign the stats appropriately.
> 
> I didn't see anything in the list that made me think "device stat" but I don't know
> what several of these do

Are we exporting port stat when it should be really be device stat in some of the drivers though?

Most vendor drivers do port stats allocation only. Like...
https://elixir.bootlin.com/linux/v5.13-rc2/source/drivers/infiniband/hw/cxgb4/provider.c#L392
https://elixir.bootlin.com/linux/v5.13-rc2/source/drivers/infiniband/sw/rxe/rxe_hw_counters.c#L27

However .get_hw_stats callback appears to extract from same set of registers for each port

>
> If you can confirm that these are all port stats I can delete the device stats in this
> series.
> 

hfi1 seems to have this separation between device and port stat with a different set of counters
https://elixir.bootlin.com/linux/v5.13-rc2/source/drivers/infiniband/hw/hfi1/verbs.c#L1760

So your device and port stats op callback separation feels warranted.
Saleem, Shiraz May 19, 2021, 1:10 a.m. UTC | #7
> Subject: Re: [PATCH 01/13] RDMA: Split the alloc_hw_stats() ops to port and
> device variants
> 
> On 5/18/2021 12:47 AM, Jason Gunthorpe wrote:
> > External email: Use caution opening links or attachments
> >
> >
> > This is being used to implement both the port and device global stats,
> > which is causing some confusion in the drivers. For instance EFA and
> > i40iw both seem to be misusing the device stats.
> >
> > Split it into two ops so drivers that don't support one or the other
> > can leave the op NULL'd, making the calling code a little simpler to
> > understand.
> >
> > Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
> > ---

[...]

> > -static struct rdma_hw_stats *c4iw_alloc_stats(struct ib_device *ibdev,
> > -                                             u32 port_num)
> > +static struct rdma_hw_stats *c4iw_alloc_port_stats(struct ib_device *ibdev,
> > +                                                  u32 port_num)
> >   {
> >          BUILD_BUG_ON(ARRAY_SIZE(names) != NR_COUNTERS);
> >
> > -       if (port_num != 0)
> > -               return NULL;
> > -
> 
> I'm not familiar with this driver, but if port_num must be 0 here, does it mean this is
> per-device not per-port?

Yeah, additionally per-device seems to coincide with the behavior in get_hw_stats callback for cxgb4.

Shiraz
Gal Pressman May 19, 2021, 11:29 a.m. UTC | #8
On 17/05/2021 19:47, Jason Gunthorpe wrote:
> +struct rdma_hw_stats *efa_alloc_hw_device_stats(struct ib_device *ibdev)
> +{
> +	/*
> +	 * It is probably a bug that efa reports its port stats as device
> +	 * stats
> +	 */

Hmm, yea this needs some work.
Most of these stats are in fact port stats, but others (admin commands,
keep-alive, allocation/creation error) are device stats.

You can split them if you wish, or I can send a followup patch.

Thanks,
Tested-by: Gal Pressman <galpress@amazon.com>
Jason Gunthorpe May 19, 2021, 12:25 p.m. UTC | #9
On Tue, May 18, 2021 at 09:58:28PM +0000, Saleem, Shiraz wrote:
> > Subject: Re: [PATCH 01/13] RDMA: Split the alloc_hw_stats() ops to port and
> > device variants
> > 
> > On Tue, May 18, 2021 at 12:18:13AM +0000, Saleem, Shiraz wrote:
> > 
> > > > What does the sysfs look like? Aren't there duplicated HW stats?
> > >
> > >
> > > Yeah it is duplicated. So we are saying for phys_port_cnt = 1, we want
> > > the stats to show up in only place?
> > 
> > Yes.
> > 
> > Imagine you had a multi port device, and assign the stats appropriately.
> > 
> > I didn't see anything in the list that made me think "device stat" but I don't know
> > what several of these do
> 
> Are we exporting port stat when it should be really be device stat in some of the drivers though?
>
> Most vendor drivers do port stats allocation only. Like...
> https://elixir.bootlin.com/linux/v5.13-rc2/source/drivers/infiniband/hw/cxgb4/provider.c#L392
> https://elixir.bootlin.com/linux/v5.13-rc2/source/drivers/infiniband/sw/rxe/rxe_hw_counters.c#L27

Yes they don't have global device counters, presumably because they
only have 1 port.

> However .get_hw_stats callback appears to extract from same set of registers for each port

Most devices should only have port counters

> > If you can confirm that these are all port stats I can delete the device stats in this
> > series.
> 
> So your device and port stats op callback separation feels warranted.

So should I delete the device counters here?

Jason
Jason Gunthorpe May 19, 2021, 1:44 p.m. UTC | #10
On Wed, May 19, 2021 at 02:29:57PM +0300, Gal Pressman wrote:
> On 17/05/2021 19:47, Jason Gunthorpe wrote:
> > +struct rdma_hw_stats *efa_alloc_hw_device_stats(struct ib_device *ibdev)
> > +{
> > +	/*
> > +	 * It is probably a bug that efa reports its port stats as device
> > +	 * stats
> > +	 */
> 
> Hmm, yea this needs some work.
> Most of these stats are in fact port stats, but others (admin commands,
> keep-alive, allocation/creation error) are device stats.
> 
> You can split them if you wish, or I can send a followup patch.

I'm not sure I'd guess right, you'd probably better take it

> Thanks,
> Tested-by: Gal Pressman <galpress@amazon.com>

Thanks,
Jason
Saleem, Shiraz May 19, 2021, 4:50 p.m. UTC | #11
> Subject: Re: [PATCH 01/13] RDMA: Split the alloc_hw_stats() ops to port and
> device variants
> 
> On Tue, May 18, 2021 at 09:58:28PM +0000, Saleem, Shiraz wrote:
> > > Subject: Re: [PATCH 01/13] RDMA: Split the alloc_hw_stats() ops to
> > > port and device variants
> > >
> > > On Tue, May 18, 2021 at 12:18:13AM +0000, Saleem, Shiraz wrote:
> > >
> > > > > What does the sysfs look like? Aren't there duplicated HW stats?
> > > >
> > > >
> > > > Yeah it is duplicated. So we are saying for phys_port_cnt = 1, we
> > > > want the stats to show up in only place?
> > >
> > > Yes.
> > >
> > > Imagine you had a multi port device, and assign the stats appropriately.
> > >
> > > I didn't see anything in the list that made me think "device stat"
> > > but I don't know what several of these do
> >
> > Are we exporting port stat when it should be really be device stat in some of the
> drivers though?
> >
> > Most vendor drivers do port stats allocation only. Like...
> > https://elixir.bootlin.com/linux/v5.13-rc2/source/drivers/infiniband/h
> > w/cxgb4/provider.c#L392
> > https://elixir.bootlin.com/linux/v5.13-rc2/source/drivers/infiniband/s
> > w/rxe/rxe_hw_counters.c#L27
> 
> Yes they don't have global device counters, presumably because they only have 1
> port.
> 
> > However .get_hw_stats callback appears to extract from same set of
> > registers for each port
> 
> Most devices should only have port counters
> 
> > > If you can confirm that these are all port stats I can delete the
> > > device stats in this series.
> >
> > So your device and port stats op callback separation feels warranted.
> 
> So should I delete the device counters here?

For i40iw, yes. Just expose port stats and NULL out the device stat op.
Jason Gunthorpe May 20, 2021, 4:29 p.m. UTC | #12
On Tue, May 18, 2021 at 11:49:57AM +0800, Mark Zhang wrote:
> On 5/18/2021 12:47 AM, Jason Gunthorpe wrote:
> > diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c
> > index 05b702de00e89b..29082d8d45fc4f 100644
> > +++ b/drivers/infiniband/core/sysfs.c
> > @@ -981,8 +981,15 @@ static void setup_hw_stats(struct ib_device *device, struct ib_port *port,
> >          struct rdma_hw_stats *stats;
> >          int i, ret;
> > 
> > -       stats = device->ops.alloc_hw_stats(device, port_num);
> > -
> > +       if (port_num) {
> > +               if (!device->ops.alloc_hw_port_stats)
> > +                       return;
> 
> Do we need this "if (!device->ops.alloc_hw_port_stats)" here?

Not really in this patch, one of the next patches needs them near
here, but there isn't a good reason to leave them in this patch at
this point.

> > index 3f1893e180ddf3..c0f01799f4a0b9 100644
> > +++ b/drivers/infiniband/hw/cxgb4/provider.c
> > @@ -377,14 +377,11 @@ static const char * const names[] = {
> >          [IP6OUTRSTS] = "ip6OutRsts"
> >   };
> > 
> > -static struct rdma_hw_stats *c4iw_alloc_stats(struct ib_device *ibdev,
> > -                                             u32 port_num)
> > +static struct rdma_hw_stats *c4iw_alloc_port_stats(struct ib_device *ibdev,
> > +                                                  u32 port_num)
> >   {
> >          BUILD_BUG_ON(ARRAY_SIZE(names) != NR_COUNTERS);
> > 
> > -       if (port_num != 0)
> > -               return NULL;
> > -
> 
> I'm not familiar with this driver, but if port_num must be 0 here, does it
> mean this is per-device not per-port?

Yes, I switched it, though the actual counters look per port to me.

Thanks,
Jason
diff mbox series

Patch

diff --git a/drivers/infiniband/core/counters.c b/drivers/infiniband/core/counters.c
index 15493357cfef09..df9e6c5e4ddf7a 100644
--- a/drivers/infiniband/core/counters.c
+++ b/drivers/infiniband/core/counters.c
@@ -605,10 +605,10 @@  void rdma_counter_init(struct ib_device *dev)
 		port_counter->mode.mode = RDMA_COUNTER_MODE_NONE;
 		mutex_init(&port_counter->lock);
 
-		if (!dev->ops.alloc_hw_stats)
+		if (!dev->ops.alloc_hw_port_stats)
 			continue;
 
-		port_counter->hstats = dev->ops.alloc_hw_stats(dev, port);
+		port_counter->hstats = dev->ops.alloc_hw_port_stats(dev, port);
 		if (!port_counter->hstats)
 			goto fail;
 	}
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index c660cef66ac6ca..86a16cd7d7fdb2 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -2595,7 +2595,8 @@  void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
 	SET_DEVICE_OP(dev_ops, add_gid);
 	SET_DEVICE_OP(dev_ops, advise_mr);
 	SET_DEVICE_OP(dev_ops, alloc_dm);
-	SET_DEVICE_OP(dev_ops, alloc_hw_stats);
+	SET_DEVICE_OP(dev_ops, alloc_hw_device_stats);
+	SET_DEVICE_OP(dev_ops, alloc_hw_port_stats);
 	SET_DEVICE_OP(dev_ops, alloc_mr);
 	SET_DEVICE_OP(dev_ops, alloc_mr_integrity);
 	SET_DEVICE_OP(dev_ops, alloc_mw);
diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c
index 34d0cc1a4147ff..01316926cef63d 100644
--- a/drivers/infiniband/core/nldev.c
+++ b/drivers/infiniband/core/nldev.c
@@ -2060,7 +2060,7 @@  static int stat_get_doit_default_counter(struct sk_buff *skb,
 	if (!device)
 		return -EINVAL;
 
-	if (!device->ops.alloc_hw_stats || !device->ops.get_hw_stats) {
+	if (!device->ops.alloc_hw_port_stats || !device->ops.get_hw_stats) {
 		ret = -EINVAL;
 		goto err;
 	}
diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c
index 05b702de00e89b..29082d8d45fc4f 100644
--- a/drivers/infiniband/core/sysfs.c
+++ b/drivers/infiniband/core/sysfs.c
@@ -981,8 +981,15 @@  static void setup_hw_stats(struct ib_device *device, struct ib_port *port,
 	struct rdma_hw_stats *stats;
 	int i, ret;
 
-	stats = device->ops.alloc_hw_stats(device, port_num);
-
+	if (port_num) {
+		if (!device->ops.alloc_hw_port_stats)
+			return;
+		stats = device->ops.alloc_hw_port_stats(device, port_num);
+	} else {
+		if (!device->ops.alloc_hw_device_stats)
+			return;
+		stats = device->ops.alloc_hw_device_stats(device);
+	}
 	if (!stats)
 		return;
 
@@ -1165,7 +1172,7 @@  static int add_port(struct ib_core_device *coredev, int port_num)
 	 * port, so holder should be device. Therefore skip per port conunter
 	 * initialization.
 	 */
-	if (device->ops.alloc_hw_stats && port_num && is_full_dev)
+	if (device->ops.alloc_hw_port_stats && port_num && is_full_dev)
 		setup_hw_stats(device, p, port_num);
 
 	list_add_tail(&p->kobj.entry, &coredev->port_list);
@@ -1409,7 +1416,7 @@  int ib_device_register_sysfs(struct ib_device *device)
 	if (ret)
 		return ret;
 
-	if (device->ops.alloc_hw_stats)
+	if (device->ops.alloc_hw_device_stats)
 		setup_hw_stats(device, NULL, 0);
 
 	return 0;
diff --git a/drivers/infiniband/hw/bnxt_re/hw_counters.c b/drivers/infiniband/hw/bnxt_re/hw_counters.c
index 3e54e1ae75b499..7ba07797845c03 100644
--- a/drivers/infiniband/hw/bnxt_re/hw_counters.c
+++ b/drivers/infiniband/hw/bnxt_re/hw_counters.c
@@ -234,13 +234,10 @@  int bnxt_re_ib_get_hw_stats(struct ib_device *ibdev,
 	return ARRAY_SIZE(bnxt_re_stat_name);
 }
 
-struct rdma_hw_stats *bnxt_re_ib_alloc_hw_stats(struct ib_device *ibdev,
-						u32 port_num)
+struct rdma_hw_stats *bnxt_re_ib_alloc_hw_port_stats(struct ib_device *ibdev,
+						     u32 port_num)
 {
 	BUILD_BUG_ON(ARRAY_SIZE(bnxt_re_stat_name) != BNXT_RE_NUM_COUNTERS);
-	/* We support only per port stats */
-	if (!port_num)
-		return NULL;
 
 	return rdma_alloc_hw_stats_struct(bnxt_re_stat_name,
 					  ARRAY_SIZE(bnxt_re_stat_name),
diff --git a/drivers/infiniband/hw/bnxt_re/hw_counters.h b/drivers/infiniband/hw/bnxt_re/hw_counters.h
index ede048607d6c0d..6f2d2f91d9ff09 100644
--- a/drivers/infiniband/hw/bnxt_re/hw_counters.h
+++ b/drivers/infiniband/hw/bnxt_re/hw_counters.h
@@ -96,8 +96,8 @@  enum bnxt_re_hw_stats {
 	BNXT_RE_NUM_COUNTERS
 };
 
-struct rdma_hw_stats *bnxt_re_ib_alloc_hw_stats(struct ib_device *ibdev,
-						u32 port_num);
+struct rdma_hw_stats *bnxt_re_ib_alloc_hw_port_stats(struct ib_device *ibdev,
+						     u32 port_num);
 int bnxt_re_ib_get_hw_stats(struct ib_device *ibdev,
 			    struct rdma_hw_stats *stats,
 			    u32 port, int index);
diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c
index 8bfbf0231a9ef6..c705f09e767036 100644
--- a/drivers/infiniband/hw/bnxt_re/main.c
+++ b/drivers/infiniband/hw/bnxt_re/main.c
@@ -662,7 +662,7 @@  static const struct ib_device_ops bnxt_re_dev_ops = {
 	.uverbs_abi_ver = BNXT_RE_ABI_VERSION,
 
 	.add_gid = bnxt_re_add_gid,
-	.alloc_hw_stats = bnxt_re_ib_alloc_hw_stats,
+	.alloc_hw_port_stats = bnxt_re_ib_alloc_hw_port_stats,
 	.alloc_mr = bnxt_re_alloc_mr,
 	.alloc_pd = bnxt_re_alloc_pd,
 	.alloc_ucontext = bnxt_re_alloc_ucontext,
diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c
index 3f1893e180ddf3..c0f01799f4a0b9 100644
--- a/drivers/infiniband/hw/cxgb4/provider.c
+++ b/drivers/infiniband/hw/cxgb4/provider.c
@@ -377,14 +377,11 @@  static const char * const names[] = {
 	[IP6OUTRSTS] = "ip6OutRsts"
 };
 
-static struct rdma_hw_stats *c4iw_alloc_stats(struct ib_device *ibdev,
-					      u32 port_num)
+static struct rdma_hw_stats *c4iw_alloc_port_stats(struct ib_device *ibdev,
+						   u32 port_num)
 {
 	BUILD_BUG_ON(ARRAY_SIZE(names) != NR_COUNTERS);
 
-	if (port_num != 0)
-		return NULL;
-
 	return rdma_alloc_hw_stats_struct(names, NR_COUNTERS,
 					  RDMA_HW_STATS_DEFAULT_LIFESPAN);
 }
@@ -455,7 +452,7 @@  static const struct ib_device_ops c4iw_dev_ops = {
 	.driver_id = RDMA_DRIVER_CXGB4,
 	.uverbs_abi_ver = C4IW_UVERBS_ABI_VERSION,
 
-	.alloc_hw_stats = c4iw_alloc_stats,
+	.alloc_hw_port_stats = c4iw_alloc_port_stats,
 	.alloc_mr = c4iw_alloc_mr,
 	.alloc_pd = c4iw_allocate_pd,
 	.alloc_ucontext = c4iw_alloc_ucontext,
diff --git a/drivers/infiniband/hw/efa/efa.h b/drivers/infiniband/hw/efa/efa.h
index ea322cec27d230..2b8ca099b38115 100644
--- a/drivers/infiniband/hw/efa/efa.h
+++ b/drivers/infiniband/hw/efa/efa.h
@@ -157,7 +157,8 @@  int efa_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
 		  int qp_attr_mask, struct ib_udata *udata);
 enum rdma_link_layer efa_port_link_layer(struct ib_device *ibdev,
 					 u32 port_num);
-struct rdma_hw_stats *efa_alloc_hw_stats(struct ib_device *ibdev, u32 port_num);
+struct rdma_hw_stats *efa_alloc_hw_port_stats(struct ib_device *ibdev, u32 port_num);
+struct rdma_hw_stats *efa_alloc_hw_device_stats(struct ib_device *ibdev);
 int efa_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats,
 		     u32 port_num, int index);
 
diff --git a/drivers/infiniband/hw/efa/efa_main.c b/drivers/infiniband/hw/efa/efa_main.c
index 816cfd65b7ac84..203e6ddcacbc9c 100644
--- a/drivers/infiniband/hw/efa/efa_main.c
+++ b/drivers/infiniband/hw/efa/efa_main.c
@@ -242,7 +242,8 @@  static const struct ib_device_ops efa_dev_ops = {
 	.driver_id = RDMA_DRIVER_EFA,
 	.uverbs_abi_ver = EFA_UVERBS_ABI_VERSION,
 
-	.alloc_hw_stats = efa_alloc_hw_stats,
+	.alloc_hw_port_stats = efa_alloc_hw_port_stats,
+	.alloc_hw_device_stats = efa_alloc_hw_device_stats,
 	.alloc_pd = efa_alloc_pd,
 	.alloc_ucontext = efa_alloc_ucontext,
 	.create_cq = efa_create_cq,
diff --git a/drivers/infiniband/hw/efa/efa_verbs.c b/drivers/infiniband/hw/efa/efa_verbs.c
index 51572f1dc6111b..be6d3ff0f1be24 100644
--- a/drivers/infiniband/hw/efa/efa_verbs.c
+++ b/drivers/infiniband/hw/efa/efa_verbs.c
@@ -1904,13 +1904,22 @@  int efa_destroy_ah(struct ib_ah *ibah, u32 flags)
 	return 0;
 }
 
-struct rdma_hw_stats *efa_alloc_hw_stats(struct ib_device *ibdev, u32 port_num)
+struct rdma_hw_stats *efa_alloc_hw_port_stats(struct ib_device *ibdev, u32 port_num)
 {
 	return rdma_alloc_hw_stats_struct(efa_stats_names,
 					  ARRAY_SIZE(efa_stats_names),
 					  RDMA_HW_STATS_DEFAULT_LIFESPAN);
 }
 
+struct rdma_hw_stats *efa_alloc_hw_device_stats(struct ib_device *ibdev)
+{
+	/*
+	 * It is probably a bug that efa reports its port stats as device
+	 * stats
+	 */
+	return efa_alloc_hw_port_stats(ibdev, 0);
+}
+
 int efa_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats,
 		     u32 port_num, int index)
 {
diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c
index 554294340caa28..85deba07a6754b 100644
--- a/drivers/infiniband/hw/hfi1/verbs.c
+++ b/drivers/infiniband/hw/hfi1/verbs.c
@@ -1693,54 +1693,53 @@  static int init_cntr_names(const char *names_in,
 	return 0;
 }
 
-static struct rdma_hw_stats *alloc_hw_stats(struct ib_device *ibdev,
-					    u32 port_num)
+static int init_counters(struct ib_device *ibdev)
 {
-	int i, err;
+	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
+	int i, err = 0;
 
 	mutex_lock(&cntr_names_lock);
-	if (!cntr_names_initialized) {
-		struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
-
-		err = init_cntr_names(dd->cntrnames,
-				      dd->cntrnameslen,
-				      num_driver_cntrs,
-				      &num_dev_cntrs,
-				      &dev_cntr_names);
-		if (err) {
-			mutex_unlock(&cntr_names_lock);
-			return NULL;
-		}
-
-		for (i = 0; i < num_driver_cntrs; i++)
-			dev_cntr_names[num_dev_cntrs + i] =
-				driver_cntr_names[i];
-
-		err = init_cntr_names(dd->portcntrnames,
-				      dd->portcntrnameslen,
-				      0,
-				      &num_port_cntrs,
-				      &port_cntr_names);
-		if (err) {
-			kfree(dev_cntr_names);
-			dev_cntr_names = NULL;
-			mutex_unlock(&cntr_names_lock);
-			return NULL;
-		}
-		cntr_names_initialized = 1;
+	if (cntr_names_initialized)
+		goto out_unlock;
+
+	err = init_cntr_names(dd->cntrnames, dd->cntrnameslen, num_driver_cntrs,
+			      &num_dev_cntrs, &dev_cntr_names);
+	if (err)
+		goto out_unlock;
+
+	for (i = 0; i < num_driver_cntrs; i++)
+		dev_cntr_names[num_dev_cntrs + i] = driver_cntr_names[i];
+
+	err = init_cntr_names(dd->portcntrnames, dd->portcntrnameslen, 0,
+			      &num_port_cntrs, &port_cntr_names);
+	if (err) {
+		kfree(dev_cntr_names);
+		dev_cntr_names = NULL;
+		goto out_unlock;
 	}
+	cntr_names_initialized = 1;
+
+out_unlock:
 	mutex_unlock(&cntr_names_lock);
+	return err;
+}
 
-	if (!port_num)
-		return rdma_alloc_hw_stats_struct(
-				dev_cntr_names,
-				num_dev_cntrs + num_driver_cntrs,
-				RDMA_HW_STATS_DEFAULT_LIFESPAN);
-	else
-		return rdma_alloc_hw_stats_struct(
-				port_cntr_names,
-				num_port_cntrs,
-				RDMA_HW_STATS_DEFAULT_LIFESPAN);
+static struct rdma_hw_stats *hfi1_alloc_hw_device_stats(struct ib_device *ibdev)
+{
+	if (init_counters(ibdev))
+		return NULL;
+	return rdma_alloc_hw_stats_struct(dev_cntr_names,
+					  num_dev_cntrs + num_driver_cntrs,
+					  RDMA_HW_STATS_DEFAULT_LIFESPAN);
+}
+
+static struct rdma_hw_stats *hfi_alloc_hw_port_stats(struct ib_device *ibdev,
+						     u32 port_num)
+{
+	if (init_counters(ibdev))
+		return NULL;
+	return rdma_alloc_hw_stats_struct(port_cntr_names, num_port_cntrs,
+					  RDMA_HW_STATS_DEFAULT_LIFESPAN);
 }
 
 static u64 hfi1_sps_ints(void)
@@ -1787,7 +1786,8 @@  static const struct ib_device_ops hfi1_dev_ops = {
 	.owner = THIS_MODULE,
 	.driver_id = RDMA_DRIVER_HFI1,
 
-	.alloc_hw_stats = alloc_hw_stats,
+	.alloc_hw_device_stats = hfi1_alloc_hw_device_stats,
+	.alloc_hw_port_stats = hfi_alloc_hw_port_stats,
 	.alloc_rdma_netdev = hfi1_vnic_alloc_rn,
 	.get_dev_fw_str = hfi1_get_dev_fw_str,
 	.get_hw_stats = get_hw_stats,
diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
index b876d722fcc887..33f16d2b8e74fb 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
@@ -2441,12 +2441,12 @@  static void i40iw_get_dev_fw_str(struct ib_device *dev, char *str)
 }
 
 /**
- * i40iw_alloc_hw_stats - Allocate a hw stats structure
+ * i40iw_alloc_hw_port_stats - Allocate a hw stats structure
  * @ibdev: device pointer from stack
  * @port_num: port number
  */
-static struct rdma_hw_stats *i40iw_alloc_hw_stats(struct ib_device *ibdev,
-						  u32 port_num)
+static struct rdma_hw_stats *i40iw_alloc_hw_port_stats(struct ib_device *ibdev,
+						       u32 port_num)
 {
 	struct i40iw_device *iwdev = to_iwdev(ibdev);
 	struct i40iw_sc_dev *dev = &iwdev->sc_dev;
@@ -2468,6 +2468,16 @@  static struct rdma_hw_stats *i40iw_alloc_hw_stats(struct ib_device *ibdev,
 					  lifespan);
 }
 
+static struct rdma_hw_stats *
+i40iw_alloc_hw_device_stats(struct ib_device *ibdev)
+{
+	/*
+	 * It is probably a bug that i40iw reports its port stats as device
+	 * stats
+	 */
+	return i40iw_alloc_hw_port_stats(ibdev, 0);
+}
+
 /**
  * i40iw_get_hw_stats - Populates the rdma_hw_stats structure
  * @ibdev: device pointer from stack
@@ -2521,7 +2531,8 @@  static const struct ib_device_ops i40iw_dev_ops = {
 	/* NOTE: Older kernels wrongly use 0 for the uverbs_abi_ver */
 	.uverbs_abi_ver = I40IW_ABI_VER,
 
-	.alloc_hw_stats = i40iw_alloc_hw_stats,
+	.alloc_hw_device_stats = i40iw_alloc_hw_device_stats,
+	.alloc_hw_port_stats = i40iw_alloc_hw_port_stats,
 	.alloc_mr = i40iw_alloc_mr,
 	.alloc_pd = i40iw_alloc_pd,
 	.alloc_ucontext = i40iw_alloc_ucontext,
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index 22898d97ecbdac..341162aa2175c4 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -2105,17 +2105,29 @@  static const struct diag_counter diag_device_only[] = {
 	DIAG_COUNTER(rq_num_udsdprd, 0x118),
 };
 
-static struct rdma_hw_stats *mlx4_ib_alloc_hw_stats(struct ib_device *ibdev,
-						    u32 port_num)
+static struct rdma_hw_stats *
+mlx4_ib_alloc_hw_device_stats(struct ib_device *ibdev)
 {
 	struct mlx4_ib_dev *dev = to_mdev(ibdev);
 	struct mlx4_ib_diag_counters *diag = dev->diag_counters;
 
-	if (!diag[!!port_num].name)
+	if (!diag[0].name)
 		return NULL;
 
-	return rdma_alloc_hw_stats_struct(diag[!!port_num].name,
-					  diag[!!port_num].num_counters,
+	return rdma_alloc_hw_stats_struct(diag[0].name, diag[0].num_counters,
+					  RDMA_HW_STATS_DEFAULT_LIFESPAN);
+}
+
+static struct rdma_hw_stats *
+mlx4_ib_alloc_hw_port_stats(struct ib_device *ibdev, u32 port_num)
+{
+	struct mlx4_ib_dev *dev = to_mdev(ibdev);
+	struct mlx4_ib_diag_counters *diag = dev->diag_counters;
+
+	if (!diag[1].name)
+		return NULL;
+
+	return rdma_alloc_hw_stats_struct(diag[1].name, diag[1].num_counters,
 					  RDMA_HW_STATS_DEFAULT_LIFESPAN);
 }
 
@@ -2206,7 +2218,8 @@  static void mlx4_ib_fill_diag_counters(struct mlx4_ib_dev *ibdev,
 }
 
 static const struct ib_device_ops mlx4_ib_hw_stats_ops = {
-	.alloc_hw_stats = mlx4_ib_alloc_hw_stats,
+	.alloc_hw_device_stats = mlx4_ib_alloc_hw_device_stats,
+	.alloc_hw_port_stats = mlx4_ib_alloc_hw_port_stats,
 	.get_hw_stats = mlx4_ib_get_hw_stats,
 };
 
diff --git a/drivers/infiniband/hw/mlx5/counters.c b/drivers/infiniband/hw/mlx5/counters.c
index e365341057cbe2..224ba36f2946ca 100644
--- a/drivers/infiniband/hw/mlx5/counters.c
+++ b/drivers/infiniband/hw/mlx5/counters.c
@@ -161,22 +161,29 @@  u16 mlx5_ib_get_counters_id(struct mlx5_ib_dev *dev, u32 port_num)
 	return cnts->set_id;
 }
 
-static struct rdma_hw_stats *mlx5_ib_alloc_hw_stats(struct ib_device *ibdev,
-						    u32 port_num)
+static struct rdma_hw_stats *
+mlx5_ib_alloc_hw_device_stats(struct ib_device *ibdev)
 {
 	struct mlx5_ib_dev *dev = to_mdev(ibdev);
-	const struct mlx5_ib_counters *cnts;
-	bool is_switchdev = is_mdev_switchdev_mode(dev->mdev);
+	const struct mlx5_ib_counters *cnts = &dev->port[0].cnts;
 
-	if ((is_switchdev && port_num) || (!is_switchdev && !port_num))
-		return NULL;
+	return rdma_alloc_hw_stats_struct(cnts->names,
+					  cnts->num_q_counters +
+						  cnts->num_cong_counters +
+						  cnts->num_ext_ppcnt_counters,
+					  RDMA_HW_STATS_DEFAULT_LIFESPAN);
+}
 
-	cnts = get_counters(dev, port_num - 1);
+static struct rdma_hw_stats *
+mlx5_ib_alloc_hw_port_stats(struct ib_device *ibdev, u32 port_num)
+{
+	struct mlx5_ib_dev *dev = to_mdev(ibdev);
+	const struct mlx5_ib_counters *cnts = &dev->port[port_num - 1].cnts;
 
 	return rdma_alloc_hw_stats_struct(cnts->names,
 					  cnts->num_q_counters +
-					  cnts->num_cong_counters +
-					  cnts->num_ext_ppcnt_counters,
+						  cnts->num_cong_counters +
+						  cnts->num_ext_ppcnt_counters,
 					  RDMA_HW_STATS_DEFAULT_LIFESPAN);
 }
 
@@ -666,7 +673,17 @@  void mlx5_ib_counters_clear_description(struct ib_counters *counters)
 }
 
 static const struct ib_device_ops hw_stats_ops = {
-	.alloc_hw_stats = mlx5_ib_alloc_hw_stats,
+	.alloc_hw_port_stats = mlx5_ib_alloc_hw_port_stats,
+	.get_hw_stats = mlx5_ib_get_hw_stats,
+	.counter_bind_qp = mlx5_ib_counter_bind_qp,
+	.counter_unbind_qp = mlx5_ib_counter_unbind_qp,
+	.counter_dealloc = mlx5_ib_counter_dealloc,
+	.counter_alloc_stats = mlx5_ib_counter_alloc_stats,
+	.counter_update_stats = mlx5_ib_counter_update_stats,
+};
+
+static const struct ib_device_ops hw_switchdev_stats_ops = {
+	.alloc_hw_device_stats = mlx5_ib_alloc_hw_device_stats,
 	.get_hw_stats = mlx5_ib_get_hw_stats,
 	.counter_bind_qp = mlx5_ib_counter_bind_qp,
 	.counter_unbind_qp = mlx5_ib_counter_unbind_qp,
@@ -690,7 +707,10 @@  int mlx5_ib_counters_init(struct mlx5_ib_dev *dev)
 	if (!MLX5_CAP_GEN(dev->mdev, max_qp_cnt))
 		return 0;
 
-	ib_set_device_ops(&dev->ib_dev, &hw_stats_ops);
+	if (is_mdev_switchdev_mode(dev->mdev))
+		ib_set_device_ops(&dev->ib_dev, &hw_switchdev_stats_ops);
+	else
+		ib_set_device_ops(&dev->ib_dev, &hw_stats_ops);
 	return mlx5_ib_alloc_counters(dev);
 }
 
diff --git a/drivers/infiniband/sw/rxe/rxe_hw_counters.c b/drivers/infiniband/sw/rxe/rxe_hw_counters.c
index f469fd1c753d99..d5ceb706d964f3 100644
--- a/drivers/infiniband/sw/rxe/rxe_hw_counters.c
+++ b/drivers/infiniband/sw/rxe/rxe_hw_counters.c
@@ -40,13 +40,10 @@  int rxe_ib_get_hw_stats(struct ib_device *ibdev,
 	return ARRAY_SIZE(rxe_counter_name);
 }
 
-struct rdma_hw_stats *rxe_ib_alloc_hw_stats(struct ib_device *ibdev,
-					    u32 port_num)
+struct rdma_hw_stats *rxe_ib_alloc_hw_port_stats(struct ib_device *ibdev,
+						 u32 port_num)
 {
 	BUILD_BUG_ON(ARRAY_SIZE(rxe_counter_name) != RXE_NUM_OF_COUNTERS);
-	/* We support only per port stats */
-	if (!port_num)
-		return NULL;
 
 	return rdma_alloc_hw_stats_struct(rxe_counter_name,
 					  ARRAY_SIZE(rxe_counter_name),
diff --git a/drivers/infiniband/sw/rxe/rxe_hw_counters.h b/drivers/infiniband/sw/rxe/rxe_hw_counters.h
index 2f369acb46d79b..71f4d4fa9dc8b6 100644
--- a/drivers/infiniband/sw/rxe/rxe_hw_counters.h
+++ b/drivers/infiniband/sw/rxe/rxe_hw_counters.h
@@ -29,8 +29,8 @@  enum rxe_counters {
 	RXE_NUM_OF_COUNTERS
 };
 
-struct rdma_hw_stats *rxe_ib_alloc_hw_stats(struct ib_device *ibdev,
-					    u32 port_num);
+struct rdma_hw_stats *rxe_ib_alloc_hw_port_stats(struct ib_device *ibdev,
+						 u32 port_num);
 int rxe_ib_get_hw_stats(struct ib_device *ibdev,
 			struct rdma_hw_stats *stats,
 			u32 port, int index);
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
index aeb5e232c19575..80f3094b17b5c3 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -1058,7 +1058,7 @@  static const struct ib_device_ops rxe_dev_ops = {
 	.driver_id = RDMA_DRIVER_RXE,
 	.uverbs_abi_ver = RXE_UVERBS_ABI_VERSION,
 
-	.alloc_hw_stats = rxe_ib_alloc_hw_stats,
+	.alloc_hw_port_stats = rxe_ib_alloc_hw_port_stats,
 	.alloc_mr = rxe_alloc_mr,
 	.alloc_pd = rxe_alloc_pd,
 	.alloc_ucontext = rxe_alloc_ucontext,
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 7e2f3699b8987b..8fa7fd20e94aa7 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -2523,13 +2523,14 @@  struct ib_device_ops {
 			    unsigned int *meta_sg_offset);
 
 	/**
-	 * alloc_hw_stats - Allocate a struct rdma_hw_stats and fill in the
-	 *   driver initialized data.  The struct is kfree()'ed by the sysfs
-	 *   core when the device is removed.  A lifespan of -1 in the return
-	 *   struct tells the core to set a default lifespan.
+	 * alloc_hw_[device,port]_stats - Allocate a struct rdma_hw_stats and
+	 *   fill in the driver initialized data.  The struct is kfree()'ed by
+	 *   the sysfs core when the device is removed.  A lifespan of -1 in the
+	 *   return struct tells the core to set a default lifespan.
 	 */
-	struct rdma_hw_stats *(*alloc_hw_stats)(struct ib_device *device,
-						u32 port_num);
+	struct rdma_hw_stats *(*alloc_hw_device_stats)(struct ib_device *device);
+	struct rdma_hw_stats *(*alloc_hw_port_stats)(struct ib_device *device,
+						     u32 port_num);
 	/**
 	 * get_hw_stats - Fill in the counter value(s) in the stats struct.
 	 * @index - The index in the value array we wish to have updated, or