[v5,net-next,3/3] net: mscc: ocelot: use bulk reads for stats

Message ID 20220208044644.359951-4-colin.foster@in-advantage.com (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Series use bulk reads for ocelot statistics

Checks

Context Check Description
netdev/tree_selection success Clearly marked for net-next
netdev/fixes_present success Fixes tag not required for -next series
netdev/subject_prefix success Link
netdev/cover_letter success Series has a cover letter
netdev/patch_count success Link
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 0 this patch: 0
netdev/cc_maintainers success CCed 7 of 7 maintainers
netdev/build_clang success Errors and warnings before: 0 this patch: 0
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 0 this patch: 0
netdev/checkpatch warning WARNING: line length of 82 exceeds 80 columns; WARNING: line length of 88 exceeds 80 columns
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Colin Foster Feb. 8, 2022, 4:46 a.m. UTC
Create and utilize bulk regmap reads instead of single access for gathering
stats. The background reading of statistics happens frequently, and over
a few contiguous memory regions.

High speed PCIe buses and MMIO access will probably see negligible
performance increase. Lower speed buses like SPI and I2C could see
significant performance increase, since the bus configuration and register
access times account for a large percentage of data transfer time.

Signed-off-by: Colin Foster <colin.foster@in-advantage.com>
---
 drivers/net/ethernet/mscc/ocelot.c | 78 +++++++++++++++++++++++++-----
 include/soc/mscc/ocelot.h          |  8 +++
 2 files changed, 73 insertions(+), 13 deletions(-)

Comments

Vladimir Oltean Feb. 8, 2022, 1:18 p.m. UTC | #1
On Mon, Feb 07, 2022 at 08:46:44PM -0800, Colin Foster wrote:
> Create and utilize bulk regmap reads instead of single access for gathering
> stats. The background reading of statistics happens frequently, and over
> a few contiguous memory regions.
> 
> High speed PCIe buses and MMIO access will probably see negligible
> performance increase. Lower speed buses like SPI and I2C could see
> significant performance increase, since the bus configuration and register
> access times account for a large percentage of data transfer time.
> 
> Signed-off-by: Colin Foster <colin.foster@in-advantage.com>
> ---

Reviewed-by: Vladimir Oltean <vladimir.oltean@nxp.com>

Just one minor comment below, but all in all it's fine as is, too.

>  drivers/net/ethernet/mscc/ocelot.c | 78 +++++++++++++++++++++++++-----
>  include/soc/mscc/ocelot.h          |  8 +++
>  2 files changed, 73 insertions(+), 13 deletions(-)
> 
> diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c
> index 455293aa6343..5efb1f3a1410 100644
> --- a/drivers/net/ethernet/mscc/ocelot.c
> +++ b/drivers/net/ethernet/mscc/ocelot.c
> @@ -1737,32 +1737,41 @@ void ocelot_get_strings(struct ocelot *ocelot, int port, u32 sset, u8 *data)
>  }
>  EXPORT_SYMBOL(ocelot_get_strings);
>  
> -static void ocelot_update_stats(struct ocelot *ocelot)
> +static int ocelot_update_stats(struct ocelot *ocelot)
>  {
> -	int i, j;
> +	struct ocelot_stats_region *region;
> +	int i, j, err = 0;
>  
>  	mutex_lock(&ocelot->stats_lock);
>  
>  	for (i = 0; i < ocelot->num_phys_ports; i++) {
> +		unsigned int idx = 0;
> +
>  		/* Configure the port to read the stats from */
>  		ocelot_write(ocelot, SYS_STAT_CFG_STAT_VIEW(i), SYS_STAT_CFG);
>  
> -		for (j = 0; j < ocelot->num_stats; j++) {
> -			u32 val;
> -			unsigned int idx = i * ocelot->num_stats + j;
> +		list_for_each_entry(region, &ocelot->stats_regions, node) {
> +			err = ocelot_bulk_read_rix(ocelot, SYS_COUNT_RX_OCTETS,
> +						   region->offset, region->buf,
> +						   region->count);
> +			if (err)
> +				goto out;
>  
> -			val = ocelot_read_rix(ocelot, SYS_COUNT_RX_OCTETS,
> -					      ocelot->stats_layout[j].offset);
> +			for (j = 0; j < region->count; j++) {
> +				if (region->buf[j] < (ocelot->stats[idx + j] & U32_MAX))
> +					ocelot->stats[idx + j] += (u64)1 << 32;
>  
> -			if (val < (ocelot->stats[idx] & U32_MAX))
> -				ocelot->stats[idx] += (u64)1 << 32;
> +				ocelot->stats[idx + j] = (ocelot->stats[idx + j] &
> +							~(u64)U32_MAX) + region->buf[j];
> +			}
>  
> -			ocelot->stats[idx] = (ocelot->stats[idx] &
> -					      ~(u64)U32_MAX) + val;
> +			idx += region->count;
>  		}
>  	}
>  
> +out:
>  	mutex_unlock(&ocelot->stats_lock);
> +	return err;
>  }
>  
>  static void ocelot_check_stats_work(struct work_struct *work)
> @@ -1779,10 +1788,11 @@ static void ocelot_check_stats_work(struct work_struct *work)
>  
>  void ocelot_get_ethtool_stats(struct ocelot *ocelot, int port, u64 *data)
>  {
> -	int i;
> +	int i, err;
>  
>  	/* check and update now */
> -	ocelot_update_stats(ocelot);
> +	err = ocelot_update_stats(ocelot);
> +	WARN_ONCE(err, "Error %d updating ethtool stats\n", err);

I think a dev_err(ocelot->dev, ...) would be more appropriate here.
WARN_ON() should be used for truly critical errors (things that should
never happen unless bugs are present). When the condition it checks is true, a
WARN_ON() will print a stack trace, and when "panic_on_warn" is enabled,
a WARN() is effectively a BUG() and crashes the kernel.

include/asm-generic/bug.h says:

/*
 * WARN(), WARN_ON(), WARN_ON_ONCE, and so on can be used to report
 * significant kernel issues that need prompt attention if they should ever
 * appear at runtime.
 *
 * Do not use these macros when checking for invalid external inputs
 * (e.g. invalid system call arguments, or invalid data coming from
 * network/devices), and on transient conditions like ENOMEM or EAGAIN.
 * These macros should be used for recoverable kernel issues only.
 * For invalid external inputs, transient conditions, etc use
 * pr_err[_once/_ratelimited]() followed by dump_stack(), if necessary.
 * Do not include "BUG"/"WARNING" in format strings manually to make these
 * conditions distinguishable from kernel issues.
 *
 * Use the versions with printk format strings to provide better diagnostics.
 */
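
Something along these lines (just a sketch, reusing the existing
ocelot->dev pointer) would report the failure without a stack trace:

	err = ocelot_update_stats(ocelot);
	if (err)
		dev_err(ocelot->dev, "Error %d updating ethtool stats\n", err);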

>  
>  	/* Copy all counters */
>  	for (i = 0; i < ocelot->num_stats; i++)
> @@ -1799,6 +1809,41 @@ int ocelot_get_sset_count(struct ocelot *ocelot, int port, int sset)
>  }
>  EXPORT_SYMBOL(ocelot_get_sset_count);
>  
> +static int ocelot_prepare_stats_regions(struct ocelot *ocelot)
> +{
> +	struct ocelot_stats_region *region = NULL;
> +	unsigned int last;
> +	int i;
> +
> +	INIT_LIST_HEAD(&ocelot->stats_regions);
> +
> +	for (i = 0; i < ocelot->num_stats; i++) {
> +		if (region && ocelot->stats_layout[i].offset == last + 1) {
> +			region->count++;
> +		} else {
> +			region = devm_kzalloc(ocelot->dev, sizeof(*region),
> +					      GFP_KERNEL);
> +			if (!region)
> +				return -ENOMEM;
> +
> +			region->offset = ocelot->stats_layout[i].offset;
> +			region->count = 1;
> +			list_add_tail(&region->node, &ocelot->stats_regions);
> +		}
> +
> +		last = ocelot->stats_layout[i].offset;
> +	}
> +
> +	list_for_each_entry(region, &ocelot->stats_regions, node) {
> +		region->buf = devm_kcalloc(ocelot->dev, region->count,
> +					   sizeof(*region->buf), GFP_KERNEL);
> +		if (!region->buf)
> +			return -ENOMEM;
> +	}
> +
> +	return 0;
> +}
> +
>  int ocelot_get_ts_info(struct ocelot *ocelot, int port,
>  		       struct ethtool_ts_info *info)
>  {
> @@ -2799,6 +2844,13 @@ int ocelot_init(struct ocelot *ocelot)
>  				 ANA_CPUQ_8021_CFG_CPUQ_BPDU_VAL(6),
>  				 ANA_CPUQ_8021_CFG, i);
>  
> +	ret = ocelot_prepare_stats_regions(ocelot);
> +	if (ret) {
> +		destroy_workqueue(ocelot->stats_queue);
> +		destroy_workqueue(ocelot->owq);
> +		return ret;
> +	}
> +
>  	INIT_DELAYED_WORK(&ocelot->stats_work, ocelot_check_stats_work);
>  	queue_delayed_work(ocelot->stats_queue, &ocelot->stats_work,
>  			   OCELOT_STATS_CHECK_DELAY);
> diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h
> index 312b72558659..d3291a5f7e88 100644
> --- a/include/soc/mscc/ocelot.h
> +++ b/include/soc/mscc/ocelot.h
> @@ -542,6 +542,13 @@ struct ocelot_stat_layout {
>  	char name[ETH_GSTRING_LEN];
>  };
>  
> +struct ocelot_stats_region {
> +	struct list_head node;
> +	u32 offset;
> +	int count;
> +	u32 *buf;
> +};
> +
>  enum ocelot_tag_prefix {
>  	OCELOT_TAG_PREFIX_DISABLED	= 0,
>  	OCELOT_TAG_PREFIX_NONE,
> @@ -673,6 +680,7 @@ struct ocelot {
>  	struct regmap_field		*regfields[REGFIELD_MAX];
>  	const u32 *const		*map;
>  	const struct ocelot_stat_layout	*stats_layout;
> +	struct list_head		stats_regions;
>  	unsigned int			num_stats;
>  
>  	u32				pool_size[OCELOT_SB_NUM][OCELOT_SB_POOL_NUM];
> -- 
> 2.25.1
>
Vladimir Oltean Feb. 8, 2022, 3:03 p.m. UTC | #2
On Mon, Feb 07, 2022 at 08:46:44PM -0800, Colin Foster wrote:
> Create and utilize bulk regmap reads instead of single access for gathering
> stats. The background reading of statistics happens frequently, and over
> a few contiguous memory regions.
> 
> High speed PCIe buses and MMIO access will probably see negligible
> performance increase. Lower speed buses like SPI and I2C could see
> significant performance increase, since the bus configuration and register
> access times account for a large percentage of data transfer time.
> 
> Signed-off-by: Colin Foster <colin.foster@in-advantage.com>
> ---
>  drivers/net/ethernet/mscc/ocelot.c | 78 +++++++++++++++++++++++++-----
>  include/soc/mscc/ocelot.h          |  8 +++
>  2 files changed, 73 insertions(+), 13 deletions(-)
> 
> diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c
> index 455293aa6343..5efb1f3a1410 100644
> --- a/drivers/net/ethernet/mscc/ocelot.c
> +++ b/drivers/net/ethernet/mscc/ocelot.c
> @@ -1737,32 +1737,41 @@ void ocelot_get_strings(struct ocelot *ocelot, int port, u32 sset, u8 *data)
>  }
>  EXPORT_SYMBOL(ocelot_get_strings);
>  
> -static void ocelot_update_stats(struct ocelot *ocelot)
> +static int ocelot_update_stats(struct ocelot *ocelot)
>  {
> -	int i, j;
> +	struct ocelot_stats_region *region;
> +	int i, j, err = 0;
>  
>  	mutex_lock(&ocelot->stats_lock);
>  
>  	for (i = 0; i < ocelot->num_phys_ports; i++) {
> +		unsigned int idx = 0;
> +

This is a bug which causes ocelot->stats to be overwritten with the
statistics of port 0, for all ports. Either move the variable
declaration and initialization with 0 in the larger scope (outside the
"for" loop), or initialize idx with i * ocelot->num_stats.

>  		/* Configure the port to read the stats from */
>  		ocelot_write(ocelot, SYS_STAT_CFG_STAT_VIEW(i), SYS_STAT_CFG);
>  
> -		for (j = 0; j < ocelot->num_stats; j++) {
> -			u32 val;
> -			unsigned int idx = i * ocelot->num_stats + j;
> +		list_for_each_entry(region, &ocelot->stats_regions, node) {
> +			err = ocelot_bulk_read_rix(ocelot, SYS_COUNT_RX_OCTETS,
> +						   region->offset, region->buf,
> +						   region->count);
> +			if (err)
> +				goto out;
>  
> -			val = ocelot_read_rix(ocelot, SYS_COUNT_RX_OCTETS,
> -					      ocelot->stats_layout[j].offset);
> +			for (j = 0; j < region->count; j++) {
> +				if (region->buf[j] < (ocelot->stats[idx + j] & U32_MAX))
> +					ocelot->stats[idx + j] += (u64)1 << 32;

I'd prefer if you reduce the apparent complexity of this logic by
creating some temporary variables:

	u64 *stat = &ocelot->stats[idx + j];
	u64 val = region->buf[j];
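
so that the per-counter update could read roughly as follows (untested
sketch, same arithmetic as in the patch):

	for (j = 0; j < region->count; j++) {
		u64 *stat = &ocelot->stats[idx + j];
		u64 val = region->buf[j];

		if (val < (*stat & U32_MAX))
			*stat += (u64)1 << 32;

		*stat = (*stat & ~(u64)U32_MAX) + val;
	}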

>  
> -			if (val < (ocelot->stats[idx] & U32_MAX))
> -				ocelot->stats[idx] += (u64)1 << 32;
> +				ocelot->stats[idx + j] = (ocelot->stats[idx + j] &
> +							~(u64)U32_MAX) + region->buf[j];
> +			}
>  
> -			ocelot->stats[idx] = (ocelot->stats[idx] &
> -					      ~(u64)U32_MAX) + val;
> +			idx += region->count;
>  		}
>  	}
>  
> +out:
>  	mutex_unlock(&ocelot->stats_lock);
> +	return err;
>  }
>  
>  static void ocelot_check_stats_work(struct work_struct *work)
> @@ -1779,10 +1788,11 @@ static void ocelot_check_stats_work(struct work_struct *work)
>  
>  void ocelot_get_ethtool_stats(struct ocelot *ocelot, int port, u64 *data)
>  {
> -	int i;
> +	int i, err;
>  
>  	/* check and update now */
> -	ocelot_update_stats(ocelot);
> +	err = ocelot_update_stats(ocelot);

Please, as a separate change, introduce a function that reads the
statistics for a single port, and make ethtool call that instead of
updating the entire port array; doing all ports there is pointless.

> +	WARN_ONCE(err, "Error %d updating ethtool stats\n", err);
>  
>  	/* Copy all counters */
>  	for (i = 0; i < ocelot->num_stats; i++)

and here, in the unseen part of the context, lies:

	/* Copy all counters */
	for (i = 0; i < ocelot->num_stats; i++)
		*data++ = ocelot->stats[port * ocelot->num_stats + i];

I think this is buggy, because this is a reader of ocelot->stats which
is not protected by ocelot->stats_lock (it was taken and dropped by
ocelot_update_stats). But a second ocelot_update_stats() can run
concurrently with ethtool and ruin the day, modifying the array at the
same time as it's being read out.

The new function that you introduce, for reading the stats of a single
port, should require that ocelot->stats_lock is already held, and you
should hold it from top-level (ocelot_get_ethtool_stats).
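
In other words, something with this shape (the helper name below is just
a placeholder):

/* Hypothetical helper that reads the stats regions for a single port;
 * it expects ocelot->stats_lock to already be held by the caller.
 */
static int ocelot_port_update_stats(struct ocelot *ocelot, int port);

void ocelot_get_ethtool_stats(struct ocelot *ocelot, int port, u64 *data)
{
	int i, err;

	mutex_lock(&ocelot->stats_lock);

	err = ocelot_port_update_stats(ocelot, port);

	/* Copy all counters while the lock is still held */
	for (i = 0; i < ocelot->num_stats; i++)
		*data++ = ocelot->stats[port * ocelot->num_stats + i];

	mutex_unlock(&ocelot->stats_lock);

	if (err)
		dev_err(ocelot->dev, "Error %d updating ethtool stats\n", err);
}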

> @@ -1799,6 +1809,41 @@ int ocelot_get_sset_count(struct ocelot *ocelot, int port, int sset)
>  }
>  EXPORT_SYMBOL(ocelot_get_sset_count);
>  
> +static int ocelot_prepare_stats_regions(struct ocelot *ocelot)
> +{
> +	struct ocelot_stats_region *region = NULL;
> +	unsigned int last;
> +	int i;
> +
> +	INIT_LIST_HEAD(&ocelot->stats_regions);
> +
> +	for (i = 0; i < ocelot->num_stats; i++) {
> +		if (region && ocelot->stats_layout[i].offset == last + 1) {
> +			region->count++;
> +		} else {
> +			region = devm_kzalloc(ocelot->dev, sizeof(*region),
> +					      GFP_KERNEL);
> +			if (!region)
> +				return -ENOMEM;
> +
> +			region->offset = ocelot->stats_layout[i].offset;
> +			region->count = 1;
> +			list_add_tail(&region->node, &ocelot->stats_regions);
> +		}
> +
> +		last = ocelot->stats_layout[i].offset;
> +	}
> +
> +	list_for_each_entry(region, &ocelot->stats_regions, node) {
> +		region->buf = devm_kcalloc(ocelot->dev, region->count,
> +					   sizeof(*region->buf), GFP_KERNEL);
> +		if (!region->buf)
> +			return -ENOMEM;
> +	}
> +
> +	return 0;
> +}
> +
>  int ocelot_get_ts_info(struct ocelot *ocelot, int port,
>  		       struct ethtool_ts_info *info)
>  {
> @@ -2799,6 +2844,13 @@ int ocelot_init(struct ocelot *ocelot)
>  				 ANA_CPUQ_8021_CFG_CPUQ_BPDU_VAL(6),
>  				 ANA_CPUQ_8021_CFG, i);
>  
> +	ret = ocelot_prepare_stats_regions(ocelot);
> +	if (ret) {
> +		destroy_workqueue(ocelot->stats_queue);
> +		destroy_workqueue(ocelot->owq);
> +		return ret;
> +	}
> +
>  	INIT_DELAYED_WORK(&ocelot->stats_work, ocelot_check_stats_work);
>  	queue_delayed_work(ocelot->stats_queue, &ocelot->stats_work,
>  			   OCELOT_STATS_CHECK_DELAY);
> diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h
> index 312b72558659..d3291a5f7e88 100644
> --- a/include/soc/mscc/ocelot.h
> +++ b/include/soc/mscc/ocelot.h
> @@ -542,6 +542,13 @@ struct ocelot_stat_layout {
>  	char name[ETH_GSTRING_LEN];
>  };
>  
> +struct ocelot_stats_region {
> +	struct list_head node;
> +	u32 offset;
> +	int count;
> +	u32 *buf;
> +};
> +
>  enum ocelot_tag_prefix {
>  	OCELOT_TAG_PREFIX_DISABLED	= 0,
>  	OCELOT_TAG_PREFIX_NONE,
> @@ -673,6 +680,7 @@ struct ocelot {
>  	struct regmap_field		*regfields[REGFIELD_MAX];
>  	const u32 *const		*map;
>  	const struct ocelot_stat_layout	*stats_layout;
> +	struct list_head		stats_regions;
>  	unsigned int			num_stats;
>  
>  	u32				pool_size[OCELOT_SB_NUM][OCELOT_SB_POOL_NUM];
> -- 
> 2.25.1
>
Vladimir Oltean Feb. 8, 2022, 3:34 p.m. UTC | #3
On Tue, Feb 08, 2022 at 05:03:03PM +0200, Vladimir Oltean wrote:
> >  	for (i = 0; i < ocelot->num_phys_ports; i++) {
> > +		unsigned int idx = 0;
> > +
> 
> This is a bug which causes ocelot->stats to be overwritten with the
> statistics of port 0, for all ports. Either move the variable
> declaration and initialization with 0 in the larger scope (outside the
> "for" loop), or initialize idx with i * ocelot->num_stats.

My analysis was slightly incorrect. Somehow I managed to fool myself
into thinking that you had tested this in a limited scenario, hence the
reason you didn't notice it's not working. But apparently you didn't
test with traffic at all.

So ocelot->stats isn't overwritten with the stats of port 0 for all
ports. But rather, all ports write into the ocelot->stats space
dedicated for port 0, effectively overwriting the stats of port 0 with
the stats of the last port. And no one populates the ocelot->stats space
for ports [1 .. last]. So no port has good statistics; I don't see a
circumstance where testing could have misled you.
Colin Foster Feb. 8, 2022, 3:41 p.m. UTC | #4
Hi Vladimir,

On Tue, Feb 08, 2022 at 03:03:04PM +0000, Vladimir Oltean wrote:
> On Mon, Feb 07, 2022 at 08:46:44PM -0800, Colin Foster wrote:
> > Create and utilize bulk regmap reads instead of single access for gathering
> > stats. The background reading of statistics happens frequently, and over
> > a few contiguous memory regions.
> > 
> > High speed PCIe buses and MMIO access will probably see negligible
> > performance increase. Lower speed buses like SPI and I2C could see
> > significant performance increase, since the bus configuration and register
> > access times account for a large percentage of data transfer time.
> > 
> > Signed-off-by: Colin Foster <colin.foster@in-advantage.com>
> > ---
> >  drivers/net/ethernet/mscc/ocelot.c | 78 +++++++++++++++++++++++++-----
> >  include/soc/mscc/ocelot.h          |  8 +++
> >  2 files changed, 73 insertions(+), 13 deletions(-)
> > 
> > diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c
> > index 455293aa6343..5efb1f3a1410 100644
> > --- a/drivers/net/ethernet/mscc/ocelot.c
> > +++ b/drivers/net/ethernet/mscc/ocelot.c
> > @@ -1737,32 +1737,41 @@ void ocelot_get_strings(struct ocelot *ocelot, int port, u32 sset, u8 *data)
> >  }
> >  EXPORT_SYMBOL(ocelot_get_strings);
> >  
> > -static void ocelot_update_stats(struct ocelot *ocelot)
> > +static int ocelot_update_stats(struct ocelot *ocelot)
> >  {
> > -	int i, j;
> > +	struct ocelot_stats_region *region;
> > +	int i, j, err = 0;
> >  
> >  	mutex_lock(&ocelot->stats_lock);
> >  
> >  	for (i = 0; i < ocelot->num_phys_ports; i++) {
> > +		unsigned int idx = 0;
> > +
> 
> This is a bug which causes ocelot->stats to be overwritten with the
> statistics of port 0, for all ports. Either move the variable
> declaration and initialization with 0 in the larger scope (outside the
> "for" loop), or initialize idx with i * ocelot->num_stats.

I see that now. It is confusing and I'll clear it up. I never caught
this because I'm testing in a setup where port 0 is the CPU port, so I
can't get ethtool stats. Thanks!

> 
> >  		/* Configure the port to read the stats from */
> >  		ocelot_write(ocelot, SYS_STAT_CFG_STAT_VIEW(i), SYS_STAT_CFG);
> >  
> > -		for (j = 0; j < ocelot->num_stats; j++) {
> > -			u32 val;
> > -			unsigned int idx = i * ocelot->num_stats + j;
> > +		list_for_each_entry(region, &ocelot->stats_regions, node) {
> > +			err = ocelot_bulk_read_rix(ocelot, SYS_COUNT_RX_OCTETS,
> > +						   region->offset, region->buf,
> > +						   region->count);
> > +			if (err)
> > +				goto out;
> >  
> > -			val = ocelot_read_rix(ocelot, SYS_COUNT_RX_OCTETS,
> > -					      ocelot->stats_layout[j].offset);
> > +			for (j = 0; j < region->count; j++) {
> > +				if (region->buf[j] < (ocelot->stats[idx + j] & U32_MAX))
> > +					ocelot->stats[idx + j] += (u64)1 << 32;
> 
> I'd prefer if you reduce the apparent complexity of this logic by
> creating some temporary variables:
> 
> 	u64 *stat = &ocelot->stats[idx + j];
> 	u64 val = region->buf[j];

Can do. Thanks for the suggestion.

> 
> >  
> > -			if (val < (ocelot->stats[idx] & U32_MAX))
> > -				ocelot->stats[idx] += (u64)1 << 32;
> > +				ocelot->stats[idx + j] = (ocelot->stats[idx + j] &
> > +							~(u64)U32_MAX) + region->buf[j];
> > +			}
> >  
> > -			ocelot->stats[idx] = (ocelot->stats[idx] &
> > -					      ~(u64)U32_MAX) + val;
> > +			idx += region->count;
> >  		}
> >  	}
> >  
> > +out:
> >  	mutex_unlock(&ocelot->stats_lock);
> > +	return err;
> >  }
> >  
> >  static void ocelot_check_stats_work(struct work_struct *work)
> > @@ -1779,10 +1788,11 @@ static void ocelot_check_stats_work(struct work_struct *work)
> >  
> >  void ocelot_get_ethtool_stats(struct ocelot *ocelot, int port, u64 *data)
> >  {
> > -	int i;
> > +	int i, err;
> >  
> >  	/* check and update now */
> > -	ocelot_update_stats(ocelot);
> > +	err = ocelot_update_stats(ocelot);
> 
> Please, as a separate change, introduce a function that reads the
> statistics for a single port, and make ethtool call that and not the
> entire port array, it's pointless.
> 
> > +	WARN_ONCE(err, "Error %d updating ethtool stats\n", err);
> >  
> >  	/* Copy all counters */
> >  	for (i = 0; i < ocelot->num_stats; i++)
> 
> and here, in the unseen part of the context, lies:
> 
> 	/* Copy all counters */
> 	for (i = 0; i < ocelot->num_stats; i++)
> 		*data++ = ocelot->stats[port * ocelot->num_stats + i];
> 
> I think this is buggy, because this is a reader of ocelot->stats which
> is not protected by ocelot->stats_lock (it was taken and dropped by
> ocelot_update_stats). But a second ocelot_update_stats() can run
> concurrently with ethtool and ruin the day, modifying the array at the
> same time as it's being read out.
> 
> The new function that you introduce, for reading the stats of a single
> port, should require that ocelot->stats_lock is already held, and you
> should hold it from top-level (ocelot_get_ethtool_stats).

I'll add this fix as a separate patch. Thanks as always for the
feedback!

> 
> > @@ -1799,6 +1809,41 @@ int ocelot_get_sset_count(struct ocelot *ocelot, int port, int sset)
> >  }
> >  EXPORT_SYMBOL(ocelot_get_sset_count);
> >  
> > +static int ocelot_prepare_stats_regions(struct ocelot *ocelot)
> > +{
> > +	struct ocelot_stats_region *region = NULL;
> > +	unsigned int last;
> > +	int i;
> > +
> > +	INIT_LIST_HEAD(&ocelot->stats_regions);
> > +
> > +	for (i = 0; i < ocelot->num_stats; i++) {
> > +		if (region && ocelot->stats_layout[i].offset == last + 1) {
> > +			region->count++;
> > +		} else {
> > +			region = devm_kzalloc(ocelot->dev, sizeof(*region),
> > +					      GFP_KERNEL);
> > +			if (!region)
> > +				return -ENOMEM;
> > +
> > +			region->offset = ocelot->stats_layout[i].offset;
> > +			region->count = 1;
> > +			list_add_tail(&region->node, &ocelot->stats_regions);
> > +		}
> > +
> > +		last = ocelot->stats_layout[i].offset;
> > +	}
> > +
> > +	list_for_each_entry(region, &ocelot->stats_regions, node) {
> > +		region->buf = devm_kcalloc(ocelot->dev, region->count,
> > +					   sizeof(*region->buf), GFP_KERNEL);
> > +		if (!region->buf)
> > +			return -ENOMEM;
> > +	}
> > +
> > +	return 0;
> > +}
> > +
> >  int ocelot_get_ts_info(struct ocelot *ocelot, int port,
> >  		       struct ethtool_ts_info *info)
> >  {
> > @@ -2799,6 +2844,13 @@ int ocelot_init(struct ocelot *ocelot)
> >  				 ANA_CPUQ_8021_CFG_CPUQ_BPDU_VAL(6),
> >  				 ANA_CPUQ_8021_CFG, i);
> >  
> > +	ret = ocelot_prepare_stats_regions(ocelot);
> > +	if (ret) {
> > +		destroy_workqueue(ocelot->stats_queue);
> > +		destroy_workqueue(ocelot->owq);
> > +		return ret;
> > +	}
> > +
> >  	INIT_DELAYED_WORK(&ocelot->stats_work, ocelot_check_stats_work);
> >  	queue_delayed_work(ocelot->stats_queue, &ocelot->stats_work,
> >  			   OCELOT_STATS_CHECK_DELAY);
> > diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h
> > index 312b72558659..d3291a5f7e88 100644
> > --- a/include/soc/mscc/ocelot.h
> > +++ b/include/soc/mscc/ocelot.h
> > @@ -542,6 +542,13 @@ struct ocelot_stat_layout {
> >  	char name[ETH_GSTRING_LEN];
> >  };
> >  
> > +struct ocelot_stats_region {
> > +	struct list_head node;
> > +	u32 offset;
> > +	int count;
> > +	u32 *buf;
> > +};
> > +
> >  enum ocelot_tag_prefix {
> >  	OCELOT_TAG_PREFIX_DISABLED	= 0,
> >  	OCELOT_TAG_PREFIX_NONE,
> > @@ -673,6 +680,7 @@ struct ocelot {
> >  	struct regmap_field		*regfields[REGFIELD_MAX];
> >  	const u32 *const		*map;
> >  	const struct ocelot_stat_layout	*stats_layout;
> > +	struct list_head		stats_regions;
> >  	unsigned int			num_stats;
> >  
> >  	u32				pool_size[OCELOT_SB_NUM][OCELOT_SB_POOL_NUM];
> > -- 
> > 2.25.1
> >
Vladimir Oltean Feb. 8, 2022, 3:45 p.m. UTC | #5
On Tue, Feb 08, 2022 at 07:41:56AM -0800, Colin Foster wrote:
> I see that now. It is confusing and I'll clear it up. I never caught
> this because I'm testing in a setup where port 0 is the CPU port, so I
> can't get ethtool stats. Thanks!

You can retrieve the stats on the CPU port, as they are appended to the
ethtool stats of the DSA master, prefixed with "p%02d_", cpu_dp->index.

ethtool -S eth0 | grep -v ': 0' # where eth0 is the DSA master
Colin Foster Feb. 8, 2022, 4:07 p.m. UTC | #6
On Tue, Feb 08, 2022 at 03:34:49PM +0000, Vladimir Oltean wrote:
> On Tue, Feb 08, 2022 at 05:03:03PM +0200, Vladimir Oltean wrote:
> > >  	for (i = 0; i < ocelot->num_phys_ports; i++) {
> > > +		unsigned int idx = 0;
> > > +
> > 
> > This is a bug which causes ocelot->stats to be overwritten with the
> > statistics of port 0, for all ports. Either move the variable
> > declaration and initialization with 0 in the larger scope (outside the
> > "for" loop), or initialize idx with i * ocelot->num_stats.
> 
> My analysis was slightly incorrect. Somehow I managed to fool myself
> into thinking that you had tested this in a limited scenario, hence the
> reason you didn't notice it's not working. But apparently you didn't
> test with traffic at all.
> 
> So ocelot->stats isn't overwritten with the stats of port 0 for all
> ports. But rather, all ports write into the ocelot->stats space
> dedicated for port 0, effectively overwriting the stats of port 0 with
> the stats of the last port. And no one populates the ocelot->stats space
> for ports [1 .. last]. So no port has good statistics, I don't see a
> circumstance where testing could have misled you.

Both ethtool -S and debugfs show statistics that I'd expect in my test
setup. But yes, the port 0 stats would be especially curious.

EDIT: Just saw your message about these. Yes, port 0 is all messed up:
# ethtool -S eth0 | grep p00 | head
     p00_rx_octets: 13602161426432
     p00_rx_unicast: 4539780431872
     p00_rx_multicast: 9028021256192
     p00_rx_broadcast: 8980776615936
     p00_rx_shorts: 0
     p00_rx_fragments: 0
     p00_rx_jabbers: 0
     p00_rx_crc_align_errs: 0
     p00_rx_sym_errs: 0
     p00_rx_frames_below_65_octets: 4539780431872


(configuration - swp2 plugged into swp3, bridged with stp. swp1 into my
development machine, swp4-7 not up)

# pwd
/sys/class/net
# cat swp[123]/statistics/tx_bytes
44616
44668
52
# cat swp[123]/statistics/rx_bytes
34574
46
73272

# ethtool -S swp1 | head -n 5
NIC statistics:
     tx_packets: 942
     tx_bytes: 48984
     rx_packets: 764
     rx_bytes: 37808
# ethtool -S swp2 | head -n 5
NIC statistics:
     tx_packets: 946
     tx_bytes: 49192
     rx_packets: 1
     rx_bytes: 46
# ethtool -S swp3 | head -n 5
NIC statistics:
     tx_packets: 1
     tx_bytes: 52
     rx_packets: 1699
     rx_bytes: 80658
# ethtool -S swp4 | head -n 5
NIC statistics:
     tx_packets: 0
     tx_bytes: 0
     rx_packets: 0
     rx_bytes: 0
# ip link
1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN mode DEFAULT group default qlen 1000
    link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
2: eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1520 qdisc mq state UP mode DEFAULT group default qlen 1000
    link/ether 24:76:25:76:35:37 brd ff:ff:ff:ff:ff:ff
3: swp1@eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue master br0 state UP mode DEFAULT group default qlen 1000
    link/ether 24:76:25:76:35:37 brd ff:ff:ff:ff:ff:ff
4: swp2@eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue master br0 state UP mode DEFAULT group default qlen 1000
    link/ether 24:76:25:76:35:37 brd ff:ff:ff:ff:ff:ff
5: swp3@eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue master br0 state UP mode DEFAULT group default qlen 1000
    link/ether 24:76:25:76:35:37 brd ff:ff:ff:ff:ff:ff
6: swp4@eth0: <BROADCAST,MULTICAST> mtu 1500 qdisc noop state DOWN mode DEFAULT group default qlen 1000
    link/ether 24:76:25:76:35:37 brd ff:ff:ff:ff:ff:ff
7: swp5@eth0: <BROADCAST,MULTICAST> mtu 1500 qdisc noop state DOWN mode DEFAULT group default qlen 1000
    link/ether 24:76:25:76:35:37 brd ff:ff:ff:ff:ff:ff
8: swp6@eth0: <BROADCAST,MULTICAST> mtu 1500 qdisc noop state DOWN mode DEFAULT group default qlen 1000
    link/ether 24:76:25:76:35:37 brd ff:ff:ff:ff:ff:ff
9: swp7@eth0: <BROADCAST,MULTICAST> mtu 1500 qdisc noop state DOWN mode DEFAULT group default qlen 1000
    link/ether 24:76:25:76:35:37 brd ff:ff:ff:ff:ff:ff
10: br0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue state UP mode DEFAULT group default qlen 1000
    link/ether 24:76:25:76:35:37 brd ff:ff:ff:ff:ff:ff


Clearly my testing wasn't sufficient as there were still issues, but
there were reasonable signs of things working as expected on ports 1-4.
Vladimir Oltean Feb. 8, 2022, 4:10 p.m. UTC | #7
On Tue, Feb 08, 2022 at 08:07:42AM -0800, Colin Foster wrote:
> # ethtool -S swp1 | head -n 5
> NIC statistics:
>      tx_packets: 942
>      tx_bytes: 48984
>      rx_packets: 764
>      rx_bytes: 37808
> # ethtool -S swp2 | head -n 5
> NIC statistics:
>      tx_packets: 946
>      tx_bytes: 49192
>      rx_packets: 1
>      rx_bytes: 46
> # ethtool -S swp3 | head -n 5
> NIC statistics:
>      tx_packets: 1
>      tx_bytes: 52
>      rx_packets: 1699
>      rx_bytes: 80658
> # ethtool -S swp4 | head -n 5
> NIC statistics:
>      tx_packets: 0
>      tx_bytes: 0
>      rx_packets: 0
>      rx_bytes: 0

I understand what confused you now. These are software counters patched
in by dsa_slave_get_ethtool_stats(). They are in no way the counters
from felix_info :: stats_layout; please compare the strings!
Colin Foster Feb. 8, 2022, 4:49 p.m. UTC | #8
On Tue, Feb 08, 2022 at 03:45:15PM +0000, Vladimir Oltean wrote:
> On Tue, Feb 08, 2022 at 07:41:56AM -0800, Colin Foster wrote:
> > I see that now. It is confusing and I'll clear it up. I never caught
> > this because I'm testing in a setup where port 0 is the CPU port, so I
> > can't get ethtool stats. Thanks!
> 
> You can retrieve the stats on the CPU port, as they are appended to the
> ethtool stats of the DSA master, prefixed with "p%02d_", cpu_dp->index.
> 
> ethtool -S eth0 | grep -v ': 0' # where eth0 is the DSA master

Thanks for this hint. I didn't even think to go looking for it since the
DSA Documenation mentions an "inability to fetch CPU port statistics
counters using ethtool." I see now that this bullet point is from 2015,
and probably should be removed entirely.
Vladimir Oltean Feb. 8, 2022, 5:02 p.m. UTC | #9
On Tue, Feb 08, 2022 at 08:49:52AM -0800, Colin Foster wrote:
> On Tue, Feb 08, 2022 at 03:45:15PM +0000, Vladimir Oltean wrote:
> > On Tue, Feb 08, 2022 at 07:41:56AM -0800, Colin Foster wrote:
> > > I see that now. It is confusing and I'll clear it up. I never caught
> > > this because I'm testing in a setup where port 0 is the CPU port, so I
> > > can't get ethtool stats. Thanks!
> > 
> > You can retrieve the stats on the CPU port, as they are appended to the
> > ethtool stats of the DSA master, prefixed with "p%02d_", cpu_dp->index.
> > 
> > ethtool -S eth0 | grep -v ': 0' # where eth0 is the DSA master
> 
> Thanks for this hint. I didn't even think to go looking for it since the
> DSA Documenation mentions an "inability to fetch CPU port statistics
> counters using ethtool." I see now that this bullet point is from 2015,
> and probably should be removed entirely.

Yes, that paragraph did not age well, although it is still true in some
sense, just not in that phrasing. You still cannot collect ethtool
statistics for cascade (DSA) ports. Maybe it should be updated.

Patch

diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c
index 455293aa6343..5efb1f3a1410 100644
--- a/drivers/net/ethernet/mscc/ocelot.c
+++ b/drivers/net/ethernet/mscc/ocelot.c
@@ -1737,32 +1737,41 @@  void ocelot_get_strings(struct ocelot *ocelot, int port, u32 sset, u8 *data)
 }
 EXPORT_SYMBOL(ocelot_get_strings);
 
-static void ocelot_update_stats(struct ocelot *ocelot)
+static int ocelot_update_stats(struct ocelot *ocelot)
 {
-	int i, j;
+	struct ocelot_stats_region *region;
+	int i, j, err = 0;
 
 	mutex_lock(&ocelot->stats_lock);
 
 	for (i = 0; i < ocelot->num_phys_ports; i++) {
+		unsigned int idx = 0;
+
 		/* Configure the port to read the stats from */
 		ocelot_write(ocelot, SYS_STAT_CFG_STAT_VIEW(i), SYS_STAT_CFG);
 
-		for (j = 0; j < ocelot->num_stats; j++) {
-			u32 val;
-			unsigned int idx = i * ocelot->num_stats + j;
+		list_for_each_entry(region, &ocelot->stats_regions, node) {
+			err = ocelot_bulk_read_rix(ocelot, SYS_COUNT_RX_OCTETS,
+						   region->offset, region->buf,
+						   region->count);
+			if (err)
+				goto out;
 
-			val = ocelot_read_rix(ocelot, SYS_COUNT_RX_OCTETS,
-					      ocelot->stats_layout[j].offset);
+			for (j = 0; j < region->count; j++) {
+				if (region->buf[j] < (ocelot->stats[idx + j] & U32_MAX))
+					ocelot->stats[idx + j] += (u64)1 << 32;
 
-			if (val < (ocelot->stats[idx] & U32_MAX))
-				ocelot->stats[idx] += (u64)1 << 32;
+				ocelot->stats[idx + j] = (ocelot->stats[idx + j] &
+							~(u64)U32_MAX) + region->buf[j];
+			}
 
-			ocelot->stats[idx] = (ocelot->stats[idx] &
-					      ~(u64)U32_MAX) + val;
+			idx += region->count;
 		}
 	}
 
+out:
 	mutex_unlock(&ocelot->stats_lock);
+	return err;
 }
 
 static void ocelot_check_stats_work(struct work_struct *work)
@@ -1779,10 +1788,11 @@  static void ocelot_check_stats_work(struct work_struct *work)
 
 void ocelot_get_ethtool_stats(struct ocelot *ocelot, int port, u64 *data)
 {
-	int i;
+	int i, err;
 
 	/* check and update now */
-	ocelot_update_stats(ocelot);
+	err = ocelot_update_stats(ocelot);
+	WARN_ONCE(err, "Error %d updating ethtool stats\n", err);
 
 	/* Copy all counters */
 	for (i = 0; i < ocelot->num_stats; i++)
@@ -1799,6 +1809,41 @@  int ocelot_get_sset_count(struct ocelot *ocelot, int port, int sset)
 }
 EXPORT_SYMBOL(ocelot_get_sset_count);
 
+static int ocelot_prepare_stats_regions(struct ocelot *ocelot)
+{
+	struct ocelot_stats_region *region = NULL;
+	unsigned int last;
+	int i;
+
+	INIT_LIST_HEAD(&ocelot->stats_regions);
+
+	for (i = 0; i < ocelot->num_stats; i++) {
+		if (region && ocelot->stats_layout[i].offset == last + 1) {
+			region->count++;
+		} else {
+			region = devm_kzalloc(ocelot->dev, sizeof(*region),
+					      GFP_KERNEL);
+			if (!region)
+				return -ENOMEM;
+
+			region->offset = ocelot->stats_layout[i].offset;
+			region->count = 1;
+			list_add_tail(&region->node, &ocelot->stats_regions);
+		}
+
+		last = ocelot->stats_layout[i].offset;
+	}
+
+	list_for_each_entry(region, &ocelot->stats_regions, node) {
+		region->buf = devm_kcalloc(ocelot->dev, region->count,
+					   sizeof(*region->buf), GFP_KERNEL);
+		if (!region->buf)
+			return -ENOMEM;
+	}
+
+	return 0;
+}
+
 int ocelot_get_ts_info(struct ocelot *ocelot, int port,
 		       struct ethtool_ts_info *info)
 {
@@ -2799,6 +2844,13 @@  int ocelot_init(struct ocelot *ocelot)
 				 ANA_CPUQ_8021_CFG_CPUQ_BPDU_VAL(6),
 				 ANA_CPUQ_8021_CFG, i);
 
+	ret = ocelot_prepare_stats_regions(ocelot);
+	if (ret) {
+		destroy_workqueue(ocelot->stats_queue);
+		destroy_workqueue(ocelot->owq);
+		return ret;
+	}
+
 	INIT_DELAYED_WORK(&ocelot->stats_work, ocelot_check_stats_work);
 	queue_delayed_work(ocelot->stats_queue, &ocelot->stats_work,
 			   OCELOT_STATS_CHECK_DELAY);
diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h
index 312b72558659..d3291a5f7e88 100644
--- a/include/soc/mscc/ocelot.h
+++ b/include/soc/mscc/ocelot.h
@@ -542,6 +542,13 @@  struct ocelot_stat_layout {
 	char name[ETH_GSTRING_LEN];
 };
 
+struct ocelot_stats_region {
+	struct list_head node;
+	u32 offset;
+	int count;
+	u32 *buf;
+};
+
 enum ocelot_tag_prefix {
 	OCELOT_TAG_PREFIX_DISABLED	= 0,
 	OCELOT_TAG_PREFIX_NONE,
@@ -673,6 +680,7 @@  struct ocelot {
 	struct regmap_field		*regfields[REGFIELD_MAX];
 	const u32 *const		*map;
 	const struct ocelot_stat_layout	*stats_layout;
+	struct list_head		stats_regions;
 	unsigned int			num_stats;
 
 	u32				pool_size[OCELOT_SB_NUM][OCELOT_SB_POOL_NUM];