Message ID | 20220208044644.359951-4-colin.foster@in-advantage.com (mailing list archive) |
---|---|
State | Superseded |
Delegated to: | Netdev Maintainers |
Headers | show |
Series | use bulk reads for ocelot statistics | expand |
On Mon, Feb 07, 2022 at 08:46:44PM -0800, Colin Foster wrote: > Create and utilize bulk regmap reads instead of single access for gathering > stats. The background reading of statistics happens frequently, and over > a few contiguous memory regions. > > High speed PCIe buses and MMIO access will probably see negligible > performance increase. Lower speed buses like SPI and I2C could see > significant performance increase, since the bus configuration and register > access times account for a large percentage of data transfer time. > > Signed-off-by: Colin Foster <colin.foster@in-advantage.com> > --- Reviewed-by: Vladimir Oltean <vladimir.oltean@nxp.com> Just one minor comment below, but all in all it's fine as is, too. > drivers/net/ethernet/mscc/ocelot.c | 78 +++++++++++++++++++++++++----- > include/soc/mscc/ocelot.h | 8 +++ > 2 files changed, 73 insertions(+), 13 deletions(-) > > diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c > index 455293aa6343..5efb1f3a1410 100644 > --- a/drivers/net/ethernet/mscc/ocelot.c > +++ b/drivers/net/ethernet/mscc/ocelot.c > @@ -1737,32 +1737,41 @@ void ocelot_get_strings(struct ocelot *ocelot, int port, u32 sset, u8 *data) > } > EXPORT_SYMBOL(ocelot_get_strings); > > -static void ocelot_update_stats(struct ocelot *ocelot) > +static int ocelot_update_stats(struct ocelot *ocelot) > { > - int i, j; > + struct ocelot_stats_region *region; > + int i, j, err = 0; > > mutex_lock(&ocelot->stats_lock); > > for (i = 0; i < ocelot->num_phys_ports; i++) { > + unsigned int idx = 0; > + > /* Configure the port to read the stats from */ > ocelot_write(ocelot, SYS_STAT_CFG_STAT_VIEW(i), SYS_STAT_CFG); > > - for (j = 0; j < ocelot->num_stats; j++) { > - u32 val; > - unsigned int idx = i * ocelot->num_stats + j; > + list_for_each_entry(region, &ocelot->stats_regions, node) { > + err = ocelot_bulk_read_rix(ocelot, SYS_COUNT_RX_OCTETS, > + region->offset, region->buf, > + region->count); > + if (err) > + goto out; > > - val = ocelot_read_rix(ocelot, SYS_COUNT_RX_OCTETS, > - ocelot->stats_layout[j].offset); > + for (j = 0; j < region->count; j++) { > + if (region->buf[j] < (ocelot->stats[idx + j] & U32_MAX)) > + ocelot->stats[idx + j] += (u64)1 << 32; > > - if (val < (ocelot->stats[idx] & U32_MAX)) > - ocelot->stats[idx] += (u64)1 << 32; > + ocelot->stats[idx + j] = (ocelot->stats[idx + j] & > + ~(u64)U32_MAX) + region->buf[j]; > + } > > - ocelot->stats[idx] = (ocelot->stats[idx] & > - ~(u64)U32_MAX) + val; > + idx += region->count; > } > } > > +out: > mutex_unlock(&ocelot->stats_lock); > + return err; > } > > static void ocelot_check_stats_work(struct work_struct *work) > @@ -1779,10 +1788,11 @@ static void ocelot_check_stats_work(struct work_struct *work) > > void ocelot_get_ethtool_stats(struct ocelot *ocelot, int port, u64 *data) > { > - int i; > + int i, err; > > /* check and update now */ > - ocelot_update_stats(ocelot); > + err = ocelot_update_stats(ocelot); > + WARN_ONCE(err, "Error %d updating ethtool stats\n", err); I think a dev_err(ocelot->dev, ...) would be more appropriate here. WARN_ON() should be used for truly critical errors (things that should never happen unless bugs are present). When the assertion holds true, a WARN_ON() will print a stack trace, and when "panic_on_warn" is enabled, a WARN() is effectively a BUG() and crashes the kernel. include/asm-generic/bug.h says: /* * WARN(), WARN_ON(), WARN_ON_ONCE, and so on can be used to report * significant kernel issues that need prompt attention if they should ever * appear at runtime. * * Do not use these macros when checking for invalid external inputs * (e.g. invalid system call arguments, or invalid data coming from * network/devices), and on transient conditions like ENOMEM or EAGAIN. * These macros should be used for recoverable kernel issues only. * For invalid external inputs, transient conditions, etc use * pr_err[_once/_ratelimited]() followed by dump_stack(), if necessary. * Do not include "BUG"/"WARNING" in format strings manually to make these * conditions distinguishable from kernel issues. * * Use the versions with printk format strings to provide better diagnostics. */ > > /* Copy all counters */ > for (i = 0; i < ocelot->num_stats; i++) > @@ -1799,6 +1809,41 @@ int ocelot_get_sset_count(struct ocelot *ocelot, int port, int sset) > } > EXPORT_SYMBOL(ocelot_get_sset_count); > > +static int ocelot_prepare_stats_regions(struct ocelot *ocelot) > +{ > + struct ocelot_stats_region *region = NULL; > + unsigned int last; > + int i; > + > + INIT_LIST_HEAD(&ocelot->stats_regions); > + > + for (i = 0; i < ocelot->num_stats; i++) { > + if (region && ocelot->stats_layout[i].offset == last + 1) { > + region->count++; > + } else { > + region = devm_kzalloc(ocelot->dev, sizeof(*region), > + GFP_KERNEL); > + if (!region) > + return -ENOMEM; > + > + region->offset = ocelot->stats_layout[i].offset; > + region->count = 1; > + list_add_tail(®ion->node, &ocelot->stats_regions); > + } > + > + last = ocelot->stats_layout[i].offset; > + } > + > + list_for_each_entry(region, &ocelot->stats_regions, node) { > + region->buf = devm_kcalloc(ocelot->dev, region->count, > + sizeof(*region->buf), GFP_KERNEL); > + if (!region->buf) > + return -ENOMEM; > + } > + > + return 0; > +} > + > int ocelot_get_ts_info(struct ocelot *ocelot, int port, > struct ethtool_ts_info *info) > { > @@ -2799,6 +2844,13 @@ int ocelot_init(struct ocelot *ocelot) > ANA_CPUQ_8021_CFG_CPUQ_BPDU_VAL(6), > ANA_CPUQ_8021_CFG, i); > > + ret = ocelot_prepare_stats_regions(ocelot); > + if (ret) { > + destroy_workqueue(ocelot->stats_queue); > + destroy_workqueue(ocelot->owq); > + return ret; > + } > + > INIT_DELAYED_WORK(&ocelot->stats_work, ocelot_check_stats_work); > queue_delayed_work(ocelot->stats_queue, &ocelot->stats_work, > OCELOT_STATS_CHECK_DELAY); > diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h > index 312b72558659..d3291a5f7e88 100644 > --- a/include/soc/mscc/ocelot.h > +++ b/include/soc/mscc/ocelot.h > @@ -542,6 +542,13 @@ struct ocelot_stat_layout { > char name[ETH_GSTRING_LEN]; > }; > > +struct ocelot_stats_region { > + struct list_head node; > + u32 offset; > + int count; > + u32 *buf; > +}; > + > enum ocelot_tag_prefix { > OCELOT_TAG_PREFIX_DISABLED = 0, > OCELOT_TAG_PREFIX_NONE, > @@ -673,6 +680,7 @@ struct ocelot { > struct regmap_field *regfields[REGFIELD_MAX]; > const u32 *const *map; > const struct ocelot_stat_layout *stats_layout; > + struct list_head stats_regions; > unsigned int num_stats; > > u32 pool_size[OCELOT_SB_NUM][OCELOT_SB_POOL_NUM]; > -- > 2.25.1 >
On Mon, Feb 07, 2022 at 08:46:44PM -0800, Colin Foster wrote: > Create and utilize bulk regmap reads instead of single access for gathering > stats. The background reading of statistics happens frequently, and over > a few contiguous memory regions. > > High speed PCIe buses and MMIO access will probably see negligible > performance increase. Lower speed buses like SPI and I2C could see > significant performance increase, since the bus configuration and register > access times account for a large percentage of data transfer time. > > Signed-off-by: Colin Foster <colin.foster@in-advantage.com> > --- > drivers/net/ethernet/mscc/ocelot.c | 78 +++++++++++++++++++++++++----- > include/soc/mscc/ocelot.h | 8 +++ > 2 files changed, 73 insertions(+), 13 deletions(-) > > diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c > index 455293aa6343..5efb1f3a1410 100644 > --- a/drivers/net/ethernet/mscc/ocelot.c > +++ b/drivers/net/ethernet/mscc/ocelot.c > @@ -1737,32 +1737,41 @@ void ocelot_get_strings(struct ocelot *ocelot, int port, u32 sset, u8 *data) > } > EXPORT_SYMBOL(ocelot_get_strings); > > -static void ocelot_update_stats(struct ocelot *ocelot) > +static int ocelot_update_stats(struct ocelot *ocelot) > { > - int i, j; > + struct ocelot_stats_region *region; > + int i, j, err = 0; > > mutex_lock(&ocelot->stats_lock); > > for (i = 0; i < ocelot->num_phys_ports; i++) { > + unsigned int idx = 0; > + This is a bug which causes ocelot->stats to be overwritten with the statistics of port 0, for all ports. Either move the variable declaration and initialization with 0 in the larger scope (outside the "for" loop), or initialize idx with i * ocelot->num_stats. > /* Configure the port to read the stats from */ > ocelot_write(ocelot, SYS_STAT_CFG_STAT_VIEW(i), SYS_STAT_CFG); > > - for (j = 0; j < ocelot->num_stats; j++) { > - u32 val; > - unsigned int idx = i * ocelot->num_stats + j; > + list_for_each_entry(region, &ocelot->stats_regions, node) { > + err = ocelot_bulk_read_rix(ocelot, SYS_COUNT_RX_OCTETS, > + region->offset, region->buf, > + region->count); > + if (err) > + goto out; > > - val = ocelot_read_rix(ocelot, SYS_COUNT_RX_OCTETS, > - ocelot->stats_layout[j].offset); > + for (j = 0; j < region->count; j++) { > + if (region->buf[j] < (ocelot->stats[idx + j] & U32_MAX)) > + ocelot->stats[idx + j] += (u64)1 << 32; I'd prefer if you reduce the apparent complexity of this logic by creating some temporary variables: u64 *stat = &ocelot->stats[idx + j]; u64 val = region->buf[j]; > > - if (val < (ocelot->stats[idx] & U32_MAX)) > - ocelot->stats[idx] += (u64)1 << 32; > + ocelot->stats[idx + j] = (ocelot->stats[idx + j] & > + ~(u64)U32_MAX) + region->buf[j]; > + } > > - ocelot->stats[idx] = (ocelot->stats[idx] & > - ~(u64)U32_MAX) + val; > + idx += region->count; > } > } > > +out: > mutex_unlock(&ocelot->stats_lock); > + return err; > } > > static void ocelot_check_stats_work(struct work_struct *work) > @@ -1779,10 +1788,11 @@ static void ocelot_check_stats_work(struct work_struct *work) > > void ocelot_get_ethtool_stats(struct ocelot *ocelot, int port, u64 *data) > { > - int i; > + int i, err; > > /* check and update now */ > - ocelot_update_stats(ocelot); > + err = ocelot_update_stats(ocelot); Please, as a separate change, introduce a function that reads the statistics for a single port, and make ethtool call that and not the entire port array, it's pointless. > + WARN_ONCE(err, "Error %d updating ethtool stats\n", err); > > /* Copy all counters */ > for (i = 0; i < ocelot->num_stats; i++) and here, in the unseen part of the context, lies: /* Copy all counters */ for (i = 0; i < ocelot->num_stats; i++) *data++ = ocelot->stats[port * ocelot->num_stats + i]; I think this is buggy, because this is a reader of ocelot->stats which is not protected by ocelot->stats_lock (it was taken and dropped by ocelot_update_stats). But a second ocelot_update_stats() can run concurrently with ethtool and ruin the day, modifying the array at the same time as it's being read out. The new function that you introduce, for reading the stats of a single port, should require that ocelot->stats_lock is already held, and you should hold it from top-level (ocelot_get_ethtool_stats). > @@ -1799,6 +1809,41 @@ int ocelot_get_sset_count(struct ocelot *ocelot, int port, int sset) > } > EXPORT_SYMBOL(ocelot_get_sset_count); > > +static int ocelot_prepare_stats_regions(struct ocelot *ocelot) > +{ > + struct ocelot_stats_region *region = NULL; > + unsigned int last; > + int i; > + > + INIT_LIST_HEAD(&ocelot->stats_regions); > + > + for (i = 0; i < ocelot->num_stats; i++) { > + if (region && ocelot->stats_layout[i].offset == last + 1) { > + region->count++; > + } else { > + region = devm_kzalloc(ocelot->dev, sizeof(*region), > + GFP_KERNEL); > + if (!region) > + return -ENOMEM; > + > + region->offset = ocelot->stats_layout[i].offset; > + region->count = 1; > + list_add_tail(®ion->node, &ocelot->stats_regions); > + } > + > + last = ocelot->stats_layout[i].offset; > + } > + > + list_for_each_entry(region, &ocelot->stats_regions, node) { > + region->buf = devm_kcalloc(ocelot->dev, region->count, > + sizeof(*region->buf), GFP_KERNEL); > + if (!region->buf) > + return -ENOMEM; > + } > + > + return 0; > +} > + > int ocelot_get_ts_info(struct ocelot *ocelot, int port, > struct ethtool_ts_info *info) > { > @@ -2799,6 +2844,13 @@ int ocelot_init(struct ocelot *ocelot) > ANA_CPUQ_8021_CFG_CPUQ_BPDU_VAL(6), > ANA_CPUQ_8021_CFG, i); > > + ret = ocelot_prepare_stats_regions(ocelot); > + if (ret) { > + destroy_workqueue(ocelot->stats_queue); > + destroy_workqueue(ocelot->owq); > + return ret; > + } > + > INIT_DELAYED_WORK(&ocelot->stats_work, ocelot_check_stats_work); > queue_delayed_work(ocelot->stats_queue, &ocelot->stats_work, > OCELOT_STATS_CHECK_DELAY); > diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h > index 312b72558659..d3291a5f7e88 100644 > --- a/include/soc/mscc/ocelot.h > +++ b/include/soc/mscc/ocelot.h > @@ -542,6 +542,13 @@ struct ocelot_stat_layout { > char name[ETH_GSTRING_LEN]; > }; > > +struct ocelot_stats_region { > + struct list_head node; > + u32 offset; > + int count; > + u32 *buf; > +}; > + > enum ocelot_tag_prefix { > OCELOT_TAG_PREFIX_DISABLED = 0, > OCELOT_TAG_PREFIX_NONE, > @@ -673,6 +680,7 @@ struct ocelot { > struct regmap_field *regfields[REGFIELD_MAX]; > const u32 *const *map; > const struct ocelot_stat_layout *stats_layout; > + struct list_head stats_regions; > unsigned int num_stats; > > u32 pool_size[OCELOT_SB_NUM][OCELOT_SB_POOL_NUM]; > -- > 2.25.1 >
On Tue, Feb 08, 2022 at 05:03:03PM +0200, Vladimir Oltean wrote: > > for (i = 0; i < ocelot->num_phys_ports; i++) { > > + unsigned int idx = 0; > > + > > This is a bug which causes ocelot->stats to be overwritten with the > statistics of port 0, for all ports. Either move the variable > declaration and initialization with 0 in the larger scope (outside the > "for" loop), or initialize idx with i * ocelot->num_stats. My analysis was slightly incorrect. Somehow I managed to fool myself into thinking that you had tested this in a limited scenario, hence the reason you didn't notice it's not working. But apparently you didn't test with traffic at all. So ocelot->stats isn't overwritten with the stats of port 0 for all ports. But rather, all ports write into the ocelot->stats space dedicated for port 0, effectively overwriting the stats of port 0 with the stats of the last port. And no one populates the ocelot->stats space for ports [1 .. last]. So no port has good statistics, I don't see a circumstance where testing could have misled you.
Hi Vladimir, On Tue, Feb 08, 2022 at 03:03:04PM +0000, Vladimir Oltean wrote: > On Mon, Feb 07, 2022 at 08:46:44PM -0800, Colin Foster wrote: > > Create and utilize bulk regmap reads instead of single access for gathering > > stats. The background reading of statistics happens frequently, and over > > a few contiguous memory regions. > > > > High speed PCIe buses and MMIO access will probably see negligible > > performance increase. Lower speed buses like SPI and I2C could see > > significant performance increase, since the bus configuration and register > > access times account for a large percentage of data transfer time. > > > > Signed-off-by: Colin Foster <colin.foster@in-advantage.com> > > --- > > drivers/net/ethernet/mscc/ocelot.c | 78 +++++++++++++++++++++++++----- > > include/soc/mscc/ocelot.h | 8 +++ > > 2 files changed, 73 insertions(+), 13 deletions(-) > > > > diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c > > index 455293aa6343..5efb1f3a1410 100644 > > --- a/drivers/net/ethernet/mscc/ocelot.c > > +++ b/drivers/net/ethernet/mscc/ocelot.c > > @@ -1737,32 +1737,41 @@ void ocelot_get_strings(struct ocelot *ocelot, int port, u32 sset, u8 *data) > > } > > EXPORT_SYMBOL(ocelot_get_strings); > > > > -static void ocelot_update_stats(struct ocelot *ocelot) > > +static int ocelot_update_stats(struct ocelot *ocelot) > > { > > - int i, j; > > + struct ocelot_stats_region *region; > > + int i, j, err = 0; > > > > mutex_lock(&ocelot->stats_lock); > > > > for (i = 0; i < ocelot->num_phys_ports; i++) { > > + unsigned int idx = 0; > > + > > This is a bug which causes ocelot->stats to be overwritten with the > statistics of port 0, for all ports. Either move the variable > declaration and initialization with 0 in the larger scope (outside the > "for" loop), or initialize idx with i * ocelot->num_stats. I see that now. It is confusing and I'll clear it up. I never caught this because I'm testing in a setup where port 0 is the CPU port, so I can't get ethtool stats. Thanks! > > > /* Configure the port to read the stats from */ > > ocelot_write(ocelot, SYS_STAT_CFG_STAT_VIEW(i), SYS_STAT_CFG); > > > > - for (j = 0; j < ocelot->num_stats; j++) { > > - u32 val; > > - unsigned int idx = i * ocelot->num_stats + j; > > + list_for_each_entry(region, &ocelot->stats_regions, node) { > > + err = ocelot_bulk_read_rix(ocelot, SYS_COUNT_RX_OCTETS, > > + region->offset, region->buf, > > + region->count); > > + if (err) > > + goto out; > > > > - val = ocelot_read_rix(ocelot, SYS_COUNT_RX_OCTETS, > > - ocelot->stats_layout[j].offset); > > + for (j = 0; j < region->count; j++) { > > + if (region->buf[j] < (ocelot->stats[idx + j] & U32_MAX)) > > + ocelot->stats[idx + j] += (u64)1 << 32; > > I'd prefer if you reduce the apparent complexity of this logic by > creating some temporary variables: > > u64 *stat = &ocelot->stats[idx + j]; > u64 val = region->buf[j]; Can do. Thanks for the suggestion. > > > > > - if (val < (ocelot->stats[idx] & U32_MAX)) > > - ocelot->stats[idx] += (u64)1 << 32; > > + ocelot->stats[idx + j] = (ocelot->stats[idx + j] & > > + ~(u64)U32_MAX) + region->buf[j]; > > + } > > > > - ocelot->stats[idx] = (ocelot->stats[idx] & > > - ~(u64)U32_MAX) + val; > > + idx += region->count; > > } > > } > > > > +out: > > mutex_unlock(&ocelot->stats_lock); > > + return err; > > } > > > > static void ocelot_check_stats_work(struct work_struct *work) > > @@ -1779,10 +1788,11 @@ static void ocelot_check_stats_work(struct work_struct *work) > > > > void ocelot_get_ethtool_stats(struct ocelot *ocelot, int port, u64 *data) > > { > > - int i; > > + int i, err; > > > > /* check and update now */ > > - ocelot_update_stats(ocelot); > > + err = ocelot_update_stats(ocelot); > > Please, as a separate change, introduce a function that reads the > statistics for a single port, and make ethtool call that and not the > entire port array, it's pointless. > > > + WARN_ONCE(err, "Error %d updating ethtool stats\n", err); > > > > /* Copy all counters */ > > for (i = 0; i < ocelot->num_stats; i++) > > and here, in the unseen part of the context, lies: > > /* Copy all counters */ > for (i = 0; i < ocelot->num_stats; i++) > *data++ = ocelot->stats[port * ocelot->num_stats + i]; > > I think this is buggy, because this is a reader of ocelot->stats which > is not protected by ocelot->stats_lock (it was taken and dropped by > ocelot_update_stats). But a second ocelot_update_stats() can run > concurrently with ethtool and ruin the day, modifying the array at the > same time as it's being read out. > > The new function that you introduce, for reading the stats of a single > port, should require that ocelot->stats_lock is already held, and you > should hold it from top-level (ocelot_get_ethtool_stats). I'll add this fix as a separate patch. Thanks as always for the feedback! > > > @@ -1799,6 +1809,41 @@ int ocelot_get_sset_count(struct ocelot *ocelot, int port, int sset) > > } > > EXPORT_SYMBOL(ocelot_get_sset_count); > > > > +static int ocelot_prepare_stats_regions(struct ocelot *ocelot) > > +{ > > + struct ocelot_stats_region *region = NULL; > > + unsigned int last; > > + int i; > > + > > + INIT_LIST_HEAD(&ocelot->stats_regions); > > + > > + for (i = 0; i < ocelot->num_stats; i++) { > > + if (region && ocelot->stats_layout[i].offset == last + 1) { > > + region->count++; > > + } else { > > + region = devm_kzalloc(ocelot->dev, sizeof(*region), > > + GFP_KERNEL); > > + if (!region) > > + return -ENOMEM; > > + > > + region->offset = ocelot->stats_layout[i].offset; > > + region->count = 1; > > + list_add_tail(®ion->node, &ocelot->stats_regions); > > + } > > + > > + last = ocelot->stats_layout[i].offset; > > + } > > + > > + list_for_each_entry(region, &ocelot->stats_regions, node) { > > + region->buf = devm_kcalloc(ocelot->dev, region->count, > > + sizeof(*region->buf), GFP_KERNEL); > > + if (!region->buf) > > + return -ENOMEM; > > + } > > + > > + return 0; > > +} > > + > > int ocelot_get_ts_info(struct ocelot *ocelot, int port, > > struct ethtool_ts_info *info) > > { > > @@ -2799,6 +2844,13 @@ int ocelot_init(struct ocelot *ocelot) > > ANA_CPUQ_8021_CFG_CPUQ_BPDU_VAL(6), > > ANA_CPUQ_8021_CFG, i); > > > > + ret = ocelot_prepare_stats_regions(ocelot); > > + if (ret) { > > + destroy_workqueue(ocelot->stats_queue); > > + destroy_workqueue(ocelot->owq); > > + return ret; > > + } > > + > > INIT_DELAYED_WORK(&ocelot->stats_work, ocelot_check_stats_work); > > queue_delayed_work(ocelot->stats_queue, &ocelot->stats_work, > > OCELOT_STATS_CHECK_DELAY); > > diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h > > index 312b72558659..d3291a5f7e88 100644 > > --- a/include/soc/mscc/ocelot.h > > +++ b/include/soc/mscc/ocelot.h > > @@ -542,6 +542,13 @@ struct ocelot_stat_layout { > > char name[ETH_GSTRING_LEN]; > > }; > > > > +struct ocelot_stats_region { > > + struct list_head node; > > + u32 offset; > > + int count; > > + u32 *buf; > > +}; > > + > > enum ocelot_tag_prefix { > > OCELOT_TAG_PREFIX_DISABLED = 0, > > OCELOT_TAG_PREFIX_NONE, > > @@ -673,6 +680,7 @@ struct ocelot { > > struct regmap_field *regfields[REGFIELD_MAX]; > > const u32 *const *map; > > const struct ocelot_stat_layout *stats_layout; > > + struct list_head stats_regions; > > unsigned int num_stats; > > > > u32 pool_size[OCELOT_SB_NUM][OCELOT_SB_POOL_NUM]; > > -- > > 2.25.1 > >
On Tue, Feb 08, 2022 at 07:41:56AM -0800, Colin Foster wrote: > I see that now. It is confusing and I'll clear it up. I never caught > this because I'm testing in a setup where port 0 is the CPU port, so I > can't get ethtool stats. Thanks! You can retrieve the stats on the CPU port, as they are appended to the ethtool stats of the DSA master, prefixed with "p%02d_", cpu_dp->index. ethtool -S eth0 | grep -v ': 0' # where eth0 is the DSA master
On Tue, Feb 08, 2022 at 03:34:49PM +0000, Vladimir Oltean wrote: > On Tue, Feb 08, 2022 at 05:03:03PM +0200, Vladimir Oltean wrote: > > > for (i = 0; i < ocelot->num_phys_ports; i++) { > > > + unsigned int idx = 0; > > > + > > > > This is a bug which causes ocelot->stats to be overwritten with the > > statistics of port 0, for all ports. Either move the variable > > declaration and initialization with 0 in the larger scope (outside the > > "for" loop), or initialize idx with i * ocelot->num_stats. > > My analysis was slightly incorrect. Somehow I managed to fool myself > into thinking that you had tested this in a limited scenario, hence the > reason you didn't notice it's not working. But apparently you didn't > test with traffic at all. > > So ocelot->stats isn't overwritten with the stats of port 0 for all > ports. But rather, all ports write into the ocelot->stats space > dedicated for port 0, effectively overwriting the stats of port 0 with > the stats of the last port. And no one populates the ocelot->stats space > for ports [1 .. last]. So no port has good statistics, I don't see a > circumstance where testing could have misled you. Both ethtool -S and debugfs show statistics that I'd expect in my test setup. But yes, the port 0 stats would be especially curious. EDIT: Just saw your message about these. Yes, port 0 is all messed up: # ethtool -S eth0 | grep p00 | head p00_rx_octets: 13602161426432 p00_rx_unicast: 4539780431872 p00_rx_multicast: 9028021256192 p00_rx_broadcast: 8980776615936 p00_rx_shorts: 0 p00_rx_fragments: 0 p00_rx_jabbers: 0 p00_rx_crc_align_errs: 0 p00_rx_sym_errs: 0 p00_rx_frames_below_65_octets: 4539780431872 (configuration - swp2 plugged into swp3, bridged with stp. swp1 into my development machine, swp4-7 not up) # pwd /sys/class/net # cat swp[123]/statistics/tx_bytes 44616 44668 52 # cat swp[123]/statistics/rx_bytes 34574 46 73272 # ethtool -S swp1 | head -n 5 NIC statistics: tx_packets: 942 tx_bytes: 48984 rx_packets: 764 rx_bytes: 37808 # ethtool -S swp2 | head -n 5 NIC statistics: tx_packets: 946 tx_bytes: 49192 rx_packets: 1 rx_bytes: 46 # ethtool -S swp3 | head -n 5 NIC statistics: tx_packets: 1 tx_bytes: 52 rx_packets: 1699 rx_bytes: 80658 # ethtool -S swp4 | head -n 5 NIC statistics: tx_packets: 0 tx_bytes: 0 rx_packets: 0 rx_bytes: 0 # ip link 1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN mode DEFAULT group default qlen 1000 link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00 2: eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1520 qdisc mq state UP mode DEFAULT group default qlen 1000 link/ether 24:76:25:76:35:37 brd ff:ff:ff:ff:ff:ff 3: swp1@eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue master br0 state UP mode DEFAULT group default qlen 1000 link/ether 24:76:25:76:35:37 brd ff:ff:ff:ff:ff:ff 4: swp2@eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue master br0 state UP mode DEFAULT group default qlen 1000 link/ether 24:76:25:76:35:37 brd ff:ff:ff:ff:ff:ff 5: swp3@eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue master br0 state UP mode DEFAULT group default qlen 1000 link/ether 24:76:25:76:35:37 brd ff:ff:ff:ff:ff:ff 6: swp4@eth0: <BROADCAST,MULTICAST> mtu 1500 qdisc noop state DOWN mode DEFAULT group default qlen 1000 link/ether 24:76:25:76:35:37 brd ff:ff:ff:ff:ff:ff 7: swp5@eth0: <BROADCAST,MULTICAST> mtu 1500 qdisc noop state DOWN mode DEFAULT group default qlen 1000 link/ether 24:76:25:76:35:37 brd ff:ff:ff:ff:ff:ff 8: swp6@eth0: <BROADCAST,MULTICAST> mtu 1500 qdisc noop state DOWN mode DEFAULT group default qlen 1000 link/ether 24:76:25:76:35:37 brd ff:ff:ff:ff:ff:ff 9: swp7@eth0: <BROADCAST,MULTICAST> mtu 1500 qdisc noop state DOWN mode DEFAULT group default qlen 1000 link/ether 24:76:25:76:35:37 brd ff:ff:ff:ff:ff:ff 10: br0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue state UP mode DEFAULT group default qlen 1000 link/ether 24:76:25:76:35:37 brd ff:ff:ff:ff:ff:ff Clearly my testing wasn't sufficient as there were still issues, but there were reasonable signs of things working as expected on ports 1-4.
On Tue, Feb 08, 2022 at 08:07:42AM -0800, Colin Foster wrote: > # ethtool -S swp1 | head -n 5 > NIC statistics: > tx_packets: 942 > tx_bytes: 48984 > rx_packets: 764 > rx_bytes: 37808 > # ethtool -S swp2 | head -n 5 > NIC statistics: > tx_packets: 946 > tx_bytes: 49192 > rx_packets: 1 > rx_bytes: 46 > # ethtool -S swp3 | head -n 5 > NIC statistics: > tx_packets: 1 > tx_bytes: 52 > rx_packets: 1699 > rx_bytes: 80658 > # ethtool -S swp4 | head -n 5 > NIC statistics: > tx_packets: 0 > tx_bytes: 0 > rx_packets: 0 > rx_bytes: 0 I understand what confused you now. These are software counters patched in by dsa_slave_get_ethtool_stats(). They are in no way the counters from felix_info :: stats_layout, please compare the strings!
On Tue, Feb 08, 2022 at 03:45:15PM +0000, Vladimir Oltean wrote: > On Tue, Feb 08, 2022 at 07:41:56AM -0800, Colin Foster wrote: > > I see that now. It is confusing and I'll clear it up. I never caught > > this because I'm testing in a setup where port 0 is the CPU port, so I > > can't get ethtool stats. Thanks! > > You can retrieve the stats on the CPU port, as they are appended to the > ethtool stats of the DSA master, prefixed with "p%02d_", cpu_dp->index. > > ethtool -S eth0 | grep -v ': 0' # where eth0 is the DSA master Thanks for this hint. I didn't even think to go looking for it since the DSA Documenation mentions an "inability to fetch CPU port statistics counters using ethtool." I see now that this bullet point is from 2015, and probably should be removed entirely.
On Tue, Feb 08, 2022 at 08:49:52AM -0800, Colin Foster wrote: > On Tue, Feb 08, 2022 at 03:45:15PM +0000, Vladimir Oltean wrote: > > On Tue, Feb 08, 2022 at 07:41:56AM -0800, Colin Foster wrote: > > > I see that now. It is confusing and I'll clear it up. I never caught > > > this because I'm testing in a setup where port 0 is the CPU port, so I > > > can't get ethtool stats. Thanks! > > > > You can retrieve the stats on the CPU port, as they are appended to the > > ethtool stats of the DSA master, prefixed with "p%02d_", cpu_dp->index. > > > > ethtool -S eth0 | grep -v ': 0' # where eth0 is the DSA master > > Thanks for this hint. I didn't even think to go looking for it since the > DSA Documenation mentions an "inability to fetch CPU port statistics > counters using ethtool." I see now that this bullet point is from 2015, > and probably should be removed entirely. Yes, that paragraph did not age well, although it is still true in some sense, just not in that phrasing. You still cannot collect ethtool statistics for cascade (DSA) ports. Maybe it should be updated.
diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index 455293aa6343..5efb1f3a1410 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -1737,32 +1737,41 @@ void ocelot_get_strings(struct ocelot *ocelot, int port, u32 sset, u8 *data) } EXPORT_SYMBOL(ocelot_get_strings); -static void ocelot_update_stats(struct ocelot *ocelot) +static int ocelot_update_stats(struct ocelot *ocelot) { - int i, j; + struct ocelot_stats_region *region; + int i, j, err = 0; mutex_lock(&ocelot->stats_lock); for (i = 0; i < ocelot->num_phys_ports; i++) { + unsigned int idx = 0; + /* Configure the port to read the stats from */ ocelot_write(ocelot, SYS_STAT_CFG_STAT_VIEW(i), SYS_STAT_CFG); - for (j = 0; j < ocelot->num_stats; j++) { - u32 val; - unsigned int idx = i * ocelot->num_stats + j; + list_for_each_entry(region, &ocelot->stats_regions, node) { + err = ocelot_bulk_read_rix(ocelot, SYS_COUNT_RX_OCTETS, + region->offset, region->buf, + region->count); + if (err) + goto out; - val = ocelot_read_rix(ocelot, SYS_COUNT_RX_OCTETS, - ocelot->stats_layout[j].offset); + for (j = 0; j < region->count; j++) { + if (region->buf[j] < (ocelot->stats[idx + j] & U32_MAX)) + ocelot->stats[idx + j] += (u64)1 << 32; - if (val < (ocelot->stats[idx] & U32_MAX)) - ocelot->stats[idx] += (u64)1 << 32; + ocelot->stats[idx + j] = (ocelot->stats[idx + j] & + ~(u64)U32_MAX) + region->buf[j]; + } - ocelot->stats[idx] = (ocelot->stats[idx] & - ~(u64)U32_MAX) + val; + idx += region->count; } } +out: mutex_unlock(&ocelot->stats_lock); + return err; } static void ocelot_check_stats_work(struct work_struct *work) @@ -1779,10 +1788,11 @@ static void ocelot_check_stats_work(struct work_struct *work) void ocelot_get_ethtool_stats(struct ocelot *ocelot, int port, u64 *data) { - int i; + int i, err; /* check and update now */ - ocelot_update_stats(ocelot); + err = ocelot_update_stats(ocelot); + WARN_ONCE(err, "Error %d updating ethtool stats\n", err); /* Copy all counters */ for (i = 0; i < ocelot->num_stats; i++) @@ -1799,6 +1809,41 @@ int ocelot_get_sset_count(struct ocelot *ocelot, int port, int sset) } EXPORT_SYMBOL(ocelot_get_sset_count); +static int ocelot_prepare_stats_regions(struct ocelot *ocelot) +{ + struct ocelot_stats_region *region = NULL; + unsigned int last; + int i; + + INIT_LIST_HEAD(&ocelot->stats_regions); + + for (i = 0; i < ocelot->num_stats; i++) { + if (region && ocelot->stats_layout[i].offset == last + 1) { + region->count++; + } else { + region = devm_kzalloc(ocelot->dev, sizeof(*region), + GFP_KERNEL); + if (!region) + return -ENOMEM; + + region->offset = ocelot->stats_layout[i].offset; + region->count = 1; + list_add_tail(®ion->node, &ocelot->stats_regions); + } + + last = ocelot->stats_layout[i].offset; + } + + list_for_each_entry(region, &ocelot->stats_regions, node) { + region->buf = devm_kcalloc(ocelot->dev, region->count, + sizeof(*region->buf), GFP_KERNEL); + if (!region->buf) + return -ENOMEM; + } + + return 0; +} + int ocelot_get_ts_info(struct ocelot *ocelot, int port, struct ethtool_ts_info *info) { @@ -2799,6 +2844,13 @@ int ocelot_init(struct ocelot *ocelot) ANA_CPUQ_8021_CFG_CPUQ_BPDU_VAL(6), ANA_CPUQ_8021_CFG, i); + ret = ocelot_prepare_stats_regions(ocelot); + if (ret) { + destroy_workqueue(ocelot->stats_queue); + destroy_workqueue(ocelot->owq); + return ret; + } + INIT_DELAYED_WORK(&ocelot->stats_work, ocelot_check_stats_work); queue_delayed_work(ocelot->stats_queue, &ocelot->stats_work, OCELOT_STATS_CHECK_DELAY); diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h index 312b72558659..d3291a5f7e88 100644 --- a/include/soc/mscc/ocelot.h +++ b/include/soc/mscc/ocelot.h @@ -542,6 +542,13 @@ struct ocelot_stat_layout { char name[ETH_GSTRING_LEN]; }; +struct ocelot_stats_region { + struct list_head node; + u32 offset; + int count; + u32 *buf; +}; + enum ocelot_tag_prefix { OCELOT_TAG_PREFIX_DISABLED = 0, OCELOT_TAG_PREFIX_NONE, @@ -673,6 +680,7 @@ struct ocelot { struct regmap_field *regfields[REGFIELD_MAX]; const u32 *const *map; const struct ocelot_stat_layout *stats_layout; + struct list_head stats_regions; unsigned int num_stats; u32 pool_size[OCELOT_SB_NUM][OCELOT_SB_POOL_NUM];
Create and utilize bulk regmap reads instead of single access for gathering stats. The background reading of statistics happens frequently, and over a few contiguous memory regions. High speed PCIe buses and MMIO access will probably see negligible performance increase. Lower speed buses like SPI and I2C could see significant performance increase, since the bus configuration and register access times account for a large percentage of data transfer time. Signed-off-by: Colin Foster <colin.foster@in-advantage.com> --- drivers/net/ethernet/mscc/ocelot.c | 78 +++++++++++++++++++++++++----- include/soc/mscc/ocelot.h | 8 +++ 2 files changed, 73 insertions(+), 13 deletions(-)