diff mbox series

[v2,net-next,6/6] net: dsa: mv88e6xxx: Add "rmon" counter group support

Message ID 20231205160418.3770042-7-tobias@waldekranz.com (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Headers show
Series net: dsa: mv88e6xxx: Add "eth-mac" and "rmon" counter group support | expand

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for net-next
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 1115 this patch: 1115
netdev/cc_maintainers warning 2 maintainers not CCed: pabeni@redhat.com edumazet@google.com
netdev/build_clang success Errors and warnings before: 1142 this patch: 1142
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 1142 this patch: 1142
netdev/checkpatch warning WARNING: line length of 83 exceeds 80 columns WARNING: line length of 85 exceeds 80 columns
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Tobias Waldekranz Dec. 5, 2023, 4:04 p.m. UTC
Report the applicable subset of an mv88e6xxx port's counters using
ethtool's standardized "rmon" counter group.

Signed-off-by: Tobias Waldekranz <tobias@waldekranz.com>
---
 drivers/net/dsa/mv88e6xxx/chip.c | 42 ++++++++++++++++++++++++++++++++
 1 file changed, 42 insertions(+)

Comments

Vladimir Oltean Dec. 5, 2023, 6:11 p.m. UTC | #1
On Tue, Dec 05, 2023 at 05:04:18PM +0100, Tobias Waldekranz wrote:
> Report the applicable subset of an mv88e6xxx port's counters using
> ethtool's standardized "rmon" counter group.
> 
> Signed-off-by: Tobias Waldekranz <tobias@waldekranz.com>
> ---
>  drivers/net/dsa/mv88e6xxx/chip.c | 42 ++++++++++++++++++++++++++++++++
>  1 file changed, 42 insertions(+)
> 
> diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
> index 1a16698181fb..2e74109196f4 100644
> --- a/drivers/net/dsa/mv88e6xxx/chip.c
> +++ b/drivers/net/dsa/mv88e6xxx/chip.c
> @@ -1357,6 +1357,47 @@ static void mv88e6xxx_get_eth_mac_stats(struct dsa_switch *ds, int port,
> +	MV88E6XXX_RMON_STAT_MAP(in_undersize, undersize_pkts);
> +	MV88E6XXX_RMON_STAT_MAP(in_oversize, oversize_pkts);
> +	MV88E6XXX_RMON_STAT_MAP(in_fragments, fragments);
> +	MV88E6XXX_RMON_STAT_MAP(in_jabber, jabbers);
> +	MV88E6XXX_RMON_STAT_MAP(hist_64bytes, hist[0]);
> +	MV88E6XXX_RMON_STAT_MAP(hist_65_127bytes, hist[1]);
> +	MV88E6XXX_RMON_STAT_MAP(hist_128_255bytes, hist[2]);
> +	MV88E6XXX_RMON_STAT_MAP(hist_256_511bytes, hist[3]);
> +	MV88E6XXX_RMON_STAT_MAP(hist_512_1023bytes, hist[4]);
> +	MV88E6XXX_RMON_STAT_MAP(hist_1024_max_bytes, hist[5]);

I see that these are in STATS_TYPE_BANK0 and that every switch provides
that. Good.

Reviewed-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Vladimir Oltean Dec. 6, 2023, 12:22 a.m. UTC | #2
On Tue, Dec 05, 2023 at 05:04:18PM +0100, Tobias Waldekranz wrote:
> +static void mv88e6xxx_get_rmon_stats(struct dsa_switch *ds, int port,
> +				     struct ethtool_rmon_stats *rmon_stats,
> +				     const struct ethtool_rmon_hist_range **ranges)
> +{
> +	static const struct ethtool_rmon_hist_range rmon_ranges[] = {
> +		{   64,    64 },
> +		{   65,   127 },
> +		{  128,   255 },
> +		{  256,   511 },
> +		{  512,  1023 },
> +		{ 1024, 65535 },
> +		{}
> +	};
> +	struct mv88e6xxx_chip *chip = ds->priv;
> +	int ret;
> +
> +	ret = mv88e6xxx_stats_snapshot(chip, port);
> +	if (ret < 0)
> +		return;
> +
> +#define MV88E6XXX_RMON_STAT_MAP(_id, _member)				\
> +	mv88e6xxx_stats_get_stat(chip, port,				\
> +				 &mv88e6xxx_hw_stats[MV88E6XXX_HW_STAT_ID_ ## _id], \
> +				 &rmon_stats->stats._member)
> +
> +	MV88E6XXX_RMON_STAT_MAP(in_undersize, undersize_pkts);
> +	MV88E6XXX_RMON_STAT_MAP(in_oversize, oversize_pkts);
> +	MV88E6XXX_RMON_STAT_MAP(in_fragments, fragments);
> +	MV88E6XXX_RMON_STAT_MAP(in_jabber, jabbers);
> +	MV88E6XXX_RMON_STAT_MAP(hist_64bytes, hist[0]);
> +	MV88E6XXX_RMON_STAT_MAP(hist_65_127bytes, hist[1]);
> +	MV88E6XXX_RMON_STAT_MAP(hist_128_255bytes, hist[2]);
> +	MV88E6XXX_RMON_STAT_MAP(hist_256_511bytes, hist[3]);
> +	MV88E6XXX_RMON_STAT_MAP(hist_512_1023bytes, hist[4]);
> +	MV88E6XXX_RMON_STAT_MAP(hist_1024_max_bytes, hist[5]);
> +
> +#undef MV88E6XXX_RMON_STAT_MAP
> +
> +	*ranges = rmon_ranges;
> +}

I just noticed that this doesn't populate the TX counters, just RX.

I haven't tried it, but I think the Histogram Mode bits (11:10) of the
Stats Operation Register might be able to control what gets reported for
the Set 4 of counters. Currently AFAICS, the driver always sets it to
MV88E6XXX_G1_STATS_OP_HIST_RX_TX, aka what gets reported to
"rx-rmon-etherStatsPkts64to64Octets" is actually an RX+TX counter.

What's the story behind this?
Tobias Waldekranz Dec. 6, 2023, 8:27 a.m. UTC | #3
On ons, dec 06, 2023 at 02:22, Vladimir Oltean <olteanv@gmail.com> wrote:
> On Tue, Dec 05, 2023 at 05:04:18PM +0100, Tobias Waldekranz wrote:
>> +static void mv88e6xxx_get_rmon_stats(struct dsa_switch *ds, int port,
>> +				     struct ethtool_rmon_stats *rmon_stats,
>> +				     const struct ethtool_rmon_hist_range **ranges)
>> +{
>> +	static const struct ethtool_rmon_hist_range rmon_ranges[] = {
>> +		{   64,    64 },
>> +		{   65,   127 },
>> +		{  128,   255 },
>> +		{  256,   511 },
>> +		{  512,  1023 },
>> +		{ 1024, 65535 },
>> +		{}
>> +	};
>> +	struct mv88e6xxx_chip *chip = ds->priv;
>> +	int ret;
>> +
>> +	ret = mv88e6xxx_stats_snapshot(chip, port);
>> +	if (ret < 0)
>> +		return;
>> +
>> +#define MV88E6XXX_RMON_STAT_MAP(_id, _member)				\
>> +	mv88e6xxx_stats_get_stat(chip, port,				\
>> +				 &mv88e6xxx_hw_stats[MV88E6XXX_HW_STAT_ID_ ## _id], \
>> +				 &rmon_stats->stats._member)
>> +
>> +	MV88E6XXX_RMON_STAT_MAP(in_undersize, undersize_pkts);
>> +	MV88E6XXX_RMON_STAT_MAP(in_oversize, oversize_pkts);
>> +	MV88E6XXX_RMON_STAT_MAP(in_fragments, fragments);
>> +	MV88E6XXX_RMON_STAT_MAP(in_jabber, jabbers);
>> +	MV88E6XXX_RMON_STAT_MAP(hist_64bytes, hist[0]);
>> +	MV88E6XXX_RMON_STAT_MAP(hist_65_127bytes, hist[1]);
>> +	MV88E6XXX_RMON_STAT_MAP(hist_128_255bytes, hist[2]);
>> +	MV88E6XXX_RMON_STAT_MAP(hist_256_511bytes, hist[3]);
>> +	MV88E6XXX_RMON_STAT_MAP(hist_512_1023bytes, hist[4]);
>> +	MV88E6XXX_RMON_STAT_MAP(hist_1024_max_bytes, hist[5]);
>> +
>> +#undef MV88E6XXX_RMON_STAT_MAP
>> +
>> +	*ranges = rmon_ranges;
>> +}
>
> I just noticed that this doesn't populate the TX counters, just RX.
>
> I haven't tried it, but I think the Histogram Mode bits (11:10) of the
> Stats Operation Register might be able to control what gets reported for
> the Set 4 of counters. Currently AFAICS, the driver always sets it to
> MV88E6XXX_G1_STATS_OP_HIST_RX_TX, aka what gets reported to
> "rx-rmon-etherStatsPkts64to64Octets" is actually an RX+TX counter.

You have a keen eye! Yes, that is what's happening.

> What's the story behind this?

I think the story starts, and ends, with this value being the hardware
default.

Seeing as the hardware only has a single set of histogram counters, it
seems to me like we have to prioritize between:

1. Keeping Rx+Tx: Backwards-compatible, but we can't export any histogram via
   the standard RMON group.

2. Move to Rx-only: We can export them via the RMON group, but we change
   the behavior of the "native" counters.

3. Move to Tx-only: We can export them via the RMON group, but we change
   the behavior of the "native" counters.

Looking at RFC2819, which lays out the original RMON MIB, we find this
description:

    etherStatsPkts64Octets OBJECT-TYPE
        SYNTAX     Counter32
        UNITS      "Packets"
        MAX-ACCESS read-only
        STATUS     current
        DESCRIPTION
            "The total number of packets (including bad
            packets) received that were 64 octets in length
            (excluding framing bits but including FCS octets)."
        ::= { etherStatsEntry 14 }

In my opinion, this gives (2) a clear edge over (3), so we're down to
choosing between (1) and (2). Personally, I lean towards (2), as I think
it is more useful because:

- Most people will tend to assume that the histogram counters refers to
  those defined in RFC2819 anyway

- It means we can deliver _something_ rather than nothing to someone
  building an operating system, who is looking for a hardware
  independent way of providing diagnostics
Vladimir Oltean Dec. 6, 2023, 7:55 p.m. UTC | #4
On Wed, Dec 06, 2023 at 09:27:29AM +0100, Tobias Waldekranz wrote:
> > I just noticed that this doesn't populate the TX counters, just RX.
> >
> > I haven't tried it, but I think the Histogram Mode bits (11:10) of the
> > Stats Operation Register might be able to control what gets reported for
> > the Set 4 of counters. Currently AFAICS, the driver always sets it to
> > MV88E6XXX_G1_STATS_OP_HIST_RX_TX, aka what gets reported to
> > "rx-rmon-etherStatsPkts64to64Octets" is actually an RX+TX counter.
> 
> You have a keen eye! Yes, that is what's happening.

It would be nice if my failure-prone keen eye had the safety net of a
selftest that catches this kind of stuff. After all, the ethtool
counters were standardized in order for us to be able to expect standard
behavior out of them, and for nonconformities to stand out easily.

Do you think (bearing in mind that the questions below might make the
rest irrelevant) that you could look into creating a minimal test in
tools/testing/selftests/net/forwarding and symlinking it to
tools/testing/selftests/drivers/net/dsa? You can start from
ethtool_std_stats_get() and take inspiration from the way in which it is
used by ethtool_mm.sh.

> > What's the story behind this?
> 
> I think the story starts, and ends, with this value being the hardware
> default.

I do hope that is where the story actually ends.

But the 88E6097 documentation I have suggests that the Histogram Mode
bits are reserved to the value of 3 (RX+TX), which suggests that this
cannot be written to any other value.

> Seeing as the hardware only has a single set of histogram counters,

"Seeing" means you tested this? calling chip->info->ops->stats_set_histogram()
at runtime, and seeing if the previously hidden histogram counters are
reset to zero, or if they show retroactively counted packets?

I searched through the git logs, but it's not exactly clear that this
was tried and doesn't work.

> it seems to me like we have to prioritize between:
> 
> 1. Keeping Rx+Tx: Backwards-compatible, but we can't export any histogram via
>    the standard RMON group.
> 
> 2. Move to Rx-only: We can export them via the RMON group, but we change
>    the behavior of the "native" counters.
> 
> 3. Move to Tx-only: We can export them via the RMON group, but we change
>    the behavior of the "native" counters.
> 
> Looking at RFC2819, which lays out the original RMON MIB, we find this
> description:
> 
>     etherStatsPkts64Octets OBJECT-TYPE
>         SYNTAX     Counter32
>         UNITS      "Packets"
>         MAX-ACCESS read-only
>         STATUS     current
>         DESCRIPTION
>             "The total number of packets (including bad
>             packets) received that were 64 octets in length
>             (excluding framing bits but including FCS octets)."
>         ::= { etherStatsEntry 14 }
> 
> In my opinion, this gives (2) a clear edge over (3), so we're down to
> choosing between (1) and (2). Personally, I lean towards (2), as I think
> it is more useful because:
> 
> - Most people will tend to assume that the histogram counters refers to
>   those defined in RFC2819 anyway
> 
> - It means we can deliver _something_ rather than nothing to someone
>   building an operating system, who is looking for a hardware
>   independent way of providing diagnostics

If the "reserved to 3" thing is true, then both (2) and (3) become
practically non-options, at least for 88E6097. The waters would be
further muddied if the driver were to make some chips use one
Histogram Mode, and other chips a different one. It implies that as
a user, you would need to know what switch family you have, before
you know what "ethtool -S lan0 | grep hist_64bytes" is counting!
Tobias Waldekranz Dec. 7, 2023, 9:47 a.m. UTC | #5
On ons, dec 06, 2023 at 21:55, Vladimir Oltean <olteanv@gmail.com> wrote:
> On Wed, Dec 06, 2023 at 09:27:29AM +0100, Tobias Waldekranz wrote:
>> > I just noticed that this doesn't populate the TX counters, just RX.
>> >
>> > I haven't tried it, but I think the Histogram Mode bits (11:10) of the
>> > Stats Operation Register might be able to control what gets reported for
>> > the Set 4 of counters. Currently AFAICS, the driver always sets it to
>> > MV88E6XXX_G1_STATS_OP_HIST_RX_TX, aka what gets reported to
>> > "rx-rmon-etherStatsPkts64to64Octets" is actually an RX+TX counter.
>> 
>> You have a keen eye! Yes, that is what's happening.
>
> It would be nice if my failure-prone keen eye had the safety net of a
> selftest that catches this kind of stuff. After all, the ethtool
> counters were standardized in order for us to be able to expect standard
> behavior out of them, and for nonconformities to stand out easily.
>
> Do you think (bearing in mind that the questions below might make the
> rest irrelevant) that you could look into creating a minimal test in
> tools/testing/selftests/net/forwarding and symlinking it to
> tools/testing/selftests/drivers/net/dsa? You can start from
> ethtool_std_stats_get() and take inspiration from the way in which it is
> used by ethtool_mm.sh.

I'll give it the old college try.

>> > What's the story behind this?
>> 
>> I think the story starts, and ends, with this value being the hardware
>> default.
>
> I do hope that is where the story actually ends.
>
> But the 88E6097 documentation I have suggests that the Histogram Mode
> bits are reserved to the value of 3 (RX+TX), which suggests that this
> cannot be written to any other value.

I'm pretty sure that is just typo in the documentation. Every other
field in the documentation marked "RES" has the description "Reserved
for future use" - except for this one. My guess is that the author meant
to type "RWS to 0x3", since that is the convetion for all other
multi-bit fields with a non-zero reset value. W is right next to E on
most keyboards, which makes it even more likely.

In section 4.3.8, we find this:

    The counters are designed to support:
    - RFC 2819 – RMON MIB (this RFC obsoletes 1757 which obsoletes 1271)
    ....

At the bottom of that page there is this note for "Set 4" (which is the
histogram group):

    **Note**
    The Set 4 counters can be configured to be ingress only, egress
    only, or both.

These sentences have remained unchanged since at least 6095, up to and
including 6393X.

>> Seeing as the hardware only has a single set of histogram counters,
>
> "Seeing" means you tested this? calling chip->info->ops->stats_set_histogram()
> at runtime, and seeing if the previously hidden histogram counters are
> reset to zero, or if they show retroactively counted packets?

I've now tested it on a 6393X (the only chip I can get my hands on at
the moment). On this device, the bits have moved to Global1:0x1c, but
the description is the same. It behaves like the documentation would
suggest: There is a single set of histogram counters - the user gets to
choose if they collect stats from rx, tx, or both:

We start out with the default of counting rx+tx, we've seen 400 packets
before the test starts:

    root@infix-06-01-00:~# mdio f212a2* mvls 0 g1:0x1c
    0x07c0
    root@infix-06-01-00:~# ethtool -S x10 | grep hist_512
         hist_512_1023bytes: 400

Send 10 pings to our neighbor with a large payload, both rx and tx are
counted (20):

    root@infix-06-01-00:~# ping -L ff02::1%x10 -s 512 -c 10 -i 0.1 -q
    PING ff02::1%x10(ff02::1%x10) 512 data bytes

    --- ff02::1%x10 ping statistics ---
    10 packets transmitted, 10 received, 0% packet loss, time 934ms
    rtt min/avg/max/mdev = 0.150/0.172/0.313/0.046 ms
    root@infix-06-01-00:~# ethtool -S x10 | grep hist_512
         hist_512_1023bytes: 420

Limit to tx only:

    root@infix-06-01-00:~# mdio f212a2* mvls 0 g1:0x1c 0x0740

Same test again now only increments the (same) counter by 10:

    root@infix-06-01-00:~# ping -L ff02::1%x10 -s 512 -c 10 -i 0.1 -q
    PING ff02::1%x10(ff02::1%x10) 512 data bytes

    --- ff02::1%x10 ping statistics ---
    10 packets transmitted, 10 received, 0% packet loss, time 934ms
    rtt min/avg/max/mdev = 0.148/0.173/0.323/0.050 ms
    root@infix-06-01-00:~# ethtool -S x10 | grep hist_512
         hist_512_1023bytes: 430


> I searched through the git logs, but it's not exactly clear that this
> was tried and doesn't work.
>
>> it seems to me like we have to prioritize between:
>> 
>> 1. Keeping Rx+Tx: Backwards-compatible, but we can't export any histogram via
>>    the standard RMON group.
>> 
>> 2. Move to Rx-only: We can export them via the RMON group, but we change
>>    the behavior of the "native" counters.
>> 
>> 3. Move to Tx-only: We can export them via the RMON group, but we change
>>    the behavior of the "native" counters.
>> 
>> Looking at RFC2819, which lays out the original RMON MIB, we find this
>> description:
>> 
>>     etherStatsPkts64Octets OBJECT-TYPE
>>         SYNTAX     Counter32
>>         UNITS      "Packets"
>>         MAX-ACCESS read-only
>>         STATUS     current
>>         DESCRIPTION
>>             "The total number of packets (including bad
>>             packets) received that were 64 octets in length
>>             (excluding framing bits but including FCS octets)."
>>         ::= { etherStatsEntry 14 }
>> 
>> In my opinion, this gives (2) a clear edge over (3), so we're down to
>> choosing between (1) and (2). Personally, I lean towards (2), as I think
>> it is more useful because:
>> 
>> - Most people will tend to assume that the histogram counters refers to
>>   those defined in RFC2819 anyway
>> 
>> - It means we can deliver _something_ rather than nothing to someone
>>   building an operating system, who is looking for a hardware
>>   independent way of providing diagnostics
>
> If the "reserved to 3" thing is true, then both (2) and (3) become
> practically non-options, at least for 88E6097. The waters would be
> further muddied if the driver were to make some chips use one
> Histogram Mode, and other chips a different one. It implies that as
> a user, you would need to know what switch family you have, before
> you know what "ethtool -S lan0 | grep hist_64bytes" is counting!

I agree that having different chips work differently would be a
nightmare. Especially since this could mean that "lan1" might behave
differently than "lan0" if they are attached to different chips on the
same system.

Fortunately though, it looks like (2) is still on the table. Do you
agree with that assessment? If yes, do you think is the right way
forward?

Andrew, what's your opinion?
diff mbox series

Patch

diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index 1a16698181fb..2e74109196f4 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -1357,6 +1357,47 @@  static void mv88e6xxx_get_eth_mac_stats(struct dsa_switch *ds, int port,
 	mac_stats->stats.FramesReceivedOK += mac_stats->stats.BroadcastFramesReceivedOK;
 }
 
+static void mv88e6xxx_get_rmon_stats(struct dsa_switch *ds, int port,
+				     struct ethtool_rmon_stats *rmon_stats,
+				     const struct ethtool_rmon_hist_range **ranges)
+{
+	static const struct ethtool_rmon_hist_range rmon_ranges[] = {
+		{   64,    64 },
+		{   65,   127 },
+		{  128,   255 },
+		{  256,   511 },
+		{  512,  1023 },
+		{ 1024, 65535 },
+		{}
+	};
+	struct mv88e6xxx_chip *chip = ds->priv;
+	int ret;
+
+	ret = mv88e6xxx_stats_snapshot(chip, port);
+	if (ret < 0)
+		return;
+
+#define MV88E6XXX_RMON_STAT_MAP(_id, _member)				\
+	mv88e6xxx_stats_get_stat(chip, port,				\
+				 &mv88e6xxx_hw_stats[MV88E6XXX_HW_STAT_ID_ ## _id], \
+				 &rmon_stats->stats._member)
+
+	MV88E6XXX_RMON_STAT_MAP(in_undersize, undersize_pkts);
+	MV88E6XXX_RMON_STAT_MAP(in_oversize, oversize_pkts);
+	MV88E6XXX_RMON_STAT_MAP(in_fragments, fragments);
+	MV88E6XXX_RMON_STAT_MAP(in_jabber, jabbers);
+	MV88E6XXX_RMON_STAT_MAP(hist_64bytes, hist[0]);
+	MV88E6XXX_RMON_STAT_MAP(hist_65_127bytes, hist[1]);
+	MV88E6XXX_RMON_STAT_MAP(hist_128_255bytes, hist[2]);
+	MV88E6XXX_RMON_STAT_MAP(hist_256_511bytes, hist[3]);
+	MV88E6XXX_RMON_STAT_MAP(hist_512_1023bytes, hist[4]);
+	MV88E6XXX_RMON_STAT_MAP(hist_1024_max_bytes, hist[5]);
+
+#undef MV88E6XXX_RMON_STAT_MAP
+
+	*ranges = rmon_ranges;
+}
+
 static int mv88e6xxx_get_regs_len(struct dsa_switch *ds, int port)
 {
 	struct mv88e6xxx_chip *chip = ds->priv;
@@ -6877,6 +6918,7 @@  static const struct dsa_switch_ops mv88e6xxx_switch_ops = {
 	.get_strings		= mv88e6xxx_get_strings,
 	.get_ethtool_stats	= mv88e6xxx_get_ethtool_stats,
 	.get_eth_mac_stats	= mv88e6xxx_get_eth_mac_stats,
+	.get_rmon_stats		= mv88e6xxx_get_rmon_stats,
 	.get_sset_count		= mv88e6xxx_get_sset_count,
 	.port_max_mtu		= mv88e6xxx_get_max_mtu,
 	.port_change_mtu	= mv88e6xxx_change_mtu,