diff mbox series

[net-next,v2,2/3] dsa: mv88e6xxx: Add support for RMU in select switches

Message ID 20220826063816.948397-3-mattias.forsblad@gmail.com (mailing list archive)
State Changes Requested
Delegated to: Netdev Maintainers
Headers show
Series net: dsa: mv88e6xxx: Add RMU support | expand

Checks

Context Check Description
netdev/tree_selection success Clearly marked for net-next
netdev/fixes_present success Fixes tag not required for -next series
netdev/subject_prefix success Link
netdev/cover_letter success Series has a cover letter
netdev/patch_count success Link
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit fail Errors and warnings before: 0 this patch: 3
netdev/cc_maintainers warning 1 maintainers not CCed: linux@armlinux.org.uk
netdev/build_clang success Errors and warnings before: 0 this patch: 0
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn fail Errors and warnings before: 0 this patch: 3
netdev/checkpatch warning WARNING: added, moved or deleted file(s), does MAINTAINERS need updating? WARNING: line length of 100 exceeds 80 columns WARNING: line length of 81 exceeds 80 columns WARNING: line length of 82 exceeds 80 columns WARNING: line length of 86 exceeds 80 columns WARNING: line length of 87 exceeds 80 columns WARNING: line length of 88 exceeds 80 columns WARNING: line length of 89 exceeds 80 columns WARNING: line length of 91 exceeds 80 columns WARNING: line length of 94 exceeds 80 columns
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Mattias Forsblad Aug. 26, 2022, 6:38 a.m. UTC
Implement support for handling RMU layer 3 frames
including receive and transmit.

Signed-off-by: Mattias Forsblad <mattias.forsblad@gmail.com>
---
 drivers/net/dsa/mv88e6xxx/Makefile  |   1 +
 drivers/net/dsa/mv88e6xxx/chip.c    |  13 ++
 drivers/net/dsa/mv88e6xxx/chip.h    |  20 ++
 drivers/net/dsa/mv88e6xxx/global1.c |  84 +++++++++
 drivers/net/dsa/mv88e6xxx/global1.h |   3 +
 drivers/net/dsa/mv88e6xxx/rmu.c     | 273 ++++++++++++++++++++++++++++
 drivers/net/dsa/mv88e6xxx/rmu.h     |  33 ++++
 7 files changed, 427 insertions(+)
 create mode 100644 drivers/net/dsa/mv88e6xxx/rmu.c
 create mode 100644 drivers/net/dsa/mv88e6xxx/rmu.h

Comments

Vladimir Oltean Aug. 30, 2022, 4:35 p.m. UTC | #1
On Fri, Aug 26, 2022 at 08:38:15AM +0200, Mattias Forsblad wrote:
>  static int mv88e6xxx_rmu_setup(struct mv88e6xxx_chip *chip)
>  {
> +	if (chip->info->ops->rmu_enable)
> +		if (!chip->info->ops->rmu_enable(chip))
> +			return mv88e6xxx_rmu_init(chip);
> +
>  	if (chip->info->ops->rmu_disable)
>  		return chip->info->ops->rmu_disable(chip);

I think it's very important for the RMU to still start as disabled.
You enable it dynamically when the master goes up.

> diff --git a/drivers/net/dsa/mv88e6xxx/rmu.c b/drivers/net/dsa/mv88e6xxx/rmu.c
> new file mode 100644
> index 000000000000..b7d850c099c5
> --- /dev/null
> +++ b/drivers/net/dsa/mv88e6xxx/rmu.c
> @@ -0,0 +1,273 @@
> +// SPDX-License-Identifier: GPL-2.0-or-later
> +/*
> + * Marvell 88E6xxx Switch Remote Management Unit Support
> + *
> + * Copyright (c) 2022 Mattias Forsblad <mattias.forsblad@gmail.com>
> + *
> + */
> +
> +#include <asm/unaligned.h>
> +#include "rmu.h"
> +#include "global1.h"
> +
> +static int mv88e6xxx_validate_port_mac(struct dsa_switch *ds, struct sk_buff *skb)
> +{
> +	unsigned char *ethhdr;
> +	struct dsa_port *dp;
> +	u8 pkt_port;
> +
> +	pkt_port = (skb->data[7] >> 3) & 0xf;
> +	dp = dsa_to_port(ds, pkt_port);
> +	if (!dp) {
> +		dev_dbg_ratelimited(ds->dev, "RMU: couldn't find port for %d\n", pkt_port);
> +		return -ENXIO;
> +	}
> +
> +	/* Check matching MAC */
> +	ethhdr = skb_mac_header(skb);
> +	if (memcmp(dp->slave->dev_addr, ethhdr, ETH_ALEN)) {

ether_addr_equal()

Also, what happens if you don't validate the MAC DA of the response, and
in general, if you just put your MAC SA as the MAC address of the
operationally active RMU DSA master? I guess the whole idea is to
provide a MAC address which the DSA master won't drop with its RX
filter, and its own MAC address is just fine for that.

> +		dev_dbg_ratelimited(ds->dev, "RMU: mismatching MAC address for request. Rx %pM expecting %pM\n",
> +				    ethhdr, dp->slave->dev_addr);
> +		return -EINVAL;
> +	}
> +
> +	return 0;
> +}
> +
> +int mv88e6xxx_inband_rcv(struct dsa_switch *ds, struct sk_buff *skb, int seq_no)
> +{
> +	struct mv88e6xxx_chip *chip = ds->priv;
> +	struct mv88e6xxx_port *port;
> +	u16 prodnum;
> +	u16 format;
> +	u8 pkt_dev;
> +	u8 pkt_prt;
> +	u16 code;
> +	int i;
> +
> +	/* Extract response data */
> +	format = get_unaligned_be16(&skb->data[0]);
> +	if (format != htons(MV88E6XXX_RMU_RESP_FORMAT_1) &&
> +	    format != htons(MV88E6XXX_RMU_RESP_FORMAT_2)) {
> +		dev_err_ratelimited(chip->dev, "RMU: received unknown format 0x%04x", format);
> +		goto out;
> +	}
> +
> +	code = get_unaligned_be16(&skb->data[4]);
> +	if (code == ntohs(MV88E6XXX_RMU_RESP_ERROR)) {

Please build with sparse, "make W=1 C=1". These are all wrong. The
variables retrieved from packet headers should be __be16, and "htohs"
(network to host) should be "htons" (host to network).

> +		dev_err_ratelimited(chip->dev, "RMU: error response code 0x%04x", code);
> +		goto out;
> +	}
> +
> +	pkt_dev = skb->data[6] & 0x1f;
> +	if (!dsa_switch_find(ds->dst->index, pkt_dev)) {

What is the relation between the pkt_dev from here, the RMU header, and
the source_device from dsa_inband_rcv_ll()? Are they always the same?
Can they be different? You throw away the result from dsa_switch_find()
in any case.

> +		dev_err_ratelimited(chip->dev, "RMU: response from unknown chip with index %d\n",
> +				    pkt_dev);
> +		goto out;
> +	}
> +
> +	/* Check sequence number */
> +	if (seq_no != chip->rmu.seq_no) {
> +		dev_err_ratelimited(chip->dev, "RMU: wrong seqno received %d, expected %d",
> +				    seq_no, chip->rmu.seq_no);
> +		goto out;
> +	}
> +
> +	/* Check response code */
> +	switch (chip->rmu.request_cmd) {
> +	case MV88E6XXX_RMU_REQ_GET_ID: {
> +		if (code == MV88E6XXX_RMU_RESP_CODE_GOT_ID) {

I'd expect htons to be used even here, with 0, for type consistency.
The compiler should figure things out and not add extra code.

> +			prodnum = get_unaligned_be16(&skb->data[2]);
> +			chip->rmu.got_id = prodnum;
> +			dev_info_ratelimited(chip->dev, "RMU: received id OK with product number: 0x%04x\n",
> +					     chip->rmu.got_id);
> +		} else {
> +			dev_dbg_ratelimited(chip->dev,
> +					    "RMU: unknown response for GET_ID format 0x%04x code 0x%04x",
> +					    format, code);
> +		}
> +		break;
> +	}
> +	case MV88E6XXX_RMU_REQ_DUMP_MIB:
> +		if (code == MV88E6XXX_RMU_RESP_CODE_DUMP_MIB &&
> +		    !mv88e6xxx_validate_port_mac(ds, skb)) {
> +			pkt_prt = (skb->data[7] & 0x78) >> 3;
> +			port = &chip->ports[pkt_prt];

It would be good if you could structure the code in such a way that you
don't parse stuff from the packet twice, once in mv88e6xxx_validate_port_mac()
and once here.

> +			if (!port) {
> +				dev_err_ratelimited(chip->dev, "RMU: illegal port number in response: %d\n",
> +						    pkt_prt);
> +				goto out;
> +			}
> +
> +			/* Copy whole array for further
> +			 * processing according to chip type
> +			 */
> +			for (i = 0; i < MV88E6XXX_RMU_MAX_RMON; i++)
> +				port->rmu_raw_stats[i] = get_unaligned_be32(&skb->data[12 + i * 4]);
> +		}
> +		break;
> +	default:
> +		dev_err_ratelimited(chip->dev,
> +				    "RMU: unknown response format 0x%04x and code 0x%04x from chip %d\n",
> +				    format, code, chip->ds->index);
> +		break;
> +	}
> +
> +out:
> +	complete(&chip->rmu.completion);
> +
> +	return 0;
> +}
> +
> +static int mv88e6xxx_rmu_tx(struct mv88e6xxx_chip *chip, int port,
> +			    const char *msg, int len)
> +{
> +	const struct dsa_device_ops *tag_ops;
> +	const struct dsa_port *dp;
> +	unsigned char *data;
> +	struct sk_buff *skb;
> +
> +	dp = dsa_to_port(chip->ds, port);
> +	if (!dp || !dp->cpu_dp)
> +		return 0;
> +
> +	tag_ops = dp->cpu_dp->tag_ops;
> +	if (!tag_ops)
> +		return -ENODEV;
> +
> +	skb = netdev_alloc_skb(chip->rmu.netdev, 64);
> +	if (!skb)
> +		return -ENOMEM;
> +
> +	skb_reserve(skb, 2 * ETH_HLEN + tag_ops->needed_headroom);

If you decide to rework this using the master netdev, you can use
dsa_tag_protocol_overhead(master->dsa_ptr->tag_ops). Or even reserve
enough headroom for the larger header (EDSA) and be done with it.
But then you need to construct a different header depending on whether
DSA or EDSA is used.

> +	skb_reset_network_header(skb);
> +	skb->pkt_type = PACKET_OUTGOING;

Could you please explain for me what will setting skb->pkt_type to
PACKET_OUTGOING achieve?

> +	skb->dev = chip->rmu.netdev;
> +
> +	/* Create RMU L3 message */
> +	data = skb_put(skb, len);
> +	memcpy(data, msg, len);
> +
> +	return tag_ops->inband_xmit(skb, dp->slave, ++chip->rmu.seq_no);
> +}
> +
> +static int mv88e6xxx_rmu_send_wait(struct mv88e6xxx_chip *chip, int port,
> +				   int request, const char *msg, int len)
> +{
> +	const struct dsa_port *dp;
> +	int ret = 0;
> +
> +	dp = dsa_to_port(chip->ds, port);
> +	if (!dp)
> +		return 0;
> +
> +	mutex_lock(&chip->rmu.mutex);
> +
> +	chip->rmu.request_cmd = request;
> +
> +	ret = mv88e6xxx_rmu_tx(chip, port, msg, len);
> +	if (ret == -ENODEV) {
> +		/* Device not ready yet? Try again later */
> +		ret = 0;

All the code paths in mv88e6xxx_rmu_tx() that return -ENODEV are
fabricated reasons to have errors, IMO, and in a correct implementation
we should never even get there. Please drop the special casing.

> +		goto out;
> +	}
> +
> +	if (ret) {
> +		dev_dbg(chip->dev, "RMU: error transmitting request (%d)", ret);

Please have a normal log level for an error, dev_err().
Also you can print the symbolic name for the error:

		dev_err(chip->dev, "RMU: error transmitting request (%pe)",
			ERR_PTR(ret));

> +		goto out;
> +	}
> +
> +	ret = wait_for_completion_timeout(&chip->rmu.completion,
> +					  msecs_to_jiffies(MV88E6XXX_WAIT_POLL_TIME_MS));
> +	if (ret == 0) {
> +		dev_dbg(chip->dev,
> +			"RMU: timeout waiting for request %d (%d) on dev:port %d:%d\n",
> +			request, ret, chip->ds->index, port);

Again, please increase the log level of the error condition.
Also, chip->ds->index is useless information, we have info about the
switch via dev_dbg/dev_err.

> +		ret = -ETIMEDOUT;
> +	}
> +
> +out:
> +	mutex_unlock(&chip->rmu.mutex);
> +
> +	return ret > 0 ? 0 : ret;
> +}
> +
> +static int mv88e6xxx_rmu_get_id(struct mv88e6xxx_chip *chip, int port)
> +{
> +	const u8 get_id[8] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 };
> +	int ret = -1;
> +
> +	if (chip->rmu.got_id)
> +		return 0;
> +
> +	chip->rmu.netdev = dev_get_by_name(&init_net, "chan0");

What if I don't have a slave device called "chan0"? I can't use the RMU?

> +	if (!chip->rmu.netdev) {
> +		dev_dbg(chip->dev, "RMU: unable to get interface");
> +		return -ENODEV;
> +	}
> +
> +	ret = mv88e6xxx_rmu_send_wait(chip, port, MV88E6XXX_RMU_REQ_GET_ID, get_id, 8);
> +	if (ret) {
> +		dev_dbg(chip->dev, "RMU: error for command GET_ID %d index %d\n", ret,
> +			chip->ds->index);
> +		return ret;
> +	}
> +
> +	return 0;
> +}
> +
> +static int mv88e6xxx_rmu_stats_get(struct mv88e6xxx_chip *chip, int port, uint64_t *data)
> +{
> +	u8 dump_mib[8] = { 0x00, 0x01, 0x00, 0x00, 0x10, 0x20, 0x00, 0x00 };
> +	int ret;
> +
> +	if (!chip)
> +		return 0;

How can "chip" be NULL?

> +
> +	ret = mv88e6xxx_rmu_get_id(chip, port);
> +	if (ret)
> +		return ret;
> +
> +	/* Send a GET_MIB command */
> +	dump_mib[7] = port;
> +	ret = mv88e6xxx_rmu_send_wait(chip, port, MV88E6XXX_RMU_REQ_DUMP_MIB, dump_mib, 8);
> +	if (ret) {
> +		dev_dbg(chip->dev, "RMU: error for command DUMP_MIB %d dev %d:%d\n", ret,
> +			chip->ds->index, port);
> +		return ret;
> +	}
> +
> +	/* Update MIB for port */
> +	if (chip->info->ops->stats_get_stats)
> +		return chip->info->ops->stats_get_stats(chip, port, data);
> +
> +	return 0;
> +}
> +
> +static struct mv88e6xxx_bus_ops mv88e6xxx_bus_ops = {

static const struct

> +	.get_rmon = mv88e6xxx_rmu_stats_get,
> +};
> +
> +int mv88e6xxx_rmu_init(struct mv88e6xxx_chip *chip)
> +{
> +	dev_info(chip->dev, "RMU: setting up for switch@%d", chip->sw_addr);

Not a whole lotta useful information brought by this. Also,
chip->sw_addr is already visible via dev_info(chip->dev).
I would just drop this.

> +
> +	init_completion(&chip->rmu.completion);
> +
> +	mutex_init(&chip->rmu.mutex);

I would just move these to mv88e6xxx_rmu_setup(), and move
mv88e6xxx_rmu_setup() to rmu.c.

> +
> +	chip->rmu.ops = &mv88e6xxx_bus_ops;

By doing this (combined with the way in which chip->rmu.ops is actually
tested for), you are saying that RMU is available since driver probe time.
But it isn't. It's only available when the master goes up and is
operational. So you're setting yourself up for lots of I/O failures in
the beginning.

> +
> +	return 0;
> +}
> +
> +void mv88e6xxx_rmu_master_change(struct dsa_switch *ds, const struct net_device *master,
> +				 bool operational)
> +{
> +	struct mv88e6xxx_chip *chip = ds->priv;
> +
> +	if (operational)
> +		chip->rmu.ops = &mv88e6xxx_bus_ops;
> +	else
> +		chip->rmu.ops = NULL;
> +}

There is a subtle but very important point to be careful about here,
which is compatibility with multiple CPU ports. If there is a second DSA
master whose state flaps from up to down, this should not affect the
fact that you can still use RMU over the first DSA master. But in your
case it does, so this is a case of how not to write code that accounts
for that.

In fact, given this fact, I think your function prototypes for
chip->info->ops->rmu_enable() are all wrong / not sufficiently
reflective of what the hardware can do. If the hardware has a bit mask
of ports on which RMU operations are possible, why hardcode using
dsa_switch_upstream_port() and not look at which DSA masters/CPU ports
are actually up? At least for the top-most switch. For downstream
switches we can use dsa_switch_upstream_port(), I guess (even that can
be refined, but I'm not aware of setups using multiple DSA links, where
each DSA link ultimately goes to a different upstream switch).
Vladimir Oltean Aug. 30, 2022, 4:42 p.m. UTC | #2
On Tue, Aug 30, 2022 at 07:35:15PM +0300, Vladimir Oltean wrote:
> > +void mv88e6xxx_rmu_master_change(struct dsa_switch *ds, const struct net_device *master,
> > +				 bool operational)
> > +{
> > +	struct mv88e6xxx_chip *chip = ds->priv;
> > +
> > +	if (operational)
> > +		chip->rmu.ops = &mv88e6xxx_bus_ops;
> > +	else
> > +		chip->rmu.ops = NULL;
> > +}
> 
> There is a subtle but very important point to be careful about here,
> which is compatibility with multiple CPU ports. If there is a second DSA
> master whose state flaps from up to down, this should not affect the
> fact that you can still use RMU over the first DSA master. But in your
> case it does, so this is a case of how not to write code that accounts
> for that.
> 
> In fact, given this fact, I think your function prototypes for
> chip->info->ops->rmu_enable() are all wrong / not sufficiently
> reflective of what the hardware can do. If the hardware has a bit mask
> of ports on which RMU operations are possible, why hardcode using
> dsa_switch_upstream_port() and not look at which DSA masters/CPU ports
> are actually up? At least for the top-most switch. For downstream
> switches we can use dsa_switch_upstream_port(), I guess (even that can
> be refined, but I'm not aware of setups using multiple DSA links, where
> each DSA link ultimately goes to a different upstream switch).

Hit "send" too soon. Wanted to give the extra hint that the "master"
pointer is given to you here for a reason. You can look at struct
dsa_port *cpu_dp = master->dsa_ptr, and figure out the index of the CPU
port which can be used for RMU operations. I see that the macros are
constructed in a very strange way:

#define MV88E6352_G1_CTL2_RMU_MODE_DISABLED	0x0000
#define MV88E6352_G1_CTL2_RMU_MODE_PORT_4	0x1000
#define MV88E6352_G1_CTL2_RMU_MODE_PORT_5	0x2000
#define MV88E6352_G1_CTL2_RMU_MODE_PORT_6	0x3000

it's as if this is actually a bit mask of ports, and they all can be
combined together. The bit in G1_CTL2 whose state we can flip can be
made to depend on the number of the CPU port attached to the DSA master
which changed state.
Mattias Forsblad Aug. 31, 2022, 6:12 a.m. UTC | #3
On 2022-08-30 18:35, Vladimir Oltean wrote:
> On Fri, Aug 26, 2022 at 08:38:15AM +0200, Mattias Forsblad wrote:
>>  static int mv88e6xxx_rmu_setup(struct mv88e6xxx_chip *chip)
>>  {
>> +	if (chip->info->ops->rmu_enable)
>> +		if (!chip->info->ops->rmu_enable(chip))
>> +			return mv88e6xxx_rmu_init(chip);
>> +
>>  	if (chip->info->ops->rmu_disable)
>>  		return chip->info->ops->rmu_disable(chip);
> 
> I think it's very important for the RMU to still start as disabled.
> You enable it dynamically when the master goes up.
> 

Please elaborate why this may pose a problem, I might have missed
some information.

>> diff --git a/drivers/net/dsa/mv88e6xxx/rmu.c b/drivers/net/dsa/mv88e6xxx/rmu.c
>> new file mode 100644
>> index 000000000000..b7d850c099c5
>> --- /dev/null
>> +++ b/drivers/net/dsa/mv88e6xxx/rmu.c
>> @@ -0,0 +1,273 @@
>> +// SPDX-License-Identifier: GPL-2.0-or-later
>> +/*
>> + * Marvell 88E6xxx Switch Remote Management Unit Support
>> + *
>> + * Copyright (c) 2022 Mattias Forsblad <mattias.forsblad@gmail.com>
>> + *
>> + */
>> +
>> +#include <asm/unaligned.h>
>> +#include "rmu.h"
>> +#include "global1.h"
>> +
>> +static int mv88e6xxx_validate_port_mac(struct dsa_switch *ds, struct sk_buff *skb)
>> +{
>> +	unsigned char *ethhdr;
>> +	struct dsa_port *dp;
>> +	u8 pkt_port;
>> +
>> +	pkt_port = (skb->data[7] >> 3) & 0xf;
>> +	dp = dsa_to_port(ds, pkt_port);
>> +	if (!dp) {
>> +		dev_dbg_ratelimited(ds->dev, "RMU: couldn't find port for %d\n", pkt_port);
>> +		return -ENXIO;
>> +	}
>> +
>> +	/* Check matching MAC */
>> +	ethhdr = skb_mac_header(skb);
>> +	if (memcmp(dp->slave->dev_addr, ethhdr, ETH_ALEN)) {
> 
> ether_addr_equal()
> 

Ofc.

> Also, what happens if you don't validate the MAC DA of the response, and
> in general, if you just put your MAC SA as the MAC address of the
> operationally active RMU DSA master? I guess the whole idea is to
> provide a MAC address which the DSA master won't drop with its RX
> filter, and its own MAC address is just fine for that.
>>> +		dev_dbg_ratelimited(ds->dev, "RMU: mismatching MAC address for request. Rx %pM expecting %pM\n",
>> +				    ethhdr, dp->slave->dev_addr);
>> +		return -EINVAL;
>> +	}
>> +
>> +	return 0;
>> +}
>> +
>> +int mv88e6xxx_inband_rcv(struct dsa_switch *ds, struct sk_buff *skb, int seq_no)
>> +{
>> +	struct mv88e6xxx_chip *chip = ds->priv;
>> +	struct mv88e6xxx_port *port;
>> +	u16 prodnum;
>> +	u16 format;
>> +	u8 pkt_dev;
>> +	u8 pkt_prt;
>> +	u16 code;
>> +	int i;
>> +
>> +	/* Extract response data */
>> +	format = get_unaligned_be16(&skb->data[0]);
>> +	if (format != htons(MV88E6XXX_RMU_RESP_FORMAT_1) &&
>> +	    format != htons(MV88E6XXX_RMU_RESP_FORMAT_2)) {
>> +		dev_err_ratelimited(chip->dev, "RMU: received unknown format 0x%04x", format);
>> +		goto out;
>> +	}
>> +
>> +	code = get_unaligned_be16(&skb->data[4]);
>> +	if (code == ntohs(MV88E6XXX_RMU_RESP_ERROR)) {
> 
> Please build with sparse, "make W=1 C=1". These are all wrong. The
> variables retrieved from packet headers should be __be16, and "htohs"
> (network to host) should be "htons" (host to network).
> 

Will check.

>> +		dev_err_ratelimited(chip->dev, "RMU: error response code 0x%04x", code);
>> +		goto out;
>> +	}
>> +
>> +	pkt_dev = skb->data[6] & 0x1f;
>> +	if (!dsa_switch_find(ds->dst->index, pkt_dev)) {
> 
> What is the relation between the pkt_dev from here, the RMU header, and
> the source_device from dsa_inband_rcv_ll()? Are they always the same?
> Can they be different? You throw away the result from dsa_switch_find()
> in any case.
> 

Will check.

>> +		dev_err_ratelimited(chip->dev, "RMU: response from unknown chip with index %d\n",
>> +				    pkt_dev);
>> +		goto out;
>> +	}
>> +
>> +	/* Check sequence number */
>> +	if (seq_no != chip->rmu.seq_no) {
>> +		dev_err_ratelimited(chip->dev, "RMU: wrong seqno received %d, expected %d",
>> +				    seq_no, chip->rmu.seq_no);
>> +		goto out;
>> +	}
>> +
>> +	/* Check response code */
>> +	switch (chip->rmu.request_cmd) {
>> +	case MV88E6XXX_RMU_REQ_GET_ID: {
>> +		if (code == MV88E6XXX_RMU_RESP_CODE_GOT_ID) {
> 
> I'd expect htons to be used even here, with 0, for type consistency.
> The compiler should figure things out and not add extra code.
> 

Thanks.

>> +			prodnum = get_unaligned_be16(&skb->data[2]);
>> +			chip->rmu.got_id = prodnum;
>> +			dev_info_ratelimited(chip->dev, "RMU: received id OK with product number: 0x%04x\n",
>> +					     chip->rmu.got_id);
>> +		} else {
>> +			dev_dbg_ratelimited(chip->dev,
>> +					    "RMU: unknown response for GET_ID format 0x%04x code 0x%04x",
>> +					    format, code);
>> +		}
>> +		break;
>> +	}
>> +	case MV88E6XXX_RMU_REQ_DUMP_MIB:
>> +		if (code == MV88E6XXX_RMU_RESP_CODE_DUMP_MIB &&
>> +		    !mv88e6xxx_validate_port_mac(ds, skb)) {
>> +			pkt_prt = (skb->data[7] & 0x78) >> 3;
>> +			port = &chip->ports[pkt_prt];
> 
> It would be good if you could structure the code in such a way that you
> don't parse stuff from the packet twice, once in mv88e6xxx_validate_port_mac()
> and once here.
> 

Agreed.

>> +			if (!port) {
>> +				dev_err_ratelimited(chip->dev, "RMU: illegal port number in response: %d\n",
>> +						    pkt_prt);
>> +				goto out;
>> +			}
>> +
>> +			/* Copy whole array for further
>> +			 * processing according to chip type
>> +			 */
>> +			for (i = 0; i < MV88E6XXX_RMU_MAX_RMON; i++)
>> +				port->rmu_raw_stats[i] = get_unaligned_be32(&skb->data[12 + i * 4]);
>> +		}
>> +		break;
>> +	default:
>> +		dev_err_ratelimited(chip->dev,
>> +				    "RMU: unknown response format 0x%04x and code 0x%04x from chip %d\n",
>> +				    format, code, chip->ds->index);
>> +		break;
>> +	}
>> +
>> +out:
>> +	complete(&chip->rmu.completion);
>> +
>> +	return 0;
>> +}
>> +
>> +static int mv88e6xxx_rmu_tx(struct mv88e6xxx_chip *chip, int port,
>> +			    const char *msg, int len)
>> +{
>> +	const struct dsa_device_ops *tag_ops;
>> +	const struct dsa_port *dp;
>> +	unsigned char *data;
>> +	struct sk_buff *skb;
>> +
>> +	dp = dsa_to_port(chip->ds, port);
>> +	if (!dp || !dp->cpu_dp)
>> +		return 0;
>> +
>> +	tag_ops = dp->cpu_dp->tag_ops;
>> +	if (!tag_ops)
>> +		return -ENODEV;
>> +
>> +	skb = netdev_alloc_skb(chip->rmu.netdev, 64);
>> +	if (!skb)
>> +		return -ENOMEM;
>> +
>> +	skb_reserve(skb, 2 * ETH_HLEN + tag_ops->needed_headroom);
> 
> If you decide to rework this using the master netdev, you can use
> dsa_tag_protocol_overhead(master->dsa_ptr->tag_ops). Or even reserve
> enough headroom for the larger header (EDSA) and be done with it.
> But then you need to construct a different header depending on whether
> DSA or EDSA is used.
> 

So in the new version a la qca8k we need the 'extra' parameter to
see if we need space for EDSA header, thus we need run through the tagger.
We can discuss that in the next version.

>> +	skb_reset_network_header(skb);
>> +	skb->pkt_type = PACKET_OUTGOING;
> 
> Could you please explain for me what will setting skb->pkt_type to
> PACKET_OUTGOING achieve?
>

I though it was prudent, will remove if it's not needed.
 
>> +	skb->dev = chip->rmu.netdev;
>> +
>> +	/* Create RMU L3 message */
>> +	data = skb_put(skb, len);
>> +	memcpy(data, msg, len);
>> +
>> +	return tag_ops->inband_xmit(skb, dp->slave, ++chip->rmu.seq_no);
>> +}
>> +
>> +static int mv88e6xxx_rmu_send_wait(struct mv88e6xxx_chip *chip, int port,
>> +				   int request, const char *msg, int len)
>> +{
>> +	const struct dsa_port *dp;
>> +	int ret = 0;
>> +
>> +	dp = dsa_to_port(chip->ds, port);
>> +	if (!dp)
>> +		return 0;
>> +
>> +	mutex_lock(&chip->rmu.mutex);
>> +
>> +	chip->rmu.request_cmd = request;
>> +
>> +	ret = mv88e6xxx_rmu_tx(chip, port, msg, len);
>> +	if (ret == -ENODEV) {
>> +		/* Device not ready yet? Try again later */
>> +		ret = 0;
> 
> All the code paths in mv88e6xxx_rmu_tx() that return -ENODEV are
> fabricated reasons to have errors, IMO, and in a correct implementation
> we should never even get there. Please drop the special casing.
> 

Re-worked in the next version.

>> +		goto out;
>> +	}
>> +
>> +	if (ret) {
>> +		dev_dbg(chip->dev, "RMU: error transmitting request (%d)", ret);
> 
> Please have a normal log level for an error, dev_err().
> Also you can print the symbolic name for the error:
> 
> 		dev_err(chip->dev, "RMU: error transmitting request (%pe)",
> 			ERR_PTR(ret));
> 

Thanks.

>> +		goto out;
>> +	}
>> +
>> +	ret = wait_for_completion_timeout(&chip->rmu.completion,
>> +					  msecs_to_jiffies(MV88E6XXX_WAIT_POLL_TIME_MS));
>> +	if (ret == 0) {
>> +		dev_dbg(chip->dev,
>> +			"RMU: timeout waiting for request %d (%d) on dev:port %d:%d\n",
>> +			request, ret, chip->ds->index, port);
> 
> Again, please increase the log level of the error condition.
> Also, chip->ds->index is useless information, we have info about the
> switch via dev_dbg/dev_err.
> 

Ok, thanks.

>> +		ret = -ETIMEDOUT;
>> +	}
>> +
>> +out:
>> +	mutex_unlock(&chip->rmu.mutex);
>> +
>> +	return ret > 0 ? 0 : ret;
>> +}
>> +
>> +static int mv88e6xxx_rmu_get_id(struct mv88e6xxx_chip *chip, int port)
>> +{
>> +	const u8 get_id[8] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 };
>> +	int ret = -1;
>> +
>> +	if (chip->rmu.got_id)
>> +		return 0;
>> +
>> +	chip->rmu.netdev = dev_get_by_name(&init_net, "chan0");
> 
> What if I don't have a slave device called "chan0"? I can't use the RMU?
> 

Reworked in the next version.

>> +	if (!chip->rmu.netdev) {
>> +		dev_dbg(chip->dev, "RMU: unable to get interface");
>> +		return -ENODEV;
>> +	}
>> +
>> +	ret = mv88e6xxx_rmu_send_wait(chip, port, MV88E6XXX_RMU_REQ_GET_ID, get_id, 8);
>> +	if (ret) {
>> +		dev_dbg(chip->dev, "RMU: error for command GET_ID %d index %d\n", ret,
>> +			chip->ds->index);
>> +		return ret;
>> +	}
>> +
>> +	return 0;
>> +}
>> +
>> +static int mv88e6xxx_rmu_stats_get(struct mv88e6xxx_chip *chip, int port, uint64_t *data)
>> +{
>> +	u8 dump_mib[8] = { 0x00, 0x01, 0x00, 0x00, 0x10, 0x20, 0x00, 0x00 };
>> +	int ret;
>> +
>> +	if (!chip)
>> +		return 0;
> 
> How can "chip" be NULL?
> 

Yes, Andrew pointed that out. Thanks.

>> +
>> +	ret = mv88e6xxx_rmu_get_id(chip, port);
>> +	if (ret)
>> +		return ret;
>> +
>> +	/* Send a GET_MIB command */
>> +	dump_mib[7] = port;
>> +	ret = mv88e6xxx_rmu_send_wait(chip, port, MV88E6XXX_RMU_REQ_DUMP_MIB, dump_mib, 8);
>> +	if (ret) {
>> +		dev_dbg(chip->dev, "RMU: error for command DUMP_MIB %d dev %d:%d\n", ret,
>> +			chip->ds->index, port);
>> +		return ret;
>> +	}
>> +
>> +	/* Update MIB for port */
>> +	if (chip->info->ops->stats_get_stats)
>> +		return chip->info->ops->stats_get_stats(chip, port, data);
>> +
>> +	return 0;
>> +}
>> +
>> +static struct mv88e6xxx_bus_ops mv88e6xxx_bus_ops = {
> 
> static const struct
> 
>> +	.get_rmon = mv88e6xxx_rmu_stats_get,
>> +};
>> +
>> +int mv88e6xxx_rmu_init(struct mv88e6xxx_chip *chip)
>> +{
>> +	dev_info(chip->dev, "RMU: setting up for switch@%d", chip->sw_addr);
> 
> Not a whole lotta useful information brought by this. Also,
> chip->sw_addr is already visible via dev_info(chip->dev).
> I would just drop this.
> 

Ok.

>> +
>> +	init_completion(&chip->rmu.completion);
>> +
>> +	mutex_init(&chip->rmu.mutex);
> 
> I would just move these to mv88e6xxx_rmu_setup(), and move
> mv88e6xxx_rmu_setup() to rmu.c.
> 
>> +
>> +	chip->rmu.ops = &mv88e6xxx_bus_ops;
> 
> By doing this (combined with the way in which chip->rmu.ops is actually
> tested for), you are saying that RMU is available since driver probe time.
> But it isn't. It's only available when the master goes up and is
> operational. So you're setting yourself up for lots of I/O failures in
> the beginning.
> 

Reworked in the next version.

>> +
>> +	return 0;
>> +}
>> +
>> +void mv88e6xxx_rmu_master_change(struct dsa_switch *ds, const struct net_device *master,
>> +				 bool operational)
>> +{
>> +	struct mv88e6xxx_chip *chip = ds->priv;
>> +
>> +	if (operational)
>> +		chip->rmu.ops = &mv88e6xxx_bus_ops;
>> +	else
>> +		chip->rmu.ops = NULL;
>> +}
> 
> There is a subtle but very important point to be careful about here,
> which is compatibility with multiple CPU ports. If there is a second DSA
> master whose state flaps from up to down, this should not affect the
> fact that you can still use RMU over the first DSA master. But in your
> case it does, so this is a case of how not to write code that accounts
> for that.
> 
> In fact, given this fact, I think your function prototypes for
> chip->info->ops->rmu_enable() are all wrong / not sufficiently
> reflective of what the hardware can do. If the hardware has a bit mask
> of ports on which RMU operations are possible, why hardcode using
> dsa_switch_upstream_port() and not look at which DSA masters/CPU ports
> are actually up? At least for the top-most switch. For downstream
> switches we can use dsa_switch_upstream_port(), I guess (even that can
> be refined, but I'm not aware of setups using multiple DSA links, where
> each DSA link ultimately goes to a different upstream switch).

This is also changed a la qca8k.
Mattias Forsblad Aug. 31, 2022, 6:15 a.m. UTC | #4
On 2022-08-30 18:42, Vladimir Oltean wrote:
> On Tue, Aug 30, 2022 at 07:35:15PM +0300, Vladimir Oltean wrote:
>>> +void mv88e6xxx_rmu_master_change(struct dsa_switch *ds, const struct net_device *master,
>>> +				 bool operational)
>>> +{
>>> +	struct mv88e6xxx_chip *chip = ds->priv;
>>> +
>>> +	if (operational)
>>> +		chip->rmu.ops = &mv88e6xxx_bus_ops;
>>> +	else
>>> +		chip->rmu.ops = NULL;
>>> +}
>>
>> There is a subtle but very important point to be careful about here,
>> which is compatibility with multiple CPU ports. If there is a second DSA
>> master whose state flaps from up to down, this should not affect the
>> fact that you can still use RMU over the first DSA master. But in your
>> case it does, so this is a case of how not to write code that accounts
>> for that.
>>
>> In fact, given this fact, I think your function prototypes for
>> chip->info->ops->rmu_enable() are all wrong / not sufficiently
>> reflective of what the hardware can do. If the hardware has a bit mask
>> of ports on which RMU operations are possible, why hardcode using
>> dsa_switch_upstream_port() and not look at which DSA masters/CPU ports
>> are actually up? At least for the top-most switch. For downstream
>> switches we can use dsa_switch_upstream_port(), I guess (even that can
>> be refined, but I'm not aware of setups using multiple DSA links, where
>> each DSA link ultimately goes to a different upstream switch).
> 
> Hit "send" too soon. Wanted to give the extra hint that the "master"
> pointer is given to you here for a reason. You can look at struct
> dsa_port *cpu_dp = master->dsa_ptr, and figure out the index of the CPU
> port which can be used for RMU operations. I see that the macros are
> constructed in a very strange way:
> 

Ah, nice one. Thanks.

> #define MV88E6352_G1_CTL2_RMU_MODE_DISABLED	0x0000
> #define MV88E6352_G1_CTL2_RMU_MODE_PORT_4	0x1000
> #define MV88E6352_G1_CTL2_RMU_MODE_PORT_5	0x2000
> #define MV88E6352_G1_CTL2_RMU_MODE_PORT_6	0x3000
> 
> it's as if this is actually a bit mask of ports, and they all can be
> combined together. The bit in G1_CTL2 whose state we can flip can be
> made to depend on the number of the CPU port attached to the DSA master
> which changed state.
Vladimir Oltean Aug. 31, 2022, 3:24 p.m. UTC | #5
On Wed, Aug 31, 2022 at 08:12:09AM +0200, Mattias Forsblad wrote:
> Please elaborate why this may pose a problem, I might have missed
> some information.

Because as I said, you need to enable it over the CPU port connected to
the master which goes up and is operational, and you don't have this
information at probe time.

> > If you decide to rework this using the master netdev, you can use
> > dsa_tag_protocol_overhead(master->dsa_ptr->tag_ops). Or even reserve
> > enough headroom for the larger header (EDSA) and be done with it.
> > But then you need to construct a different header depending on whether
> > DSA or EDSA is used.
> > 
> 
> So in the new version a la qca8k we need the 'extra' parameter to
> see if we need space for EDSA header, thus we need run through the tagger.
> We can discuss that in the next version.

"thus we need run through the tagger" -> is this a justification that
you're going to keep the tag_ops->inband_xmit?

You don't _have_ to, you already have access to the tagging protocol in
use via chip->tag_protocol, you can derive from that if you need the E
in EDSA or not, and still keep everything within the switch driver.

> > Could you please explain for me what will setting skb->pkt_type to
> > PACKET_OUTGOING achieve?
> >
> 
> I though it was prudent, will remove if it's not needed.

I honestly don't know what it does.
Mattias Forsblad Sept. 1, 2022, 9:05 a.m. UTC | #6
On 2022-08-30 18:42, Vladimir Oltean wrote:
> On Tue, Aug 30, 2022 at 07:35:15PM +0300, Vladimir Oltean wrote:
>>> +void mv88e6xxx_rmu_master_change(struct dsa_switch *ds, const struct net_device *master,
>>> +				 bool operational)
>>> +{
>>> +	struct mv88e6xxx_chip *chip = ds->priv;
>>> +
>>> +	if (operational)
>>> +		chip->rmu.ops = &mv88e6xxx_bus_ops;
>>> +	else
>>> +		chip->rmu.ops = NULL;
>>> +}
>>
>> There is a subtle but very important point to be careful about here,
>> which is compatibility with multiple CPU ports. If there is a second DSA
>> master whose state flaps from up to down, this should not affect the
>> fact that you can still use RMU over the first DSA master. But in your
>> case it does, so this is a case of how not to write code that accounts
>> for that.
>>
>> In fact, given this fact, I think your function prototypes for
>> chip->info->ops->rmu_enable() are all wrong / not sufficiently
>> reflective of what the hardware can do. If the hardware has a bit mask
>> of ports on which RMU operations are possible, why hardcode using
>> dsa_switch_upstream_port() and not look at which DSA masters/CPU ports
>> are actually up? At least for the top-most switch. For downstream
>> switches we can use dsa_switch_upstream_port(), I guess (even that can
>> be refined, but I'm not aware of setups using multiple DSA links, where
>> each DSA link ultimately goes to a different upstream switch).
> 
> Hit "send" too soon. Wanted to give the extra hint that the "master"
> pointer is given to you here for a reason. You can look at struct
> dsa_port *cpu_dp = master->dsa_ptr, and figure out the index of the CPU

Would this work on a system where there are multiple switches? I.e.

SOC <->port6 SC#1 <->port10 SC#2

Both have the same master interface (chan0) which gives the same
cpu_dp->dsa_ptr->index but they have different upstream ports that should 
be enabled for RMU.

> port which can be used for RMU operations. I see that the macros are
> constructed in a very strange way:
> 
> #define MV88E6352_G1_CTL2_RMU_MODE_DISABLED	0x0000
> #define MV88E6352_G1_CTL2_RMU_MODE_PORT_4	0x1000
> #define MV88E6352_G1_CTL2_RMU_MODE_PORT_5	0x2000
> #define MV88E6352_G1_CTL2_RMU_MODE_PORT_6	0x3000
> 
> it's as if this is actually a bit mask of ports, and they all can be
> combined together. The bit in G1_CTL2 whose state we can flip can be
> made to depend on the number of the CPU port attached to the DSA master
> which changed state.
Vladimir Oltean Sept. 1, 2022, 1:14 p.m. UTC | #7
On Thu, Sep 01, 2022 at 11:05:37AM +0200, Mattias Forsblad wrote:
> Would this work on a system where there are multiple switches? I.e.
> 
> SOC <->port6 SC#1 <->port10 SC#2
> 
> Both have the same master interface (chan0) which gives the same
> cpu_dp->dsa_ptr->index but they have different upstream ports that should 
> be enabled for RMU.

I think the answer to that is dsa_towards_port(ds, cpu_dp->ds->index, cpu_dp->index);
diff mbox series

Patch

diff --git a/drivers/net/dsa/mv88e6xxx/Makefile b/drivers/net/dsa/mv88e6xxx/Makefile
index c8eca2b6f959..105d7bd832c9 100644
--- a/drivers/net/dsa/mv88e6xxx/Makefile
+++ b/drivers/net/dsa/mv88e6xxx/Makefile
@@ -15,3 +15,4 @@  mv88e6xxx-objs += port_hidden.o
 mv88e6xxx-$(CONFIG_NET_DSA_MV88E6XXX_PTP) += ptp.o
 mv88e6xxx-objs += serdes.o
 mv88e6xxx-objs += smi.o
+mv88e6xxx-objs += rmu.o
diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index 07e9a4da924c..4c0510abd875 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -42,6 +42,7 @@ 
 #include "ptp.h"
 #include "serdes.h"
 #include "smi.h"
+#include "rmu.h"
 
 static void assert_reg_lock(struct mv88e6xxx_chip *chip)
 {
@@ -1529,6 +1530,10 @@  static int mv88e6xxx_trunk_setup(struct mv88e6xxx_chip *chip)
 
 static int mv88e6xxx_rmu_setup(struct mv88e6xxx_chip *chip)
 {
+	if (chip->info->ops->rmu_enable)
+		if (!chip->info->ops->rmu_enable(chip))
+			return mv88e6xxx_rmu_init(chip);
+
 	if (chip->info->ops->rmu_disable)
 		return chip->info->ops->rmu_disable(chip);
 
@@ -4090,6 +4095,7 @@  static const struct mv88e6xxx_ops mv88e6085_ops = {
 	.ppu_disable = mv88e6185_g1_ppu_disable,
 	.reset = mv88e6185_g1_reset,
 	.rmu_disable = mv88e6085_g1_rmu_disable,
+	.rmu_enable = mv88e6085_g1_rmu_enable,
 	.vtu_getnext = mv88e6352_g1_vtu_getnext,
 	.vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
 	.stu_getnext = mv88e6352_g1_stu_getnext,
@@ -4173,6 +4179,7 @@  static const struct mv88e6xxx_ops mv88e6097_ops = {
 	.pot_clear = mv88e6xxx_g2_pot_clear,
 	.reset = mv88e6352_g1_reset,
 	.rmu_disable = mv88e6085_g1_rmu_disable,
+	.rmu_enable = mv88e6085_g1_rmu_enable,
 	.vtu_getnext = mv88e6352_g1_vtu_getnext,
 	.vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
 	.phylink_get_caps = mv88e6095_phylink_get_caps,
@@ -5292,6 +5299,7 @@  static const struct mv88e6xxx_ops mv88e6352_ops = {
 	.pot_clear = mv88e6xxx_g2_pot_clear,
 	.reset = mv88e6352_g1_reset,
 	.rmu_disable = mv88e6352_g1_rmu_disable,
+	.rmu_enable = mv88e6352_g1_rmu_enable,
 	.atu_get_hash = mv88e6165_g1_atu_get_hash,
 	.atu_set_hash = mv88e6165_g1_atu_set_hash,
 	.vtu_getnext = mv88e6352_g1_vtu_getnext,
@@ -5359,6 +5367,7 @@  static const struct mv88e6xxx_ops mv88e6390_ops = {
 	.pot_clear = mv88e6xxx_g2_pot_clear,
 	.reset = mv88e6352_g1_reset,
 	.rmu_disable = mv88e6390_g1_rmu_disable,
+	.rmu_enable = mv88e6390_g1_rmu_enable,
 	.atu_get_hash = mv88e6165_g1_atu_get_hash,
 	.atu_set_hash = mv88e6165_g1_atu_set_hash,
 	.vtu_getnext = mv88e6390_g1_vtu_getnext,
@@ -5426,6 +5435,7 @@  static const struct mv88e6xxx_ops mv88e6390x_ops = {
 	.pot_clear = mv88e6xxx_g2_pot_clear,
 	.reset = mv88e6352_g1_reset,
 	.rmu_disable = mv88e6390_g1_rmu_disable,
+	.rmu_enable = mv88e6390_g1_rmu_enable,
 	.atu_get_hash = mv88e6165_g1_atu_get_hash,
 	.atu_set_hash = mv88e6165_g1_atu_set_hash,
 	.vtu_getnext = mv88e6390_g1_vtu_getnext,
@@ -5496,6 +5506,7 @@  static const struct mv88e6xxx_ops mv88e6393x_ops = {
 	.pot_clear = mv88e6xxx_g2_pot_clear,
 	.reset = mv88e6352_g1_reset,
 	.rmu_disable = mv88e6390_g1_rmu_disable,
+	.rmu_enable = mv88e6390_g1_rmu_enable,
 	.atu_get_hash = mv88e6165_g1_atu_get_hash,
 	.atu_set_hash = mv88e6165_g1_atu_set_hash,
 	.vtu_getnext = mv88e6390_g1_vtu_getnext,
@@ -6918,6 +6929,8 @@  static const struct dsa_switch_ops mv88e6xxx_switch_ops = {
 	.crosschip_lag_change	= mv88e6xxx_crosschip_lag_change,
 	.crosschip_lag_join	= mv88e6xxx_crosschip_lag_join,
 	.crosschip_lag_leave	= mv88e6xxx_crosschip_lag_leave,
+	.inband_receive         = mv88e6xxx_inband_rcv,
+	.master_state_change	= mv88e6xxx_rmu_master_change,
 };
 
 static int mv88e6xxx_register_switch(struct mv88e6xxx_chip *chip)
diff --git a/drivers/net/dsa/mv88e6xxx/chip.h b/drivers/net/dsa/mv88e6xxx/chip.h
index e693154cf803..024f45cc1476 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.h
+++ b/drivers/net/dsa/mv88e6xxx/chip.h
@@ -33,6 +33,8 @@ 
 
 #define MV88E6XXX_MAX_GPIO	16
 
+#define MV88E6XXX_WAIT_POLL_TIME_MS		200
+
 enum mv88e6xxx_egress_mode {
 	MV88E6XXX_EGRESS_MODE_UNMODIFIED,
 	MV88E6XXX_EGRESS_MODE_UNTAGGED,
@@ -266,6 +268,7 @@  struct mv88e6xxx_vlan {
 struct mv88e6xxx_port {
 	struct mv88e6xxx_chip *chip;
 	int port;
+	u64 rmu_raw_stats[64];
 	struct mv88e6xxx_vlan bridge_pvid;
 	u64 serdes_stats[2];
 	u64 atu_member_violation;
@@ -282,6 +285,18 @@  struct mv88e6xxx_port {
 	struct devlink_region *region;
 };
 
+struct mv88e6xxx_rmu {
+	/* RMU resources */
+	struct net_device *netdev;
+	struct mv88e6xxx_bus_ops *ops;
+	struct completion completion;
+	/* Mutex for RMU operations */
+	struct mutex mutex;
+	u16 got_id;
+	u8 request_cmd;
+	u8 seq_no;
+};
+
 enum mv88e6xxx_region_id {
 	MV88E6XXX_REGION_GLOBAL1 = 0,
 	MV88E6XXX_REGION_GLOBAL2,
@@ -410,12 +425,16 @@  struct mv88e6xxx_chip {
 
 	/* Bridge MST to SID mappings */
 	struct list_head msts;
+
+	/* RMU resources */
+	struct mv88e6xxx_rmu rmu;
 };
 
 struct mv88e6xxx_bus_ops {
 	int (*read)(struct mv88e6xxx_chip *chip, int addr, int reg, u16 *val);
 	int (*write)(struct mv88e6xxx_chip *chip, int addr, int reg, u16 val);
 	int (*init)(struct mv88e6xxx_chip *chip);
+	int (*get_rmon)(struct mv88e6xxx_chip *chip, int port, uint64_t *data);
 };
 
 struct mv88e6xxx_mdio_bus {
@@ -637,6 +656,7 @@  struct mv88e6xxx_ops {
 
 	/* Remote Management Unit operations */
 	int (*rmu_disable)(struct mv88e6xxx_chip *chip);
+	int (*rmu_enable)(struct mv88e6xxx_chip *chip);
 
 	/* Precision Time Protocol operations */
 	const struct mv88e6xxx_ptp_ops *ptp_ops;
diff --git a/drivers/net/dsa/mv88e6xxx/global1.c b/drivers/net/dsa/mv88e6xxx/global1.c
index 5848112036b0..5d86dbf39347 100644
--- a/drivers/net/dsa/mv88e6xxx/global1.c
+++ b/drivers/net/dsa/mv88e6xxx/global1.c
@@ -466,18 +466,102 @@  int mv88e6085_g1_rmu_disable(struct mv88e6xxx_chip *chip)
 				      MV88E6085_G1_CTL2_RM_ENABLE, 0);
 }
 
+int mv88e6085_g1_rmu_enable(struct mv88e6xxx_chip *chip)
+{
+	int val = MV88E6352_G1_CTL2_RMU_MODE_DISABLED;
+	int upstream_port = -1;
+
+	upstream_port = dsa_switch_upstream_port(chip->ds);
+	dev_dbg(chip->dev, "RMU: Enabling on port %d", upstream_port);
+	if (upstream_port < 0)
+		return -EOPNOTSUPP;
+
+	switch (upstream_port) {
+	case 9:
+		val = MV88E6085_G1_CTL2_RM_ENABLE;
+		break;
+	case 10:
+		val = MV88E6085_G1_CTL2_RM_ENABLE | MV88E6085_G1_CTL2_P10RM;
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	return mv88e6xxx_g1_ctl2_mask(chip, MV88E6085_G1_CTL2_P10RM |
+				      MV88E6085_G1_CTL2_RM_ENABLE, val);
+}
+
 int mv88e6352_g1_rmu_disable(struct mv88e6xxx_chip *chip)
 {
 	return mv88e6xxx_g1_ctl2_mask(chip, MV88E6352_G1_CTL2_RMU_MODE_MASK,
 				      MV88E6352_G1_CTL2_RMU_MODE_DISABLED);
 }
 
+int mv88e6352_g1_rmu_enable(struct mv88e6xxx_chip *chip)
+{
+	int val = MV88E6352_G1_CTL2_RMU_MODE_DISABLED;
+	int upstream_port;
+
+	upstream_port = dsa_switch_upstream_port(chip->ds);
+	dev_dbg(chip->dev, "RMU: Enabling on port %d", upstream_port);
+	if (upstream_port < 0)
+		return -EOPNOTSUPP;
+
+	switch (upstream_port) {
+	case 4:
+		val = MV88E6352_G1_CTL2_RMU_MODE_PORT_4;
+		break;
+	case 5:
+		val = MV88E6352_G1_CTL2_RMU_MODE_PORT_5;
+		break;
+	case 6:
+		val = MV88E6352_G1_CTL2_RMU_MODE_PORT_6;
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	return mv88e6xxx_g1_ctl2_mask(chip, MV88E6352_G1_CTL2_RMU_MODE_MASK,
+			val);
+}
+
 int mv88e6390_g1_rmu_disable(struct mv88e6xxx_chip *chip)
 {
 	return mv88e6xxx_g1_ctl2_mask(chip, MV88E6390_G1_CTL2_RMU_MODE_MASK,
 				      MV88E6390_G1_CTL2_RMU_MODE_DISABLED);
 }
 
+int mv88e6390_g1_rmu_enable(struct mv88e6xxx_chip *chip)
+{
+	int val = MV88E6390_G1_CTL2_RMU_MODE_DISABLED;
+	int upstream_port;
+
+	upstream_port = dsa_switch_upstream_port(chip->ds);
+	dev_dbg(chip->dev, "RMU: Enabling on port %d", upstream_port);
+	if (upstream_port < 0)
+		return -EOPNOTSUPP;
+
+	switch (upstream_port) {
+	case 0:
+		val = MV88E6390_G1_CTL2_RMU_MODE_PORT_0;
+		break;
+	case 1:
+		val = MV88E6390_G1_CTL2_RMU_MODE_PORT_1;
+		break;
+	case 9:
+		val = MV88E6390_G1_CTL2_RMU_MODE_PORT_9;
+		break;
+	case 10:
+		val = MV88E6390_G1_CTL2_RMU_MODE_PORT_10;
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	return mv88e6xxx_g1_ctl2_mask(chip, MV88E6390_G1_CTL2_RMU_MODE_MASK,
+			val);
+}
+
 int mv88e6390_g1_stats_set_histogram(struct mv88e6xxx_chip *chip)
 {
 	return mv88e6xxx_g1_ctl2_mask(chip, MV88E6390_G1_CTL2_HIST_MODE_MASK,
diff --git a/drivers/net/dsa/mv88e6xxx/global1.h b/drivers/net/dsa/mv88e6xxx/global1.h
index 65958b2a0d3a..7e786503734a 100644
--- a/drivers/net/dsa/mv88e6xxx/global1.h
+++ b/drivers/net/dsa/mv88e6xxx/global1.h
@@ -313,8 +313,11 @@  int mv88e6250_g1_ieee_pri_map(struct mv88e6xxx_chip *chip);
 int mv88e6185_g1_set_cascade_port(struct mv88e6xxx_chip *chip, int port);
 
 int mv88e6085_g1_rmu_disable(struct mv88e6xxx_chip *chip);
+int mv88e6085_g1_rmu_enable(struct mv88e6xxx_chip *chip);
 int mv88e6352_g1_rmu_disable(struct mv88e6xxx_chip *chip);
+int mv88e6352_g1_rmu_enable(struct mv88e6xxx_chip *chip);
 int mv88e6390_g1_rmu_disable(struct mv88e6xxx_chip *chip);
+int mv88e6390_g1_rmu_enable(struct mv88e6xxx_chip *chip);
 
 int mv88e6xxx_g1_set_device_number(struct mv88e6xxx_chip *chip, int index);
 
diff --git a/drivers/net/dsa/mv88e6xxx/rmu.c b/drivers/net/dsa/mv88e6xxx/rmu.c
new file mode 100644
index 000000000000..b7d850c099c5
--- /dev/null
+++ b/drivers/net/dsa/mv88e6xxx/rmu.c
@@ -0,0 +1,273 @@ 
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Marvell 88E6xxx Switch Remote Management Unit Support
+ *
+ * Copyright (c) 2022 Mattias Forsblad <mattias.forsblad@gmail.com>
+ *
+ */
+
+#include <asm/unaligned.h>
+#include "rmu.h"
+#include "global1.h"
+
+static int mv88e6xxx_validate_port_mac(struct dsa_switch *ds, struct sk_buff *skb)
+{
+	unsigned char *ethhdr;
+	struct dsa_port *dp;
+	u8 pkt_port;
+
+	pkt_port = (skb->data[7] >> 3) & 0xf;
+	dp = dsa_to_port(ds, pkt_port);
+	if (!dp) {
+		dev_dbg_ratelimited(ds->dev, "RMU: couldn't find port for %d\n", pkt_port);
+		return -ENXIO;
+	}
+
+	/* Check matching MAC */
+	ethhdr = skb_mac_header(skb);
+	if (memcmp(dp->slave->dev_addr, ethhdr, ETH_ALEN)) {
+		dev_dbg_ratelimited(ds->dev, "RMU: mismatching MAC address for request. Rx %pM expecting %pM\n",
+				    ethhdr, dp->slave->dev_addr);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+int mv88e6xxx_inband_rcv(struct dsa_switch *ds, struct sk_buff *skb, int seq_no)
+{
+	struct mv88e6xxx_chip *chip = ds->priv;
+	struct mv88e6xxx_port *port;
+	u16 prodnum;
+	u16 format;
+	u8 pkt_dev;
+	u8 pkt_prt;
+	u16 code;
+	int i;
+
+	/* Extract response data */
+	format = get_unaligned_be16(&skb->data[0]);
+	if (format != htons(MV88E6XXX_RMU_RESP_FORMAT_1) &&
+	    format != htons(MV88E6XXX_RMU_RESP_FORMAT_2)) {
+		dev_err_ratelimited(chip->dev, "RMU: received unknown format 0x%04x", format);
+		goto out;
+	}
+
+	code = get_unaligned_be16(&skb->data[4]);
+	if (code == ntohs(MV88E6XXX_RMU_RESP_ERROR)) {
+		dev_err_ratelimited(chip->dev, "RMU: error response code 0x%04x", code);
+		goto out;
+	}
+
+	pkt_dev = skb->data[6] & 0x1f;
+	if (!dsa_switch_find(ds->dst->index, pkt_dev)) {
+		dev_err_ratelimited(chip->dev, "RMU: response from unknown chip with index %d\n",
+				    pkt_dev);
+		goto out;
+	}
+
+	/* Check sequence number */
+	if (seq_no != chip->rmu.seq_no) {
+		dev_err_ratelimited(chip->dev, "RMU: wrong seqno received %d, expected %d",
+				    seq_no, chip->rmu.seq_no);
+		goto out;
+	}
+
+	/* Check response code */
+	switch (chip->rmu.request_cmd) {
+	case MV88E6XXX_RMU_REQ_GET_ID: {
+		if (code == MV88E6XXX_RMU_RESP_CODE_GOT_ID) {
+			prodnum = get_unaligned_be16(&skb->data[2]);
+			chip->rmu.got_id = prodnum;
+			dev_info_ratelimited(chip->dev, "RMU: received id OK with product number: 0x%04x\n",
+					     chip->rmu.got_id);
+		} else {
+			dev_dbg_ratelimited(chip->dev,
+					    "RMU: unknown response for GET_ID format 0x%04x code 0x%04x",
+					    format, code);
+		}
+		break;
+	}
+	case MV88E6XXX_RMU_REQ_DUMP_MIB:
+		if (code == MV88E6XXX_RMU_RESP_CODE_DUMP_MIB &&
+		    !mv88e6xxx_validate_port_mac(ds, skb)) {
+			pkt_prt = (skb->data[7] & 0x78) >> 3;
+			port = &chip->ports[pkt_prt];
+			if (!port) {
+				dev_err_ratelimited(chip->dev, "RMU: illegal port number in response: %d\n",
+						    pkt_prt);
+				goto out;
+			}
+
+			/* Copy whole array for further
+			 * processing according to chip type
+			 */
+			for (i = 0; i < MV88E6XXX_RMU_MAX_RMON; i++)
+				port->rmu_raw_stats[i] = get_unaligned_be32(&skb->data[12 + i * 4]);
+		}
+		break;
+	default:
+		dev_err_ratelimited(chip->dev,
+				    "RMU: unknown response format 0x%04x and code 0x%04x from chip %d\n",
+				    format, code, chip->ds->index);
+		break;
+	}
+
+out:
+	complete(&chip->rmu.completion);
+
+	return 0;
+}
+
+static int mv88e6xxx_rmu_tx(struct mv88e6xxx_chip *chip, int port,
+			    const char *msg, int len)
+{
+	const struct dsa_device_ops *tag_ops;
+	const struct dsa_port *dp;
+	unsigned char *data;
+	struct sk_buff *skb;
+
+	dp = dsa_to_port(chip->ds, port);
+	if (!dp || !dp->cpu_dp)
+		return 0;
+
+	tag_ops = dp->cpu_dp->tag_ops;
+	if (!tag_ops)
+		return -ENODEV;
+
+	skb = netdev_alloc_skb(chip->rmu.netdev, 64);
+	if (!skb)
+		return -ENOMEM;
+
+	skb_reserve(skb, 2 * ETH_HLEN + tag_ops->needed_headroom);
+	skb_reset_network_header(skb);
+	skb->pkt_type = PACKET_OUTGOING;
+	skb->dev = chip->rmu.netdev;
+
+	/* Create RMU L3 message */
+	data = skb_put(skb, len);
+	memcpy(data, msg, len);
+
+	return tag_ops->inband_xmit(skb, dp->slave, ++chip->rmu.seq_no);
+}
+
+static int mv88e6xxx_rmu_send_wait(struct mv88e6xxx_chip *chip, int port,
+				   int request, const char *msg, int len)
+{
+	const struct dsa_port *dp;
+	int ret = 0;
+
+	dp = dsa_to_port(chip->ds, port);
+	if (!dp)
+		return 0;
+
+	mutex_lock(&chip->rmu.mutex);
+
+	chip->rmu.request_cmd = request;
+
+	ret = mv88e6xxx_rmu_tx(chip, port, msg, len);
+	if (ret == -ENODEV) {
+		/* Device not ready yet? Try again later */
+		ret = 0;
+		goto out;
+	}
+
+	if (ret) {
+		dev_dbg(chip->dev, "RMU: error transmitting request (%d)", ret);
+		goto out;
+	}
+
+	ret = wait_for_completion_timeout(&chip->rmu.completion,
+					  msecs_to_jiffies(MV88E6XXX_WAIT_POLL_TIME_MS));
+	if (ret == 0) {
+		dev_dbg(chip->dev,
+			"RMU: timeout waiting for request %d (%d) on dev:port %d:%d\n",
+			request, ret, chip->ds->index, port);
+		ret = -ETIMEDOUT;
+	}
+
+out:
+	mutex_unlock(&chip->rmu.mutex);
+
+	return ret > 0 ? 0 : ret;
+}
+
+static int mv88e6xxx_rmu_get_id(struct mv88e6xxx_chip *chip, int port)
+{
+	const u8 get_id[8] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 };
+	int ret = -1;
+
+	if (chip->rmu.got_id)
+		return 0;
+
+	chip->rmu.netdev = dev_get_by_name(&init_net, "chan0");
+	if (!chip->rmu.netdev) {
+		dev_dbg(chip->dev, "RMU: unable to get interface");
+		return -ENODEV;
+	}
+
+	ret = mv88e6xxx_rmu_send_wait(chip, port, MV88E6XXX_RMU_REQ_GET_ID, get_id, 8);
+	if (ret) {
+		dev_dbg(chip->dev, "RMU: error for command GET_ID %d index %d\n", ret,
+			chip->ds->index);
+		return ret;
+	}
+
+	return 0;
+}
+
+static int mv88e6xxx_rmu_stats_get(struct mv88e6xxx_chip *chip, int port, uint64_t *data)
+{
+	u8 dump_mib[8] = { 0x00, 0x01, 0x00, 0x00, 0x10, 0x20, 0x00, 0x00 };
+	int ret;
+
+	if (!chip)
+		return 0;
+
+	ret = mv88e6xxx_rmu_get_id(chip, port);
+	if (ret)
+		return ret;
+
+	/* Send a GET_MIB command */
+	dump_mib[7] = port;
+	ret = mv88e6xxx_rmu_send_wait(chip, port, MV88E6XXX_RMU_REQ_DUMP_MIB, dump_mib, 8);
+	if (ret) {
+		dev_dbg(chip->dev, "RMU: error for command DUMP_MIB %d dev %d:%d\n", ret,
+			chip->ds->index, port);
+		return ret;
+	}
+
+	/* Update MIB for port */
+	if (chip->info->ops->stats_get_stats)
+		return chip->info->ops->stats_get_stats(chip, port, data);
+
+	return 0;
+}
+
+static struct mv88e6xxx_bus_ops mv88e6xxx_bus_ops = {
+	.get_rmon = mv88e6xxx_rmu_stats_get,
+};
+
+int mv88e6xxx_rmu_init(struct mv88e6xxx_chip *chip)
+{
+	dev_info(chip->dev, "RMU: setting up for switch@%d", chip->sw_addr);
+
+	init_completion(&chip->rmu.completion);
+
+	mutex_init(&chip->rmu.mutex);
+
+	chip->rmu.ops = &mv88e6xxx_bus_ops;
+
+	return 0;
+}
+
+void mv88e6xxx_rmu_master_change(struct dsa_switch *ds, const struct net_device *master,
+				 bool operational)
+{
+	struct mv88e6xxx_chip *chip = ds->priv;
+
+	if (operational)
+		chip->rmu.ops = &mv88e6xxx_bus_ops;
+	else
+		chip->rmu.ops = NULL;
+}
diff --git a/drivers/net/dsa/mv88e6xxx/rmu.h b/drivers/net/dsa/mv88e6xxx/rmu.h
new file mode 100644
index 000000000000..19be624a3ebf
--- /dev/null
+++ b/drivers/net/dsa/mv88e6xxx/rmu.h
@@ -0,0 +1,33 @@ 
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * Marvell 88E6xxx Switch Remote Management Unit Support
+ *
+ * Copyright (c) 2022 Mattias Forsblad <mattias.forsblad@gmail.com>
+ *
+ */
+
+#ifndef _MV88E6XXX_RMU_H_
+#define _MV88E6XXX_RMU_H_
+
+#include "chip.h"
+
+#define MV88E6XXX_RMU_MAX_RMON			64
+
+#define MV88E6XXX_RMU_REQ_GET_ID		1
+#define MV88E6XXX_RMU_REQ_DUMP_MIB		2
+
+#define MV88E6XXX_RMU_RESP_FORMAT_1		0x0001
+#define MV88E6XXX_RMU_RESP_FORMAT_2		0x0002
+#define MV88E6XXX_RMU_RESP_ERROR		0xffff
+
+#define MV88E6XXX_RMU_RESP_CODE_GOT_ID		0x0000
+#define MV88E6XXX_RMU_RESP_CODE_DUMP_MIB	0x1020
+
+int mv88e6xxx_rmu_init(struct mv88e6xxx_chip *chip);
+
+int mv88e6xxx_inband_rcv(struct dsa_switch *ds, struct sk_buff *skb, int seq_no);
+
+void mv88e6xxx_rmu_master_change(struct dsa_switch *ds, const struct net_device *master,
+				 bool operational);
+
+#endif /* _MV88E6XXX_RMU_H_ */