diff mbox series

[net-next,v3] net: dsa: mv88e6xxx: Add erratum 3.14 for 88E6390X and 88E6190X

Message ID 20230721102618.13408-1-ante.knezic@helmholz.de (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Headers show
Series [net-next,v3] net: dsa: mv88e6xxx: Add erratum 3.14 for 88E6390X and 88E6190X | expand

Checks

Context Check Description
netdev/series_format success Single patches do not need cover letters
netdev/tree_selection success Clearly marked for net-next
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 1342 this patch: 1342
netdev/cc_maintainers warning 1 maintainers not CCed: linux@armlinux.org.uk
netdev/build_clang success Errors and warnings before: 1365 this patch: 1365
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 1365 this patch: 1365
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 73 lines checked
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Ante Knezic July 21, 2023, 10:26 a.m. UTC
Fixes XAUI/RXAUI lane alignment errors.
Issue causes dropped packets when trying to communicate over
fiber via SERDES lanes of port 9 and 10.
Errata document applies only to 88E6190X and 88E6390X devices.
Requires poking in undocumented registers.

Signed-off-by: Ante Knezic <ante.knezic@helmholz.de>
---
V3 : Rework to fit the new phylink_pcs infrastructure
V2 : Rework as suggested by Andrew Lunn <andrew@lun.ch> 
 * make int lanes[] const 
 * reorder prod_nums
 * update commit message to indicate we are dealing with
   undocumented Marvell registers and magic values
---
 drivers/net/dsa/mv88e6xxx/pcs-639x.c | 42 ++++++++++++++++++++++++++++++++++++
 1 file changed, 42 insertions(+)

Comments

Paolo Abeni July 25, 2023, 8:56 a.m. UTC | #1
On Fri, 2023-07-21 at 12:26 +0200, Ante Knezic wrote:
> Fixes XAUI/RXAUI lane alignment errors.
> Issue causes dropped packets when trying to communicate over
> fiber via SERDES lanes of port 9 and 10.
> Errata document applies only to 88E6190X and 88E6390X devices.
> Requires poking in undocumented registers.
> 
> Signed-off-by: Ante Knezic <ante.knezic@helmholz.de>
> ---
> V3 : Rework to fit the new phylink_pcs infrastructure
> V2 : Rework as suggested by Andrew Lunn <andrew@lun.ch> 
>  * make int lanes[] const 
>  * reorder prod_nums
>  * update commit message to indicate we are dealing with
>    undocumented Marvell registers and magic values
> ---
>  drivers/net/dsa/mv88e6xxx/pcs-639x.c | 42 ++++++++++++++++++++++++++++++++++++
>  1 file changed, 42 insertions(+)
> 
> diff --git a/drivers/net/dsa/mv88e6xxx/pcs-639x.c b/drivers/net/dsa/mv88e6xxx/pcs-639x.c
> index 98dd49dac421..50b14804c360 100644
> --- a/drivers/net/dsa/mv88e6xxx/pcs-639x.c
> +++ b/drivers/net/dsa/mv88e6xxx/pcs-639x.c
> @@ -20,6 +20,7 @@ struct mv88e639x_pcs {
>  	struct mdio_device mdio;
>  	struct phylink_pcs sgmii_pcs;
>  	struct phylink_pcs xg_pcs;
> +	struct mv88e6xxx_chip *chip;
>  	bool supports_5g;
>  	phy_interface_t interface;
>  	unsigned int irq;
> @@ -205,13 +206,52 @@ static void mv88e639x_sgmii_pcs_pre_config(struct phylink_pcs *pcs,
>  	mv88e639x_sgmii_pcs_control_pwr(mpcs, false);
>  }
>  
> +static int mv88e6390_erratum_3_14(struct mv88e639x_pcs *mpcs)
> +{
> +	const int lanes[] = { MV88E6390_PORT9_LANE0, MV88E6390_PORT9_LANE1,
> +		MV88E6390_PORT9_LANE2, MV88E6390_PORT9_LANE3,
> +		MV88E6390_PORT10_LANE0, MV88E6390_PORT10_LANE1,
> +		MV88E6390_PORT10_LANE2, MV88E6390_PORT10_LANE3 };
> +	struct mdio_device mdio;
> +	int err, i;
> +
> +	/* 88e6190x and 88e6390x errata 3.14:
> +	 * After chip reset, SERDES reconfiguration or SERDES core
> +	 * Software Reset, the SERDES lanes may not be properly aligned
> +	 * resulting in CRC errors
> +	 */
> +
> +	mdio.bus = mpcs->mdio.bus;
> +
> +	for (i = 0; i < ARRAY_SIZE(lanes); i++) {
> +		mdio.addr = lanes[i];
> +
> +		err = mdiodev_c45_write(&mdio, MDIO_MMD_PHYXS,
> +					0xf054, 0x400C);
> +		if (err)
> +			return err;
> +
> +		err = mdiodev_c45_write(&mdio, MDIO_MMD_PHYXS,
> +					0xf054, 0x4000);
> +		if (err)
> +			return err;
> +	}
> +
> +	return 0;
> +}
> +
>  static int mv88e639x_sgmii_pcs_post_config(struct phylink_pcs *pcs,
>  					   phy_interface_t interface)
>  {
>  	struct mv88e639x_pcs *mpcs = sgmii_pcs_to_mv88e639x_pcs(pcs);
> +	struct mv88e6xxx_chip *chip = mpcs->chip;
>  
>  	mv88e639x_sgmii_pcs_control_pwr(mpcs, true);
>  
> +	if (chip->info->prod_num == MV88E6XXX_PORT_SWITCH_ID_PROD_6190X ||
> +	    chip->info->prod_num == MV88E6XXX_PORT_SWITCH_ID_PROD_6390X)
> +		mv88e6390_erratum_3_14(mpcs);

It looks like you are ignoring the errors reported by
mv88e6390_erratum_3_14(). Should the above be:

		return mv88e6390_erratum_3_14(mpcs);

instead?

Thanks!

Paolo
Ante Knezic July 25, 2023, 9:59 a.m. UTC | #2
On Tue, 25 Jul 2023 10:56:25 +0200 Paolo Abeni wrote
> It looks like you are ignoring the errors reported by
> mv88e6390_erratum_3_14(). Should the above be:
> 
> 		return mv88e6390_erratum_3_14(mpcs);
> 
> instead?
> 

I guess you are right. Would it make sense to do the evaluation for the 
	mv88e639x_sgmii_pcs_control_pwr(mpcs, true);
above as well?
Paolo Abeni July 25, 2023, 10:47 a.m. UTC | #3
[adding Russell]
On Tue, 2023-07-25 at 11:59 +0200, Ante Knezic wrote:
> On Tue, 25 Jul 2023 10:56:25 +0200 Paolo Abeni wrote
> > It looks like you are ignoring the errors reported by
> > mv88e6390_erratum_3_14(). Should the above be:
> > 
> > 		return mv88e6390_erratum_3_14(mpcs);
> > 
> > instead?
> > 
> 
> I guess you are right. Would it make sense to do the evaluation for the 
> 	mv88e639x_sgmii_pcs_control_pwr(mpcs, true);
> above as well?

Good question ;) it looks like pcs_post_config() errors are always
ignored by the core, but I guess it's better to report them as
accurately as possible.

@Russell, what it your preference here, should we just ignore the
generate errors earlier, or try to propagate them to the core/phylink,
should that later be changed to deal with them?

Thanks,

Paolo
Russell King (Oracle) July 25, 2023, 10:56 a.m. UTC | #4
On Tue, Jul 25, 2023 at 12:47:43PM +0200, Paolo Abeni wrote:
> [adding Russell]
> On Tue, 2023-07-25 at 11:59 +0200, Ante Knezic wrote:
> > On Tue, 25 Jul 2023 10:56:25 +0200 Paolo Abeni wrote
> > > It looks like you are ignoring the errors reported by
> > > mv88e6390_erratum_3_14(). Should the above be:
> > > 
> > > 		return mv88e6390_erratum_3_14(mpcs);
> > > 
> > > instead?
> > > 
> > 
> > I guess you are right. Would it make sense to do the evaluation for the 
> > 	mv88e639x_sgmii_pcs_control_pwr(mpcs, true);
> > above as well?
> 
> Good question ;) it looks like pcs_post_config() errors are always
> ignored by the core, but I guess it's better to report them as
> accurately as possible.

... because if they occur, it's way too late to attempt to unwind the
changes that have already occurred.

> @Russell, what it your preference here, should we just ignore the
> generate errors earlier, or try to propagate them to the core/phylink,
> should that later be changed to deal with them?

How would we deal with an error?

If it's a "we can't support this configuration" then that's a driver
problem, and means that the validation failed to exclude the
unsupported configuration.

If it's a communication error of some sort, then we're unlikely to
be able to back out the configuration change, because we probably
can't communicate with some device anymore - and there are paths
in phylink where these methods are called where there is no
possibility of propagating an error (due to being called in a
workqueue.)

I did decide that phylink_major_config() ought not proceed if
mac_prepare() fails, but really once mac_prepare() has been called
we're committed - and if an error happens after that, the network
interface is dead.
Vladimir Oltean July 25, 2023, 5:23 p.m. UTC | #5
On Fri, Jul 21, 2023 at 12:26:18PM +0200, Ante Knezic wrote:
> Fixes XAUI/RXAUI lane alignment errors.
> Issue causes dropped packets when trying to communicate over
> fiber via SERDES lanes of port 9 and 10.
> Errata document applies only to 88E6190X and 88E6390X devices.
> Requires poking in undocumented registers.
> 
> Signed-off-by: Ante Knezic <ante.knezic@helmholz.de>
> ---
> V3 : Rework to fit the new phylink_pcs infrastructure
> V2 : Rework as suggested by Andrew Lunn <andrew@lun.ch> 
>  * make int lanes[] const 
>  * reorder prod_nums
>  * update commit message to indicate we are dealing with
>    undocumented Marvell registers and magic values
> ---
>  drivers/net/dsa/mv88e6xxx/pcs-639x.c | 42 ++++++++++++++++++++++++++++++++++++
>  1 file changed, 42 insertions(+)
> 
> diff --git a/drivers/net/dsa/mv88e6xxx/pcs-639x.c b/drivers/net/dsa/mv88e6xxx/pcs-639x.c
> index 98dd49dac421..50b14804c360 100644
> --- a/drivers/net/dsa/mv88e6xxx/pcs-639x.c
> +++ b/drivers/net/dsa/mv88e6xxx/pcs-639x.c
> @@ -20,6 +20,7 @@ struct mv88e639x_pcs {
>  	struct mdio_device mdio;
>  	struct phylink_pcs sgmii_pcs;
>  	struct phylink_pcs xg_pcs;
> +	struct mv88e6xxx_chip *chip;
>  	bool supports_5g;
>  	phy_interface_t interface;
>  	unsigned int irq;
> @@ -205,13 +206,52 @@ static void mv88e639x_sgmii_pcs_pre_config(struct phylink_pcs *pcs,
>  	mv88e639x_sgmii_pcs_control_pwr(mpcs, false);
>  }
>  
> +static int mv88e6390_erratum_3_14(struct mv88e639x_pcs *mpcs)
> +{
> +	const int lanes[] = { MV88E6390_PORT9_LANE0, MV88E6390_PORT9_LANE1,
> +		MV88E6390_PORT9_LANE2, MV88E6390_PORT9_LANE3,
> +		MV88E6390_PORT10_LANE0, MV88E6390_PORT10_LANE1,
> +		MV88E6390_PORT10_LANE2, MV88E6390_PORT10_LANE3 };
> +	struct mdio_device mdio;
> +	int err, i;
> +
> +	/* 88e6190x and 88e6390x errata 3.14:
> +	 * After chip reset, SERDES reconfiguration or SERDES core
> +	 * Software Reset, the SERDES lanes may not be properly aligned
> +	 * resulting in CRC errors
> +	 */
> +
> +	mdio.bus = mpcs->mdio.bus;
> +
> +	for (i = 0; i < ARRAY_SIZE(lanes); i++) {
> +		mdio.addr = lanes[i];
> +
> +		err = mdiodev_c45_write(&mdio, MDIO_MMD_PHYXS,
> +					0xf054, 0x400C);
> +		if (err)
> +			return err;
> +
> +		err = mdiodev_c45_write(&mdio, MDIO_MMD_PHYXS,
> +					0xf054, 0x4000);
> +		if (err)
> +			return err;

I'm not sure which way is preferred by PHY maintainers, but it seems to
be a useless complication to simulate that you have a struct mdio_device
for the other lanes when you don't. It appears more appropriate to just
use mdiobus_c45_write(mpcs->mdio.bus, lanes[i]).

There's also the locking question (with the big caveat that we don't
know what the register writes do!). There's locking at the bus level,
but the MDIO device isn't locked. So phylink on those other PCSes can
still do stuff, even in-between the first and the second write to
undocumented register 0xf054.

I can speculate that writing 0x400c -> 0x4000 is something like: set
RX_RESET | TX_RESET followed by clear RX_RESET | TX_RESET. Is it ok if
stuff happens in between these writes - will it stick, or does this
logically interact with anything else in any other way? I guess we won't
know. I might be a bit closer to being okay with it if you could confirm
that some other (unrelated) register write to the PCS does make it
through (and can be read back) in between the 2 erratum writes.

> +	}
> +
> +	return 0;
> +}
> +
>  static int mv88e639x_sgmii_pcs_post_config(struct phylink_pcs *pcs,
>  					   phy_interface_t interface)
>  {
>  	struct mv88e639x_pcs *mpcs = sgmii_pcs_to_mv88e639x_pcs(pcs);
> +	struct mv88e6xxx_chip *chip = mpcs->chip;
>  
>  	mv88e639x_sgmii_pcs_control_pwr(mpcs, true);
>  
> +	if (chip->info->prod_num == MV88E6XXX_PORT_SWITCH_ID_PROD_6190X ||
> +	    chip->info->prod_num == MV88E6XXX_PORT_SWITCH_ID_PROD_6390X)
> +		mv88e6390_erratum_3_14(mpcs);

You could at least print an error if a write failure occurred, so that
it doesn't go completely unnoticed.

> +
>  	return 0;
>  }
>  
> @@ -523,6 +563,7 @@ static int mv88e6390_pcs_init(struct mv88e6xxx_chip *chip, int port)
>  	mpcs->sgmii_pcs.neg_mode = true;
>  	mpcs->xg_pcs.ops = &mv88e6390_xg_pcs_ops;
>  	mpcs->xg_pcs.neg_mode = true;
> +	mpcs->chip = chip;
>  
>  	err = mv88e639x_pcs_setup_irq(mpcs, chip, port);
>  	if (err)
> @@ -873,6 +914,7 @@ static int mv88e6393x_pcs_init(struct mv88e6xxx_chip *chip, int port)
>  	mpcs->xg_pcs.ops = &mv88e6393x_xg_pcs_ops;
>  	mpcs->xg_pcs.neg_mode = true;
>  	mpcs->supports_5g = true;
> +	mpcs->chip = chip;
>  
>  	err = mv88e6393x_erratum_4_6(mpcs);
>  	if (err)
> -- 
> 2.11.0
>
Russell King (Oracle) July 25, 2023, 5:49 p.m. UTC | #6
Sorry, I don't have the original email to reply to, and this is the
first which includes most of the patch. This reply is primerily for
Ante Knezic.

On Tue, Jul 25, 2023 at 08:23:43PM +0300, Vladimir Oltean wrote:
> On Fri, Jul 21, 2023 at 12:26:18PM +0200, Ante Knezic wrote:
> > diff --git a/drivers/net/dsa/mv88e6xxx/pcs-639x.c b/drivers/net/dsa/mv88e6xxx/pcs-639x.c
> > index 98dd49dac421..50b14804c360 100644
> > --- a/drivers/net/dsa/mv88e6xxx/pcs-639x.c
> > +++ b/drivers/net/dsa/mv88e6xxx/pcs-639x.c
> > @@ -20,6 +20,7 @@ struct mv88e639x_pcs {
> >  	struct mdio_device mdio;
> >  	struct phylink_pcs sgmii_pcs;
> >  	struct phylink_pcs xg_pcs;
> > +	struct mv88e6xxx_chip *chip;

	bool erratum_3_14;

> >  	bool supports_5g;
> >  	phy_interface_t interface;
> >  	unsigned int irq;
> > @@ -205,13 +206,52 @@ static void mv88e639x_sgmii_pcs_pre_config(struct phylink_pcs *pcs,
> >  	mv88e639x_sgmii_pcs_control_pwr(mpcs, false);
> >  }
> >  
> > +static int mv88e6390_erratum_3_14(struct mv88e639x_pcs *mpcs)
> > +{
> > +	const int lanes[] = { MV88E6390_PORT9_LANE0, MV88E6390_PORT9_LANE1,
> > +		MV88E6390_PORT9_LANE2, MV88E6390_PORT9_LANE3,
> > +		MV88E6390_PORT10_LANE0, MV88E6390_PORT10_LANE1,
> > +		MV88E6390_PORT10_LANE2, MV88E6390_PORT10_LANE3 };
> > +	struct mdio_device mdio;
> > +	int err, i;
> > +
> > +	/* 88e6190x and 88e6390x errata 3.14:
> > +	 * After chip reset, SERDES reconfiguration or SERDES core
> > +	 * Software Reset, the SERDES lanes may not be properly aligned
> > +	 * resulting in CRC errors
> > +	 */

Does the errata say that _all_ lanes need this treatment, even when
they are not being used as a group (e.g. for XAUI) ?

> > +
> > +	mdio.bus = mpcs->mdio.bus;
> > +
> > +	for (i = 0; i < ARRAY_SIZE(lanes); i++) {
> > +		mdio.addr = lanes[i];
> > +
> > +		err = mdiodev_c45_write(&mdio, MDIO_MMD_PHYXS,
> > +					0xf054, 0x400C);
> > +		if (err)
> > +			return err;
> > +
> > +		err = mdiodev_c45_write(&mdio, MDIO_MMD_PHYXS,
> > +					0xf054, 0x4000);
> > +		if (err)
> > +			return err;
> 
> I'm not sure which way is preferred by PHY maintainers, but it seems to
> be a useless complication to simulate that you have a struct mdio_device
> for the other lanes when you don't. It appears more appropriate to just
> use mdiobus_c45_write(mpcs->mdio.bus, lanes[i]).
> 
> There's also the locking question (with the big caveat that we don't
> know what the register writes do!). There's locking at the bus level,
> but the MDIO device isn't locked. So phylink on those other PCSes can
> still do stuff, even in-between the first and the second write to
> undocumented register 0xf054.
> 
> I can speculate that writing 0x400c -> 0x4000 is something like: set
> RX_RESET | TX_RESET followed by clear RX_RESET | TX_RESET. Is it ok if
> stuff happens in between these writes - will it stick, or does this
> logically interact with anything else in any other way? I guess we won't
> know. I might be a bit closer to being okay with it if you could confirm
> that some other (unrelated) register write to the PCS does make it
> through (and can be read back) in between the 2 erratum writes.
> 
> > +	}
> > +
> > +	return 0;
> > +}
> > +
> >  static int mv88e639x_sgmii_pcs_post_config(struct phylink_pcs *pcs,
> >  					   phy_interface_t interface)
> >  {
> >  	struct mv88e639x_pcs *mpcs = sgmii_pcs_to_mv88e639x_pcs(pcs);
> > +	struct mv88e6xxx_chip *chip = mpcs->chip;
> >  
> >  	mv88e639x_sgmii_pcs_control_pwr(mpcs, true);
> >  
> > +	if (chip->info->prod_num == MV88E6XXX_PORT_SWITCH_ID_PROD_6190X ||
> > +	    chip->info->prod_num == MV88E6XXX_PORT_SWITCH_ID_PROD_6390X)
> > +		mv88e6390_erratum_3_14(mpcs);

	int err;
...
	if (mpcs->erratum_3_14) {
		err = mv88e6390_erratum_3_14(mpcs);
		if (err)
			dev_err(mpcs->mdio.dev.parent,
				"failed to apply erratum 3.14: %pe\n",
				ERR_PTR(err));
	}

> 
> You could at least print an error if a write failure occurred, so that
> it doesn't go completely unnoticed.
> 
> > +
> >  	return 0;
> >  }
> >  
> > @@ -523,6 +563,7 @@ static int mv88e6390_pcs_init(struct mv88e6xxx_chip *chip, int port)
> >  	mpcs->sgmii_pcs.neg_mode = true;
> >  	mpcs->xg_pcs.ops = &mv88e6390_xg_pcs_ops;
> >  	mpcs->xg_pcs.neg_mode = true;
> > +	mpcs->chip = chip;

	if (chip->info->prod_num == MV88E6XXX_PORT_SWITCH_ID_PROD_6190X ||
	    chip->info->prod_num == MV88E6XXX_PORT_SWITCH_ID_PROD_6390X)
		mpcs->erratum_3_14 = true;

> >  
> >  	err = mv88e639x_pcs_setup_irq(mpcs, chip, port);
> >  	if (err)
> > @@ -873,6 +914,7 @@ static int mv88e6393x_pcs_init(struct mv88e6xxx_chip *chip, int port)
> >  	mpcs->xg_pcs.ops = &mv88e6393x_xg_pcs_ops;
> >  	mpcs->xg_pcs.neg_mode = true;
> >  	mpcs->supports_5g = true;
> > +	mpcs->chip = chip;

Presumably the 6393x isn't affected by this, so this is not necessary
with the above changes.
Ante Knezic July 26, 2023, 9:49 a.m. UTC | #7
On Tue, 25 Jul 2023 18:49:19 +0100 Russell King (Oracle) wrote:
> Does the errata say that _all_ lanes need this treatment, even when
> they are not being used as a group (e.g. for XAUI) ?

No, unfortunatelly errata says very little, I tried applying erratum only on the requested 
lane of port 9/10 but this did not work out as expected and the issue was still visible.
I dont have the necessary HW to perform more tests on other lanes unfortunatelly.

On Tue, 25 Jul 2023 18:49:19 +0100 Russell King (Oracle) wrote:
> On Tue, Jul 25, 2023 at 08:23:43PM +0300, Vladimir Oltean wrote:
> > On Fri, Jul 21, 2023 at 12:26:18PM +0200, Ante Knezic wrote:
> > > diff --git a/drivers/net/dsa/mv88e6xxx/pcs-639x.c b/drivers/net/dsa/mv88e6xxx/pcs-639x.c
> > > index 98dd49dac421..50b14804c360 100644
> > > --- a/drivers/net/dsa/mv88e6xxx/pcs-639x.c
> > > +++ b/drivers/net/dsa/mv88e6xxx/pcs-639x.c
> > > @@ -20,6 +20,7 @@ struct mv88e639x_pcs {
> > >  	struct mdio_device mdio;
> > >  	struct phylink_pcs sgmii_pcs;
> > >  	struct phylink_pcs xg_pcs;
> > > +	struct mv88e6xxx_chip *chip;
> 
> 	bool erratum_3_14;

...

> > >  static int mv88e639x_sgmii_pcs_post_config(struct phylink_pcs *pcs,
> > >  					   phy_interface_t interface)
> > >  {
> > >  	struct mv88e639x_pcs *mpcs = sgmii_pcs_to_mv88e639x_pcs(pcs);
> > > +	struct mv88e6xxx_chip *chip = mpcs->chip;
> > >  
> > >  	mv88e639x_sgmii_pcs_control_pwr(mpcs, true);
> > >  
> > > +	if (chip->info->prod_num == MV88E6XXX_PORT_SWITCH_ID_PROD_6190X ||
> > > +	    chip->info->prod_num == MV88E6XXX_PORT_SWITCH_ID_PROD_6390X)
> > > +		mv88e6390_erratum_3_14(mpcs);
> 
> 	int err;
> ...
> 	if (mpcs->erratum_3_14) {
> 		err = mv88e6390_erratum_3_14(mpcs);
> 		if (err)
> 			dev_err(mpcs->mdio.dev.parent,
> 				"failed to apply erratum 3.14: %pe\n",
> 				ERR_PTR(err));
> 	}
> 

So you propose to ditch the chip ptr from the mpcs and add a bool variable instead. But
isn't this too general - the errata applies only to 6190X and 6390X, other devices
might (and probably do) have errata 3.14 as something completely different? Possible new changes
(new errata, fixes etc) in the pcs-xxx.c might benefit from having a chip ptr more than 
using a bool variable "just" for one errata found on two device types?

> > >  
> > >  	err = mv88e639x_pcs_setup_irq(mpcs, chip, port);
> > >  	if (err)
> > > @@ -873,6 +914,7 @@ static int mv88e6393x_pcs_init(struct mv88e6xxx_chip *chip, int port)
> > >  	mpcs->xg_pcs.ops = &mv88e6393x_xg_pcs_ops;
> > >  	mpcs->xg_pcs.neg_mode = true;
> > >  	mpcs->supports_5g = true;
> > > +	mpcs->chip = chip;
> 
> Presumably the 6393x isn't affected by this, so this is not necessary
> with the above changes.

This was done merely for consistency, besides the memory is already reserved, why not point
it to something? In case of bool replacement it will not matter anymore.

Thanks,
	Ante
Ante Knezic July 26, 2023, 9:50 a.m. UTC | #8
On Tue, 25 Jul 2023 20:23:43 +0300 Vladimir Oltean wrote:
> I'm not sure which way is preferred by PHY maintainers, but it seems to
> be a useless complication to simulate that you have a struct mdio_device
> for the other lanes when you don't. It appears more appropriate to just
> use mdiobus_c45_write(mpcs->mdio.bus, lanes[i]).
> 

Agreed.

> There's also the locking question (with the big caveat that we don't
> know what the register writes do!). There's locking at the bus level,
> but the MDIO device isn't locked. So phylink on those other PCSes can
> still do stuff, even in-between the first and the second write to
> undocumented register 0xf054.
> 
> I can speculate that writing 0x400c -> 0x4000 is something like: set
> RX_RESET | TX_RESET followed by clear RX_RESET | TX_RESET. Is it ok if
> stuff happens in between these writes - will it stick, or does this
> logically interact with anything else in any other way? I guess we won't
> know. I might be a bit closer to being okay with it if you could confirm
> that some other (unrelated) register write to the PCS does make it
> through (and can be read back) in between the 2 erratum writes.

I was able to confirm this by successfully reading and writing to the 
SGMII_BMCR register between erratum writes. This did not affect the issue
that erratum fixes. Unfortunatelly, there is no info about what the
actuall writing to magic registers does.

>>  static int mv88e639x_sgmii_pcs_post_config(struct phylink_pcs *pcs,
>>  	                                   phy_interface_t interface)
>>  {
>>  	struct mv88e639x_pcs *mpcs = sgmii_pcs_to_mv88e639x_pcs(pcs);
>> +	struct mv88e6xxx_chip *chip = mpcs->chip;
>>  
>>  	mv88e639x_sgmii_pcs_control_pwr(mpcs, true);
>>  
>> +	if (chip->info->prod_num == MV88E6XXX_PORT_SWITCH_ID_PROD_6190X ||
>> +	    chip->info->prod_num == MV88E6XXX_PORT_SWITCH_ID_PROD_6390X)
>> +	        mv88e6390_erratum_3_14(mpcs);
>
>You could at least print an error if a write failure occurred, so that
>it doesn't go completely unnoticed.

Ok, I was simply following the above notion (we don't check or print 
errors when powering on the serdes lane) but I agree with your point and 
will adapt the patch for the next version.
Russell King (Oracle) July 26, 2023, 9:53 a.m. UTC | #9
On Wed, Jul 26, 2023 at 11:49:35AM +0200, Ante Knezic wrote:
> On Tue, 25 Jul 2023 18:49:19 +0100 Russell King (Oracle) wrote:
> > Does the errata say that _all_ lanes need this treatment, even when
> > they are not being used as a group (e.g. for XAUI) ?
> 
> No, unfortunatelly errata says very little, I tried applying erratum only on the requested 
> lane of port 9/10 but this did not work out as expected and the issue was still visible.
> I dont have the necessary HW to perform more tests on other lanes unfortunatelly.
> 
> On Tue, 25 Jul 2023 18:49:19 +0100 Russell King (Oracle) wrote:
> > On Tue, Jul 25, 2023 at 08:23:43PM +0300, Vladimir Oltean wrote:
> > > On Fri, Jul 21, 2023 at 12:26:18PM +0200, Ante Knezic wrote:
> > > > diff --git a/drivers/net/dsa/mv88e6xxx/pcs-639x.c b/drivers/net/dsa/mv88e6xxx/pcs-639x.c
> > > > index 98dd49dac421..50b14804c360 100644
> > > > --- a/drivers/net/dsa/mv88e6xxx/pcs-639x.c
> > > > +++ b/drivers/net/dsa/mv88e6xxx/pcs-639x.c
> > > > @@ -20,6 +20,7 @@ struct mv88e639x_pcs {
> > > >  	struct mdio_device mdio;
> > > >  	struct phylink_pcs sgmii_pcs;
> > > >  	struct phylink_pcs xg_pcs;
> > > > +	struct mv88e6xxx_chip *chip;
> > 
> > 	bool erratum_3_14;
> 
> ...
> 
> > > >  static int mv88e639x_sgmii_pcs_post_config(struct phylink_pcs *pcs,
> > > >  					   phy_interface_t interface)
> > > >  {
> > > >  	struct mv88e639x_pcs *mpcs = sgmii_pcs_to_mv88e639x_pcs(pcs);
> > > > +	struct mv88e6xxx_chip *chip = mpcs->chip;
> > > >  
> > > >  	mv88e639x_sgmii_pcs_control_pwr(mpcs, true);
> > > >  
> > > > +	if (chip->info->prod_num == MV88E6XXX_PORT_SWITCH_ID_PROD_6190X ||
> > > > +	    chip->info->prod_num == MV88E6XXX_PORT_SWITCH_ID_PROD_6390X)
> > > > +		mv88e6390_erratum_3_14(mpcs);
> > 
> > 	int err;
> > ...
> > 	if (mpcs->erratum_3_14) {
> > 		err = mv88e6390_erratum_3_14(mpcs);
> > 		if (err)
> > 			dev_err(mpcs->mdio.dev.parent,
> > 				"failed to apply erratum 3.14: %pe\n",
> > 				ERR_PTR(err));
> > 	}
> > 
> 
> So you propose to ditch the chip ptr from the mpcs and add a bool variable instead. But
> isn't this too general - the errata applies only to 6190X and 6390X, other devices
> might (and probably do) have errata 3.14 as something completely different? Possible new changes
> (new errata, fixes etc) in the pcs-xxx.c might benefit from having a chip ptr more than 
> using a bool variable "just" for one errata found on two device types?

As a longer term goal, I would like to move the pcs drivers out of
mv88e6xxx and into drivers/net/pcs, so I want to minimise the use of
the "chip" pointer in the drivers. That's why I coded them the way I
have, as almost entirely stand-alone implementations that make no use
of the hardware accessors provided by the 88e6xxx core.
Ante Knezic July 26, 2023, 10:03 a.m. UTC | #10
On Wed, 26 Jul 2023 10:53:17 +0100, Russell King (Oracle) wrote:
> As a longer term goal, I would like to move the pcs drivers out of
> mv88e6xxx and into drivers/net/pcs, so I want to minimise the use of
> the "chip" pointer in the drivers. That's why I coded them the way I
> have, as almost entirely stand-alone implementations that make no use
> of the hardware accessors provided by the 88e6xxx core.

Understood, I will adapt the patch as you proposed.
diff mbox series

Patch

diff --git a/drivers/net/dsa/mv88e6xxx/pcs-639x.c b/drivers/net/dsa/mv88e6xxx/pcs-639x.c
index 98dd49dac421..50b14804c360 100644
--- a/drivers/net/dsa/mv88e6xxx/pcs-639x.c
+++ b/drivers/net/dsa/mv88e6xxx/pcs-639x.c
@@ -20,6 +20,7 @@  struct mv88e639x_pcs {
 	struct mdio_device mdio;
 	struct phylink_pcs sgmii_pcs;
 	struct phylink_pcs xg_pcs;
+	struct mv88e6xxx_chip *chip;
 	bool supports_5g;
 	phy_interface_t interface;
 	unsigned int irq;
@@ -205,13 +206,52 @@  static void mv88e639x_sgmii_pcs_pre_config(struct phylink_pcs *pcs,
 	mv88e639x_sgmii_pcs_control_pwr(mpcs, false);
 }
 
+static int mv88e6390_erratum_3_14(struct mv88e639x_pcs *mpcs)
+{
+	const int lanes[] = { MV88E6390_PORT9_LANE0, MV88E6390_PORT9_LANE1,
+		MV88E6390_PORT9_LANE2, MV88E6390_PORT9_LANE3,
+		MV88E6390_PORT10_LANE0, MV88E6390_PORT10_LANE1,
+		MV88E6390_PORT10_LANE2, MV88E6390_PORT10_LANE3 };
+	struct mdio_device mdio;
+	int err, i;
+
+	/* 88e6190x and 88e6390x errata 3.14:
+	 * After chip reset, SERDES reconfiguration or SERDES core
+	 * Software Reset, the SERDES lanes may not be properly aligned
+	 * resulting in CRC errors
+	 */
+
+	mdio.bus = mpcs->mdio.bus;
+
+	for (i = 0; i < ARRAY_SIZE(lanes); i++) {
+		mdio.addr = lanes[i];
+
+		err = mdiodev_c45_write(&mdio, MDIO_MMD_PHYXS,
+					0xf054, 0x400C);
+		if (err)
+			return err;
+
+		err = mdiodev_c45_write(&mdio, MDIO_MMD_PHYXS,
+					0xf054, 0x4000);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
 static int mv88e639x_sgmii_pcs_post_config(struct phylink_pcs *pcs,
 					   phy_interface_t interface)
 {
 	struct mv88e639x_pcs *mpcs = sgmii_pcs_to_mv88e639x_pcs(pcs);
+	struct mv88e6xxx_chip *chip = mpcs->chip;
 
 	mv88e639x_sgmii_pcs_control_pwr(mpcs, true);
 
+	if (chip->info->prod_num == MV88E6XXX_PORT_SWITCH_ID_PROD_6190X ||
+	    chip->info->prod_num == MV88E6XXX_PORT_SWITCH_ID_PROD_6390X)
+		mv88e6390_erratum_3_14(mpcs);
+
 	return 0;
 }
 
@@ -523,6 +563,7 @@  static int mv88e6390_pcs_init(struct mv88e6xxx_chip *chip, int port)
 	mpcs->sgmii_pcs.neg_mode = true;
 	mpcs->xg_pcs.ops = &mv88e6390_xg_pcs_ops;
 	mpcs->xg_pcs.neg_mode = true;
+	mpcs->chip = chip;
 
 	err = mv88e639x_pcs_setup_irq(mpcs, chip, port);
 	if (err)
@@ -873,6 +914,7 @@  static int mv88e6393x_pcs_init(struct mv88e6xxx_chip *chip, int port)
 	mpcs->xg_pcs.ops = &mv88e6393x_xg_pcs_ops;
 	mpcs->xg_pcs.neg_mode = true;
 	mpcs->supports_5g = true;
+	mpcs->chip = chip;
 
 	err = mv88e6393x_erratum_4_6(mpcs);
 	if (err)