diff mbox series

[RFC,net-next,1/2] net: pcs: xpcs: Add special code to operate in Microchip KSZ9477 switch

Message ID 20250128033226.70866-2-Tristram.Ha@microchip.com (mailing list archive)
State RFC
Delegated to: Netdev Maintainers
Headers show
Series Add SGMII port support to KSZ9477 switch | expand

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for net-next
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 0 this patch: 0
netdev/build_tools success Errors and warnings before: 26 (+1) this patch: 26 (+1)
netdev/cc_maintainers warning 1 maintainers not CCed: fancer.lancer@gmail.com
netdev/build_clang success Errors and warnings before: 66 this patch: 66
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 10 this patch: 10
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 122 lines checked
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Tristram.Ha@microchip.com Jan. 28, 2025, 3:32 a.m. UTC
From: Tristram Ha <tristram.ha@microchip.com>

It was recommended to use this XPCS driver for Microchip KSZ9477 switch
to operate its SGMII port as the SGMII implementation uses the same
Synopsys DesignWare IP, but current code does not work in some cases.
The only solution is to add a quirk field and use that to operate KSZ
specific code.

For 1000BaseX mode setting neg_mode to false works, but that does not
work for SGMII mode.  Setting 0x18 value in register 0x1f8001 allows
1000BaseX mode to work with auto-negotiation enabled.

However SGMII mode in KSZ9477 has a bug in which the current speed
needs to be set in MII_BMCR to pass traffic.  The current driver code
does not do anything when link is up with auto-negotiation enabled, so
that code needs to be changed for KSZ9477.

Signed-off-by: Tristram Ha <tristram.ha@microchip.com>
---
 drivers/net/pcs/pcs-xpcs.c   | 52 ++++++++++++++++++++++++++++++++++--
 drivers/net/pcs/pcs-xpcs.h   |  2 ++
 include/linux/pcs/pcs-xpcs.h |  6 +++++
 3 files changed, 58 insertions(+), 2 deletions(-)

Comments

Russell King (Oracle) Jan. 28, 2025, 9:24 a.m. UTC | #1
On Mon, Jan 27, 2025 at 07:32:25PM -0800, Tristram.Ha@microchip.com wrote:
> For 1000BaseX mode setting neg_mode to false works, but that does not
> work for SGMII mode.  Setting 0x18 value in register 0x1f8001 allows
> 1000BaseX mode to work with auto-negotiation enabled.

I'm not sure (a) exactly what the above paragraph is trying to tell me,
and (b) why setting the AN control register to 0x18, which should only
affect SGMII, has an effect on 1000BASE-X.

Note that a config word formatted for SGMII can result in a link with
1000BASE-X to come up, but it is not correct. So, I highly recommend you
check the config word sent by the XPCS to the other end of the link.
Bit 0 of that will tell you whether it is SGMII-formatted or 802.3z
formatted.

> However SGMII mode in KSZ9477 has a bug in which the current speed
> needs to be set in MII_BMCR to pass traffic.  The current driver code
> does not do anything when link is up with auto-negotiation enabled, so
> that code needs to be changed for KSZ9477.
> 
> Signed-off-by: Tristram Ha <tristram.ha@microchip.com>
> ---
>  drivers/net/pcs/pcs-xpcs.c   | 52 ++++++++++++++++++++++++++++++++++--
>  drivers/net/pcs/pcs-xpcs.h   |  2 ++
>  include/linux/pcs/pcs-xpcs.h |  6 +++++
>  3 files changed, 58 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/net/pcs/pcs-xpcs.c b/drivers/net/pcs/pcs-xpcs.c
> index 1faa37f0e7b9..ddf6cd4b37a7 100644
> --- a/drivers/net/pcs/pcs-xpcs.c
> +++ b/drivers/net/pcs/pcs-xpcs.c
> @@ -768,6 +768,14 @@ static int xpcs_config_aneg_c37_1000basex(struct dw_xpcs *xpcs,
>  		val |= DW_VR_MII_AN_INTR_EN;
>  	}
>  
> +	if (xpcs->quirk == DW_XPCS_QUIRK_MICROCHIP_KSZ &&
> +	    neg_mode == PHYLINK_PCS_NEG_INBAND_ENABLED) {
> +		mask |= DW_VR_MII_SGMII_LINK_STS | DW_VR_MII_TX_CONFIG_MASK;
> +		val |= FIELD_PREP(DW_VR_MII_TX_CONFIG_MASK,
> +				  DW_VR_MII_TX_CONFIG_PHY_SIDE_SGMII);
> +		val |= DW_VR_MII_SGMII_LINK_STS;
> +	}
> +
>  	ret = xpcs_modify(xpcs, MDIO_MMD_VEND2, DW_VR_MII_AN_CTRL, mask, val);
>  	if (ret < 0)
>  		return ret;
> @@ -982,6 +990,15 @@ static int xpcs_get_state_c37_sgmii(struct dw_xpcs *xpcs,
>  	if (ret < 0)
>  		return ret;
>  
> +	/* DW_VR_MII_AN_STS_C37_ANCMPLT_INTR just means link change in SGMII
> +	 * mode, so needs to be cleared.  It can appear just by itself, which
> +	 * does not mean the link is up.
> +	 */
> +	if (xpcs->quirk == DW_XPCS_QUIRK_MICROCHIP_KSZ &&
> +	    (ret & DW_VR_MII_AN_STS_C37_ANCMPLT_INTR)) {
> +		ret &= ~DW_VR_MII_AN_STS_C37_ANCMPLT_INTR;
> +		xpcs_write(xpcs, MDIO_MMD_VEND2, DW_VR_MII_AN_INTR_STS, 0);
> +	}

*_get_state() is not an interrupt acknowledgement function. It isn't
_necessarily_ called when an interrupt has happened, and it will be
called "sometime after" the interrupt has been handled as it's called
from an entirely separate workqueue.

Would it be better to just ignore the block following:

	} else if (ret == DW_VR_MII_AN_STS_C37_ANCMPLT_INTR) {

instead? I'm not sure that block of code/if statement was correct,
and was added in "net: pcs: xpcs: adapt Wangxun NICs for SGMII mode".

>  	if (ret & DW_VR_MII_C37_ANSGM_SP_LNKSTS) {
>  		int speed_value;
>  
> @@ -1037,6 +1054,18 @@ static int xpcs_get_state_c37_1000basex(struct dw_xpcs *xpcs,
>  {
>  	int lpa, bmsr;
>  
> +	/* DW_VR_MII_AN_STS_C37_ANCMPLT_INTR just means link change, so needs
> +	 * to be cleared.  If polling is not used then it is cleared by
> +	 * following code.
> +	 */
> +	if (xpcs->quirk == DW_XPCS_QUIRK_MICROCHIP_KSZ && xpcs->pcs.poll) {
> +		int val;
> +
> +		val = xpcs_read(xpcs, MDIO_MMD_VEND2, DW_VR_MII_AN_INTR_STS);
> +		if (val & DW_VR_MII_AN_STS_C37_ANCMPLT_INTR)
> +			xpcs_write(xpcs, MDIO_MMD_VEND2, DW_VR_MII_AN_INTR_STS,
> +				   0);
> +	}

Same concerns.

>  	if (linkmode_test_bit(ETHTOOL_LINK_MODE_Autoneg_BIT,
>  			      state->advertising)) {
>  		/* Reset link state */
> @@ -1138,9 +1167,14 @@ static void xpcs_link_up_sgmii_1000basex(struct dw_xpcs *xpcs,
>  					 phy_interface_t interface,
>  					 int speed, int duplex)
>  {
> +	u16 val = 0;
>  	int ret;
>  
> -	if (neg_mode == PHYLINK_PCS_NEG_INBAND_ENABLED)
> +	/* Microchip KSZ SGMII implementation has a bug that needs MII_BMCR
> +	 * register to be updated with current speed to pass traffic.
> +	 */
> +	if (xpcs->quirk != DW_XPCS_QUIRK_MICROCHIP_KSZ &&

	if (!(xpcs->quirk == DW_XPCS_QUIRK_MICROCHIP_KSZ &&
	      interface != PHY_INTERFACE_MODE_SGMII) &&

> +	    neg_mode == PHYLINK_PCS_NEG_INBAND_ENABLED)
>  		return;
>  
>  	if (interface == PHY_INTERFACE_MODE_1000BASEX) {
> @@ -1155,10 +1189,18 @@ static void xpcs_link_up_sgmii_1000basex(struct dw_xpcs *xpcs,
>  			dev_err(&xpcs->mdiodev->dev,
>  				"%s: half duplex not supported\n",
>  				__func__);
> +
> +		/* No need to update MII_BMCR register if not in SGMII mode. */
> +		if (xpcs->quirk == DW_XPCS_QUIRK_MICROCHIP_KSZ &&
> +		    neg_mode == PHYLINK_PCS_NEG_INBAND_ENABLED)
> +			return;

then you don't need this.

>  	}
>  
> +	if (xpcs->quirk == DW_XPCS_QUIRK_MICROCHIP_KSZ &&
> +	    neg_mode == PHYLINK_PCS_NEG_INBAND_ENABLED)
> +		val = BMCR_ANENABLE;
>  	ret = xpcs_write(xpcs, MDIO_MMD_VEND2, MII_BMCR,
> -			 mii_bmcr_encode_fixed(speed, duplex));
> +			 val | mii_bmcr_encode_fixed(speed, duplex));

I think in this case, I'd prefer this to just modify the speed and
duplex bits rather than writing the whole register.

>  	if (ret)
>  		dev_err(&xpcs->mdiodev->dev, "%s: xpcs_write returned %pe\n",
>  			__func__, ERR_PTR(ret));
> @@ -1563,5 +1605,11 @@ void xpcs_destroy_pcs(struct phylink_pcs *pcs)
>  }
>  EXPORT_SYMBOL_GPL(xpcs_destroy_pcs);
>  
> +void xpcs_set_quirk(struct dw_xpcs *xpcs, int quirk)
> +{
> +	xpcs->quirk = quirk;
> +}
> +EXPORT_SYMBOL_GPL(xpcs_set_quirk);

According to the KSZ9477 data, the physid is 0x7996ced0 (which is the
DW value according to the xpcs header file). We also read the PMA ID
(xpcs->info.pma). Can this be used to identify the KSZ9477 without
introducing quirks?

I would prefer to avoid going down the route of introducing a quirk
mask - that seems to be a recipe to breed lots of flags that make
long term maintenance more difficult.
Vladimir Oltean Jan. 28, 2025, 10:21 a.m. UTC | #2
On Tue, Jan 28, 2025 at 09:24:45AM +0000, Russell King (Oracle) wrote:
> On Mon, Jan 27, 2025 at 07:32:25PM -0800, Tristram.Ha@microchip.com wrote:
> > For 1000BaseX mode setting neg_mode to false works, but that does not
> > work for SGMII mode.  Setting 0x18 value in register 0x1f8001 allows
> > 1000BaseX mode to work with auto-negotiation enabled.
> 
> I'm not sure (a) exactly what the above paragraph is trying to tell me,
> and (b) why setting the AN control register to 0x18, which should only
> affect SGMII, has an effect on 1000BASE-X.
> 
> Note that a config word formatted for SGMII can result in a link with
> 1000BASE-X to come up, but it is not correct. So, I highly recommend you
> check the config word sent by the XPCS to the other end of the link.
> Bit 0 of that will tell you whether it is SGMII-formatted or 802.3z
> formatted.

I, too, am concerned about the sentence "setting neg_mode to false works".
If this is talking about the only neg_mode field that is a boolean, aka
struct phylink_pcs :: neg_mode, then setting it to false is not
something driver customizable, it should be true for API compliance,
and all that remains false in current kernel trees will eventually get
converted to true, AFAIU. If 1000BaseX works by setting xpcs->pcs.neg_mode
to false and not modifying anything else, it should be purely a
coincidence that it "works", since that makes the driver comparisons
with PHYLINK_PCS_NEG_* constants meaningless.

> According to the KSZ9477 data, the physid is 0x7996ced0 (which is the
> DW value according to the xpcs header file). We also read the PMA ID
> (xpcs->info.pma). Can this be used to identify the KSZ9477 without
> introducing quirks?

If nothing else works, and it turns out that different IP integrations
report the same value in ID registers but need different handling, then
in principle the hack approach is also on the table. SJA1105, whose
hardware reads zeroes for the ID registers, reports a fake and unique ID
for the XPCS to identify it, because it, like the KSZ9477 driver, is in
control of the MDIO read operations and can selectively manipulate their
result.
Russell King (Oracle) Jan. 28, 2025, 12:33 p.m. UTC | #3
On Tue, Jan 28, 2025 at 12:21:28PM +0200, Vladimir Oltean wrote:
> On Tue, Jan 28, 2025 at 09:24:45AM +0000, Russell King (Oracle) wrote:
> > On Mon, Jan 27, 2025 at 07:32:25PM -0800, Tristram.Ha@microchip.com wrote:
> > > For 1000BaseX mode setting neg_mode to false works, but that does not
> > > work for SGMII mode.  Setting 0x18 value in register 0x1f8001 allows
> > > 1000BaseX mode to work with auto-negotiation enabled.
> > 
> > I'm not sure (a) exactly what the above paragraph is trying to tell me,
> > and (b) why setting the AN control register to 0x18, which should only
> > affect SGMII, has an effect on 1000BASE-X.
> > 
> > Note that a config word formatted for SGMII can result in a link with
> > 1000BASE-X to come up, but it is not correct. So, I highly recommend you
> > check the config word sent by the XPCS to the other end of the link.
> > Bit 0 of that will tell you whether it is SGMII-formatted or 802.3z
> > formatted.
> 
> I, too, am concerned about the sentence "setting neg_mode to false works".
> If this is talking about the only neg_mode field that is a boolean, aka
> struct phylink_pcs :: neg_mode, then setting it to false is not
> something driver customizable, it should be true for API compliance,
> and all that remains false in current kernel trees will eventually get
> converted to true, AFAIU. If 1000BaseX works by setting xpcs->pcs.neg_mode
> to false and not modifying anything else, it should be purely a
> coincidence that it "works", since that makes the driver comparisons
> with PHYLINK_PCS_NEG_* constants meaningless.
> 
> > According to the KSZ9477 data, the physid is 0x7996ced0 (which is the
> > DW value according to the xpcs header file). We also read the PMA ID
> > (xpcs->info.pma). Can this be used to identify the KSZ9477 without
> > introducing quirks?
> 
> If nothing else works, and it turns out that different IP integrations
> report the same value in ID registers but need different handling, then
> in principle the hack approach is also on the table. SJA1105, whose
> hardware reads zeroes for the ID registers, reports a fake and unique ID
> for the XPCS to identify it, because it, like the KSZ9477 driver, is in
> control of the MDIO read operations and can selectively manipulate their
> result.

Further review of the KS9477 documentation finds this:

5.5.9 SGMII AUTO-NEGOTIATION CONTROL REGISTER

"After making changes to this register, the changes don’t take effect
until SGMII Auto-Negotiation Advertisement Register is written."

In xpcs_config_aneg_c37_sgmii() we have:

        ret = xpcs_modify(xpcs, MDIO_MMD_VEND2, DW_VR_MII_AN_CTRL, mask, val);
        if (ret < 0)
                return ret;

However, MII_ADVERTISE in MDIO_MMD_VEND2 is not written by this
function. If the documentation is correct, then this has no effect
on KS9477, and could be part of the problem.

I notice the SJA1105 doesn't make any similar statement, so I wonder
what the original Synopsys documentation says about the AN control
register.

Note that xpcs_config_aneg_c37_1000basex() does also write this
register, but it is followed by a write to the MII_ADVERTISE
register.
Russell King (Oracle) Jan. 28, 2025, 12:40 p.m. UTC | #4
On Tue, Jan 28, 2025 at 12:21:28PM +0200, Vladimir Oltean wrote:
> On Tue, Jan 28, 2025 at 09:24:45AM +0000, Russell King (Oracle) wrote:
> > On Mon, Jan 27, 2025 at 07:32:25PM -0800, Tristram.Ha@microchip.com wrote:
> > > For 1000BaseX mode setting neg_mode to false works, but that does not
> > > work for SGMII mode.  Setting 0x18 value in register 0x1f8001 allows
> > > 1000BaseX mode to work with auto-negotiation enabled.
> > 
> > I'm not sure (a) exactly what the above paragraph is trying to tell me,
> > and (b) why setting the AN control register to 0x18, which should only
> > affect SGMII, has an effect on 1000BASE-X.
> > 
> > Note that a config word formatted for SGMII can result in a link with
> > 1000BASE-X to come up, but it is not correct. So, I highly recommend you
> > check the config word sent by the XPCS to the other end of the link.
> > Bit 0 of that will tell you whether it is SGMII-formatted or 802.3z
> > formatted.
> 
> I, too, am concerned about the sentence "setting neg_mode to false works".
> If this is talking about the only neg_mode field that is a boolean, aka
> struct phylink_pcs :: neg_mode, then setting it to false is not
> something driver customizable, it should be true for API compliance,
> and all that remains false in current kernel trees will eventually get
> converted to true, AFAIU. If 1000BaseX works by setting xpcs->pcs.neg_mode
> to false and not modifying anything else, it should be purely a
> coincidence that it "works", since that makes the driver comparisons
> with PHYLINK_PCS_NEG_* constants meaningless.

Another related issue that concerns me is:

         * Note: Since it is MAC side SGMII, there is no need to set
         *       SR_MII_AN_ADV. MAC side SGMII receives AN Tx Config from
         *       PHY about the link state change after C28 AN is completed
         *       between PHY and Link Partner. There is also no need to
         *       trigger AN restart for MAC-side SGMII.

However, 2a22b7ae2fa3 ("net: pcs: xpcs: adapt Wangxun NICs for SGMII
mode") added support for switching this to PHY-side SGMII, so this
comment is no longer true.

Again, I still wonder whether PHY-side is some kind of hack despite
my queries during the review of that change. Sadly, I didn't catch
that comment (and until recently, I didn't have any documentation
on these registers. I now have the KS9477 and SJA1105 manuals that
document some of the register set.)
Andrew Lunn Jan. 28, 2025, 1:16 p.m. UTC | #5
> For 1000BaseX mode setting neg_mode to false works, but that does not
> work for SGMII mode.  Setting 0x18 value in register 0x1f8001 allows
> 1000BaseX mode to work with auto-negotiation enabled.

Unless you have the datasheet, writing 0x18 to 0x1f8001 is pretty
meaningless. You need to explain what these two bits mean in this
register.

	Andrew
Russell King (Oracle) Jan. 28, 2025, 1:39 p.m. UTC | #6
On Tue, Jan 28, 2025 at 02:16:28PM +0100, Andrew Lunn wrote:
> > For 1000BaseX mode setting neg_mode to false works, but that does not
> > work for SGMII mode.  Setting 0x18 value in register 0x1f8001 allows
> > 1000BaseX mode to work with auto-negotiation enabled.
> 
> Unless you have the datasheet, writing 0x18 to 0x1f8001 is pretty
> meaningless. You need to explain what these two bits mean in this
> register.

Well, this is the reason I searched for the datasheet to find out
what it was, and discovered further information which suggests
other stuff is wrong in the current driver.

bit 4: SGMII link status. This is used to populate the SGMII tx_config
register bit 15 when XPCS is operating in PHY mode. KSZ9477
documentation states that this bit must be set in "SerDes" mode, aka
1000base-X. If that requirement comes from Synopsys, then the current
XPCS driver is buggy...

bit 3: Transmit configuration. In SGMII mode, selects between PHY mode
(=1) or MAC mode (=0). KSZ9477 documentation states that this bit must
be set when operating in "SerDes" mode. (Same concern as for bit 4.)

I will also note here that bits 2:1 are documented as 00=SerDes mode,
10=SGMII mode.

Cross-referencing with the SJA1105 documentation, Digital Control
Register 1 bit 0 = 0 gives a tx_config format of:

	tx_config[15] = comes from SGMII link status above
	tx_config[12] = MII_ADVERTISE.bit5 (which, even though operating
			in SGMII mode, MII_ADVERTISE is 802.3z format.)
	tx_config[11:10] = MII_BMCR speed bits

As stated elsewhere, changes to the AN control register in KSZ9477
are documented as only taking effect when the MII_ADVERTISE register
is subsequently written (which the driver doesn't do, nor does this
patch!)

The lack of access to Synopsys Designware XPCS documentation makes
working out how to properly drive this hardware problematical. We're
subject to the randomness of the register set documentation that can
be found in various chip manufacturers who publish it.
Vladimir Oltean Jan. 28, 2025, 3:23 p.m. UTC | #7
On Tue, Jan 28, 2025 at 12:33:11PM +0000, Russell King (Oracle) wrote:
> I wonder what the original Synopsys documentation says about the AN
> control register.

My private XPCS databook 2017-12.pdf, the applicability of which I cannot
vouch for, has a section with programming guidelines for SGMII
Auto-Negotiation. I quote:

| Clause 37 auto-negotiation can be performed in the SGMII mode by
| programming various registers as follows:
| 
| 1. In configurations with both 10G and 1G mode speed mode, switch
|    DWC_xpcs to 1G speed mode by following the steps described in
|    "Switching to 1G or KX Mode and 10 or 100 Mbps SGMII Speed".
| 
| 2. In Backplane Ethernet PCS configurations, program bit[12] (AN_EN) of
|    SR_AN_CTRL Register to 0 and bit[12] (CL37_BP) of VR_XS_PCS_DIG_CTRL1
|    Register to 1.
| 
| 3. Disable Clause 37 auto-negotiation by programming bit [12]
|    (AN_ENABLE) of SR_MII_CTRL Register to 0 (in case it is already
|    enabled).
| 
| 4. Program various fields of VR_MII_AN_CTRL Register appropriately as
|    follows:
|    - Program PCS_MODE to 2’b10
|    - Program TX_CONFIG to 1 (PHY side SGMII) or 0 (MAC side SGMII) based
|      on your requirement
|    - Program MII_AN_INTR_EN to 1, to enable auto-negotiation complete
|      interrupt
|    - If TX_CONFIG is set to 1 and bit[0] of VR_MII_DIG_CTRL1 Register is
|      set to 0, program SGMII_LINK_STS to indicate the link status to the
|      MAC side SGMII.
|    - Program MII_CTRL to 0 or 1, as per your requirement.
| 
| 5. If DWC_xpcs is configured as PHY-side SGMII in the above step, you
|    can program bit [0] of VR_MII_DIG_CTRL1 Register to 1, if you wish to
|    use the values of the xpcs_sgmii_link_sts_i input ports.
|    xpcs_sgmii_full_duplex_i and xpcs_sgmii_link_speed_i as the
|    transmitted configuration word.
| 
| 6. If DWC_xpcs is configured as PHY-side SGMII and if bit[0] of
|    VR_MII_DIG_CTRL1 Register is set to 0,
|    - Program SS13 and SS6 bits of SR_MII_CTRL Register to the required
|      SGMII Speed
|    - Program bit [5] (FD) of SR_MII_AN_ADV to the desired mode. This
|      step is mandatory even if you wish to leave the FD register bit to
|      its default value.
| 
| 7. If DWC_xpcs is configured as MAC-side SGMII in step 4, program bit[9]
|    of VR_MII_DIG_CTRL1 Register to 1, for DWC_xpcs to automatically
|    switch to the negotiated link-speed, after the completion of
|    auto-negotiation.
| 
| 8. Enable CL37 Auto-negotiation, by programming bit[12] of the
|    SR_MII_CTRL Register to 1.

In my reading of these steps, writing to DW_VR_MII_AN_CTRL does not
depend on a subsequent write to SR_MII_AN_ADV to take effect.
But there is this additional note in the databook:

| If TX_CONFIG=1 and Bit[0] (SGMII_PHY_MODE_CTRL) of VR_MII_DIG_CTRL1 = 0,
| program the SR_MII_AN_ADV only after programming 'SGMII_LINK_STS' bit
| (of VR_MII_AN_CTRL) and SS13 and SS6 bits (of SR_MII_CTRL)

So my understanding is that SR_MII_AN_ADV needs to be written only if
TX_CONFIG=1 (SJA1105 calls this AUTONEG_CONTROL[PHY_MODE]). That's quite
different, and that will make sense when you consider that there's also
a table with the places the autoneg code word gets its info from:

Config_Reg Bits in the 1000BASE-X and SGMII Mode

 +----------------+-------------------+--------------------+--------------------------------------------+
 | Config_Reg bit | 1000Base-X mode   | SGMII mode value   | SGMII mode value                           |
 |                |                   | when TX_CONFIG = 0 | when TX_CONFIG = 1                         |
 +----------------+-------------------+--------------------+--------------------------------------------+
 | 15             | Next page support | 0                  | Link up or down.                           |
 |                |                   |                    | If DW_VR_MII_DIG_CTRL1_PHY_MODE_CTRL == 0, |
 |                |                   |                    | this bit is derived from Bit 4             |
 |                |                   |                    | (SGMII_LINK_STS) of the VR_MII_AN_CTRL.    |
 |                |                   |                    | If DW_VR_MII_DIG_CTRL1_PHY_MODE_CTRL == 1, |
 |                |                   |                    | this bit is derived from the input port    |
 |                |                   |                    | 'xpcs_sgmii_link_sts_i'                    |
 +----------------+-------------------+--------------------+--------------------------------------------+
 | 14             | ACK               | 1                  | 1                                          |
 +----------------+-------------------+--------------------+--------------------------------------------+
 | 13             | RF[1]             | 0                  | 0                                          |
 +----------------+-------------------+--------------------+--------------------------------------------+
 | 12             | RF[0]             | 0                  | FULL_DUPLEX                                |
 |                |                   |                    | If DW_VR_MII_DIG_CTRL1_PHY_MODE_CTRL == 0, |
 |                |                   |                    | this bit is derived from Bit 5 (FD) of     |
 |                |                   |                    | the SR_MII_AN_ADV.                         |
 |                |                   |                    | If DW_VR_MII_DIG_CTRL1_PHY_MODE_CTRL == 1, |
 |                |                   |                    | this bit is derived from the input port    |
 |                |                   |                    | 'xpcs_sgmii_full_duplex_i'                 |
 +----------------+-------------------+--------------------+--------------------------------------------+
 | 11:10          | Reserved          | 0                  | SPEED                                      |
 |                |                   |                    | If DW_VR_MII_DIG_CTRL1_PHY_MODE_CTRL == 0, |
 |                |                   |                    | these bits are derived from Bit 13 (SS13)  |
 |                |                   |                    | and Bit 6 (SS6) of the SR_MII_CTRL.        |
 |                |                   |                    | If DW_VR_MII_DIG_CTRL1_PHY_MODE_CTRL == 1, |
 |                |                   |                    | this bit is derived from the input port    |
 |                |                   |                    | 'xpcs_sgmii_link_speed_i[1:0]'             |
 +----------------+-------------------+--------------------+--------------------------------------------+
 | 9              | Reserved          | 0                  | 0                                          |
 +----------------+-------------------+--------------------+--------------------------------------------+
 | 8:7            | PAUSE[1:0]        | 0                  | 0                                          |
 +----------------+-------------------+--------------------+--------------------------------------------+
 | 6              | HALF_DUPLEX       | 0                  | 0                                          |
 +----------------+-------------------+--------------------+--------------------------------------------+
 | 5              | FULL_DUPLEX       | 0                  | 0                                          |
 +----------------+-------------------+--------------------+--------------------------------------------+
 | 4:1            | Reserved          | 0                  | 0                                          |
 +----------------+-------------------+--------------------+--------------------------------------------+
 | 0              | Reserved          | 1                  | 1                                          |
 +----------------+-------------------+--------------------+--------------------------------------------+

I haven't figured out either what might be going on with the KSZ9477
integration, I just made a quick refresher and I thought this might be
useful for you as well, Russell. I do notice Tristram does force
TX_CONFIG=1 (DW_VR_MII_TX_CONFIG_PHY_SIDE_SGMII), but I don't understand
what's truly behind that. Hopefully just a misunderstanding.

Tristram, why do you set this field to 1? Linux only supports the
configuration where a MAC behaves like a MAC. There needs to be an
entire discussion if you want to configure a MAC to be a SGMII autoneg
master (like a PHY), how to model that.

Could you confirm that the requirement to program the SGMII
Auto-Negotiation Advertisement Register only exists if the Transmit
Configuration Master field is programmed to 1, like you are doing?
Vladimir Oltean Jan. 28, 2025, 3:43 p.m. UTC | #8
On Mon, Jan 27, 2025 at 07:32:25PM -0800, Tristram.Ha@microchip.com wrote:
> However SGMII mode in KSZ9477 has a bug in which the current speed
> needs to be set in MII_BMCR to pass traffic.  The current driver code
> does not do anything when link is up with auto-negotiation enabled, so
> that code needs to be changed for KSZ9477.

Does this claimed SGMII bug have an erratum number like Microchip usually
assign for something this serious? Is it something we can look up?
Russell King (Oracle) Jan. 28, 2025, 4:32 p.m. UTC | #9
On Tue, Jan 28, 2025 at 05:23:24PM +0200, Vladimir Oltean wrote:
> On Tue, Jan 28, 2025 at 12:33:11PM +0000, Russell King (Oracle) wrote:
> > I wonder what the original Synopsys documentation says about the AN
> > control register.
> 
> My private XPCS databook 2017-12.pdf, the applicability of which I cannot
> vouch for, has a section with programming guidelines for SGMII
> Auto-Negotiation. I quote:

Thanks for this, most useful.

> | Clause 37 auto-negotiation can be performed in the SGMII mode by
> | programming various registers as follows:
> | 
> | 1. In configurations with both 10G and 1G mode speed mode, switch
> |    DWC_xpcs to 1G speed mode by following the steps described in
> |    "Switching to 1G or KX Mode and 10 or 100 Mbps SGMII Speed".
> | 
> | 2. In Backplane Ethernet PCS configurations, program bit[12] (AN_EN) of
> |    SR_AN_CTRL Register to 0 and bit[12] (CL37_BP) of VR_XS_PCS_DIG_CTRL1
> |    Register to 1.
> | 
> | 3. Disable Clause 37 auto-negotiation by programming bit [12]
> |    (AN_ENABLE) of SR_MII_CTRL Register to 0 (in case it is already
> |    enabled).
> | 
> | 4. Program various fields of VR_MII_AN_CTRL Register appropriately as
> |    follows:
> |    - Program PCS_MODE to 2’b10
> |    - Program TX_CONFIG to 1 (PHY side SGMII) or 0 (MAC side SGMII) based
> |      on your requirement
> |    - Program MII_AN_INTR_EN to 1, to enable auto-negotiation complete
> |      interrupt
> |    - If TX_CONFIG is set to 1 and bit[0] of VR_MII_DIG_CTRL1 Register is
> |      set to 0, program SGMII_LINK_STS to indicate the link status to the
> |      MAC side SGMII.
> |    - Program MII_CTRL to 0 or 1, as per your requirement.
> | 
> | 5. If DWC_xpcs is configured as PHY-side SGMII in the above step, you
> |    can program bit [0] of VR_MII_DIG_CTRL1 Register to 1, if you wish to
> |    use the values of the xpcs_sgmii_link_sts_i input ports.
> |    xpcs_sgmii_full_duplex_i and xpcs_sgmii_link_speed_i as the
> |    transmitted configuration word.
> | 
> | 6. If DWC_xpcs is configured as PHY-side SGMII and if bit[0] of
> |    VR_MII_DIG_CTRL1 Register is set to 0,
> |    - Program SS13 and SS6 bits of SR_MII_CTRL Register to the required
> |      SGMII Speed
> |    - Program bit [5] (FD) of SR_MII_AN_ADV to the desired mode. This
> |      step is mandatory even if you wish to leave the FD register bit to
> |      its default value.

I suspect this is where the requirement comes from in the KSZ9477
documentation - and it's been "translated" badly. I note that in the
KSZ9477 documentation, bit[0] of VR_MII_DIG_CTRL1 is marked as
"reserved" and takes the value zero, so in the case of PHY-side SGMII,
(6) always applies to KSZ9477 and (5) never does.

This also solves my concern about 2a22b7ae2fa3 ("net: pcs: xpcs: adapt
Wangxun NICs for SGMII mode"), because there (5) would apply.

> | 
> | 7. If DWC_xpcs is configured as MAC-side SGMII in step 4, program bit[9]
> |    of VR_MII_DIG_CTRL1 Register to 1, for DWC_xpcs to automatically
> |    switch to the negotiated link-speed, after the completion of
> |    auto-negotiation.
> | 
> | 8. Enable CL37 Auto-negotiation, by programming bit[12] of the
> |    SR_MII_CTRL Register to 1.
> 
> In my reading of these steps, writing to DW_VR_MII_AN_CTRL does not
> depend on a subsequent write to SR_MII_AN_ADV to take effect.
> But there is this additional note in the databook:
> 
> | If TX_CONFIG=1 and Bit[0] (SGMII_PHY_MODE_CTRL) of VR_MII_DIG_CTRL1 = 0,
> | program the SR_MII_AN_ADV only after programming 'SGMII_LINK_STS' bit
> | (of VR_MII_AN_CTRL) and SS13 and SS6 bits (of SR_MII_CTRL)
> 
> So my understanding is that SR_MII_AN_ADV needs to be written only if
> TX_CONFIG=1 (SJA1105 calls this AUTONEG_CONTROL[PHY_MODE]).

Yes, agreed. Thankfully, SJA1105 sets PHY_MODE/TX_CONFIG=0 and leaves
SGMII_PHY_MODE_CTRL unaltered. TXGBE sets TX_CONFIG=1 but sets
SGMII_PHY_MODE_CTRL=1 which also avoids this requirement.

> That's quite
> different, and that will make sense when you consider that there's also
> a table with the places the autoneg code word gets its info from:
> 
> Config_Reg Bits in the 1000BASE-X and SGMII Mode
> 
>  +----------------+-------------------+--------------------+--------------------------------------------+
>  | Config_Reg bit | 1000Base-X mode   | SGMII mode value   | SGMII mode value                           |
>  |                |                   | when TX_CONFIG = 0 | when TX_CONFIG = 1                         |
>  +----------------+-------------------+--------------------+--------------------------------------------+
>  | 15             | Next page support | 0                  | Link up or down.                           |
>  |                |                   |                    | If DW_VR_MII_DIG_CTRL1_PHY_MODE_CTRL == 0, |
>  |                |                   |                    | this bit is derived from Bit 4             |
>  |                |                   |                    | (SGMII_LINK_STS) of the VR_MII_AN_CTRL.    |
>  |                |                   |                    | If DW_VR_MII_DIG_CTRL1_PHY_MODE_CTRL == 1, |
>  |                |                   |                    | this bit is derived from the input port    |
>  |                |                   |                    | 'xpcs_sgmii_link_sts_i'                    |
>  +----------------+-------------------+--------------------+--------------------------------------------+
>  | 14             | ACK               | 1                  | 1                                          |
>  +----------------+-------------------+--------------------+--------------------------------------------+
>  | 13             | RF[1]             | 0                  | 0                                          |
>  +----------------+-------------------+--------------------+--------------------------------------------+
>  | 12             | RF[0]             | 0                  | FULL_DUPLEX                                |
>  |                |                   |                    | If DW_VR_MII_DIG_CTRL1_PHY_MODE_CTRL == 0, |
>  |                |                   |                    | this bit is derived from Bit 5 (FD) of     |
>  |                |                   |                    | the SR_MII_AN_ADV.                         |
>  |                |                   |                    | If DW_VR_MII_DIG_CTRL1_PHY_MODE_CTRL == 1, |
>  |                |                   |                    | this bit is derived from the input port    |
>  |                |                   |                    | 'xpcs_sgmii_full_duplex_i'                 |
>  +----------------+-------------------+--------------------+--------------------------------------------+
>  | 11:10          | Reserved          | 0                  | SPEED                                      |
>  |                |                   |                    | If DW_VR_MII_DIG_CTRL1_PHY_MODE_CTRL == 0, |
>  |                |                   |                    | these bits are derived from Bit 13 (SS13)  |
>  |                |                   |                    | and Bit 6 (SS6) of the SR_MII_CTRL.        |
>  |                |                   |                    | If DW_VR_MII_DIG_CTRL1_PHY_MODE_CTRL == 1, |
>  |                |                   |                    | this bit is derived from the input port    |
>  |                |                   |                    | 'xpcs_sgmii_link_speed_i[1:0]'             |
>  +----------------+-------------------+--------------------+--------------------------------------------+
>  | 9              | Reserved          | 0                  | 0                                          |
>  +----------------+-------------------+--------------------+--------------------------------------------+
>  | 8:7            | PAUSE[1:0]        | 0                  | 0                                          |
>  +----------------+-------------------+--------------------+--------------------------------------------+
>  | 6              | HALF_DUPLEX       | 0                  | 0                                          |
>  +----------------+-------------------+--------------------+--------------------------------------------+
>  | 5              | FULL_DUPLEX       | 0                  | 0                                          |
>  +----------------+-------------------+--------------------+--------------------------------------------+
>  | 4:1            | Reserved          | 0                  | 0                                          |
>  +----------------+-------------------+--------------------+--------------------------------------------+
>  | 0              | Reserved          | 1                  | 1                                          |
>  +----------------+-------------------+--------------------+--------------------------------------------+
> 
> I haven't figured out either what might be going on with the KSZ9477
> integration, I just made a quick refresher and I thought this might be
> useful for you as well, Russell. I do notice Tristram does force
> TX_CONFIG=1 (DW_VR_MII_TX_CONFIG_PHY_SIDE_SGMII), but I don't understand
> what's truly behind that. Hopefully just a misunderstanding.

If you want to peek at the KSZ9477 documentation, what I'm looking at is
available from here:
https://www.microchip.com/en-us/product/ksz9477#Documentation

Interestingly, there are a number of errata:

Module 7 - SGMII auto-negotiation does not set bit 0 in the
auto-negotiation code word
Basically requires MII_ADVERTISE to be written as 0x1a0, and is only
needed after power-up or reset.

Module 8 - SGMII port link details from the connected SGMII PHY are not
passed properly to the port 7 GMAC
Basically, AUTONEG_INTR_STATUS needs to be read, and the PCS
manually programmed for the speed.

Module 15 - SGMII registers are not initialized by hardware reset
Requires that bit 15 of BASIC_CONTROL is set to reset the registers.

All three are not scheduled to be fixed, apparently.

There seems to be no information listed there regarding the requirement
for SGMII PHY mode.

> Tristram, why do you set this field to 1? Linux only supports the
> configuration where a MAC behaves like a MAC. There needs to be an
> entire discussion if you want to configure a MAC to be a SGMII autoneg
> master (like a PHY), how to model that.

(Using SJA1105 register terminology...)

Looking at the patch, Tristram is setting PHY_MODE=1 and SGMII_LINK=1
not when configuring for SGMII, but when configuring for 1000base-X.

This is reflected in the documentation for KSZ9477 - which states that
both these bits need to be set in "SerDes" aka 1000base-X mode. The
question is... where did that statement come from, and should we be
doing that for 1000base-X mode anyway?
Vladimir Oltean Jan. 28, 2025, 6:26 p.m. UTC | #10
On Tue, Jan 28, 2025 at 04:32:07PM +0000, Russell King (Oracle) wrote:
> I note that in the KSZ9477 documentation, bit[0] of VR_MII_DIG_CTRL1
> is marked as "reserved" and takes the value zero, so in the case of
> PHY-side SGMII, (6) always applies to KSZ9477 and (5) never does.

Good point. In human language, this means that when configuring the
KSZ9477 XPCS for the SGMII PHY role, it always expects the contents of
the autoneg code word to come from registers (VR_MII_AN_CTRL,
SR_MII_AN_ADV and SR_MII_CTRL), and never from input wires coming
from blocks external to the XPCS IP (xpcs_sgmii_link_sts_i,
xpcs_sgmii_full_duplex_i, xpcs_sgmii_link_speed_i[1:0]).

With the very important note that said SGMII PHY mode is still
under-specified in Linux, and the discussion about it still needs
to be had.

> This also solves my concern about 2a22b7ae2fa3 ("net: pcs: xpcs: adapt
> Wangxun NICs for SGMII mode"), because there (5) would apply.

Ok, so Wangxun shimmied in an operating mode where the XPCS does act
like a PHY, but receives the info about how to populate the autoneg
code word from wires, not from registers, and that's why Tristram is
noticing that the driver does not write the registers.

Not great, but at least the info we're gathering seems consistent thus far.

> > So my understanding is that SR_MII_AN_ADV needs to be written only if
> > TX_CONFIG=1 (SJA1105 calls this AUTONEG_CONTROL[PHY_MODE]).
> 
> Yes, agreed. Thankfully, SJA1105 sets PHY_MODE/TX_CONFIG=0 and leaves
> SGMII_PHY_MODE_CTRL unaltered. TXGBE sets TX_CONFIG=1 but sets
> SGMII_PHY_MODE_CTRL=1 which also avoids this requirement.

Correct, for SJA1105 I realized the mode where PHY_MODE/TX_CONFIG=1
isn't supported in phylink, and I didn't consider it important enough to
raise the issue, so it's simply not configurable that way. I was thinking,
just like we have phy-mode = "rev-mii" and "rev-rmii" for MII and RMII
in the role of a PHY, that something like "rev-sgmii" would be the way
to explore. But I really have no interest or use case to explore this myself.

> > That's quite
> > different, and that will make sense when you consider that there's also
> > a table with the places the autoneg code word gets its info from:
> > 
> > Config_Reg Bits in the 1000BASE-X and SGMII Mode
> > 
> >  +----------------+-------------------+--------------------+--------------------------------------------+
> >  | Config_Reg bit | 1000Base-X mode   | SGMII mode value   | SGMII mode value                           |
> >  |                |                   | when TX_CONFIG = 0 | when TX_CONFIG = 1                         |
> >  +----------------+-------------------+--------------------+--------------------------------------------+
> >  | 15             | Next page support | 0                  | Link up or down.                           |
> >  |                |                   |                    | If DW_VR_MII_DIG_CTRL1_PHY_MODE_CTRL == 0, |
> >  |                |                   |                    | this bit is derived from Bit 4             |
> >  |                |                   |                    | (SGMII_LINK_STS) of the VR_MII_AN_CTRL.    |
> >  |                |                   |                    | If DW_VR_MII_DIG_CTRL1_PHY_MODE_CTRL == 1, |
> >  |                |                   |                    | this bit is derived from the input port    |
> >  |                |                   |                    | 'xpcs_sgmii_link_sts_i'                    |
> >  +----------------+-------------------+--------------------+--------------------------------------------+
> >  | 14             | ACK               | 1                  | 1                                          |
> >  +----------------+-------------------+--------------------+--------------------------------------------+
> >  | 13             | RF[1]             | 0                  | 0                                          |
> >  +----------------+-------------------+--------------------+--------------------------------------------+
> >  | 12             | RF[0]             | 0                  | FULL_DUPLEX                                |
> >  |                |                   |                    | If DW_VR_MII_DIG_CTRL1_PHY_MODE_CTRL == 0, |
> >  |                |                   |                    | this bit is derived from Bit 5 (FD) of     |
> >  |                |                   |                    | the SR_MII_AN_ADV.                         |
> >  |                |                   |                    | If DW_VR_MII_DIG_CTRL1_PHY_MODE_CTRL == 1, |
> >  |                |                   |                    | this bit is derived from the input port    |
> >  |                |                   |                    | 'xpcs_sgmii_full_duplex_i'                 |
> >  +----------------+-------------------+--------------------+--------------------------------------------+
> >  | 11:10          | Reserved          | 0                  | SPEED                                      |
> >  |                |                   |                    | If DW_VR_MII_DIG_CTRL1_PHY_MODE_CTRL == 0, |
> >  |                |                   |                    | these bits are derived from Bit 13 (SS13)  |
> >  |                |                   |                    | and Bit 6 (SS6) of the SR_MII_CTRL.        |
> >  |                |                   |                    | If DW_VR_MII_DIG_CTRL1_PHY_MODE_CTRL == 1, |
> >  |                |                   |                    | this bit is derived from the input port    |
> >  |                |                   |                    | 'xpcs_sgmii_link_speed_i[1:0]'             |
> >  +----------------+-------------------+--------------------+--------------------------------------------+
> >  | 9              | Reserved          | 0                  | 0                                          |
> >  +----------------+-------------------+--------------------+--------------------------------------------+
> >  | 8:7            | PAUSE[1:0]        | 0                  | 0                                          |
> >  +----------------+-------------------+--------------------+--------------------------------------------+
> >  | 6              | HALF_DUPLEX       | 0                  | 0                                          |
> >  +----------------+-------------------+--------------------+--------------------------------------------+
> >  | 5              | FULL_DUPLEX       | 0                  | 0                                          |
> >  +----------------+-------------------+--------------------+--------------------------------------------+
> >  | 4:1            | Reserved          | 0                  | 0                                          |
> >  +----------------+-------------------+--------------------+--------------------------------------------+
> >  | 0              | Reserved          | 1                  | 1                                          |
> >  +----------------+-------------------+--------------------+--------------------------------------------+
> > 
> > I haven't figured out either what might be going on with the KSZ9477
> > integration, I just made a quick refresher and I thought this might be
> > useful for you as well, Russell. I do notice Tristram does force
> > TX_CONFIG=1 (DW_VR_MII_TX_CONFIG_PHY_SIDE_SGMII), but I don't understand
> > what's truly behind that. Hopefully just a misunderstanding.
> 
> If you want to peek at the KSZ9477 documentation, what I'm looking at is
> available from here:
> https://www.microchip.com/en-us/product/ksz9477#Documentation

Thanks.

> Interestingly, there are a number of errata:
> 
> Module 7 - SGMII auto-negotiation does not set bit 0 in the
> auto-negotiation code word
> Basically requires MII_ADVERTISE to be written as 0x1a0, and is only
> needed after power-up or reset.

Ok, I guess 0x1a0 would otherwise be the SR_MII_AN_ADV default value -
Nothing looks special about it. The layout of this register is the
table above. Bits 8:7 (PAUSE) and 5 (FULL_DUPLEX) are set, the rest
(NEXT_PAGE, REMOTE_FAULT, HALF_DUPLEX) are unset.

It's odd that writing this register would fix anything, especially
seeing that it isn't documented anywhere that bit 0 of the autoneg code
word would ever originate from SR_MII_AN_ADV in the 2 SGMII mode columns,
or would be affected by a modification of that register. But ok, I can't
contradict that...

It is under-specified whether the erratum occurs when TX_CONFIG is 1 or 0.
Unless specified otherwise, I am going to assume both modes.

I believe this erratum workaround should be implemented by Tristram;
I'm not seeing it in this patch.

> Module 8 - SGMII port link details from the connected SGMII PHY are not
> passed properly to the port 7 GMAC
> Basically, AUTONEG_INTR_STATUS needs to be read, and the PCS
> manually programmed for the speed.

Ok, I think this is answering my question to Tristram:
xpcs_link_up_sgmii_1000basex() needs to force the speed in MII_BMCR even
for PHYLINK_PCS_NEG_INBAND_ENABLED, where that otherwise shouldn't be
needed. It means that VR_MII_DIG_CTRL1[MAC_AUTO_SW] doesn't work? Bit 9
of "SGMII DIGITAL CONTROL REGISTER" is also hidden, and marked as reserved (0).

A reference to the KSZ9477 errata sheet module 8 in the code would be
nice. To me that is sufficient.

> Module 15 - SGMII registers are not initialized by hardware reset
> Requires that bit 15 of BASIC_CONTROL is set to reset the registers.

Nothing actionable here, the xpcs driver already performs reset.

> All three are not scheduled to be fixed, apparently.
> 
> There seems to be no information listed there regarding the requirement
> for SGMII PHY mode.

Not that I can find either.

> > Tristram, why do you set this field to 1? Linux only supports the
> > configuration where a MAC behaves like a MAC. There needs to be an
> > entire discussion if you want to configure a MAC to be a SGMII autoneg
> > master (like a PHY), how to model that.
> 
> (Using SJA1105 register terminology...)
> 
> Looking at the patch, Tristram is setting PHY_MODE=1 and SGMII_LINK=1
> not when configuring for SGMII, but when configuring for 1000base-X.

Yeah, you're right... (?!) Again, misunderstanding, I hope?

> This is reflected in the documentation for KSZ9477 - which states that
> both these bits need to be set in "SerDes" aka 1000base-X mode. The
> question is... where did that statement come from, and should we be
> doing that for 1000base-X mode anyway?

Here, because SJA1105 only supports SGMII and _not_ 1000Base-X, I don't
have practical experience. Thus I can only refer you to:

Programming Guidelines for Clause 37 Auto-Negotiation

The Clause 37 auto-negotiation is enabled only when Bit 12 of the
SR_MII_CTRL Register is set. For Backplane Ethernet configurations,
the default value of this bit is 0. For all other configurations, the
default value of this bit is 1. When this bit is enabled, the Clause 37
auto-negotiation is initiated on the following events:
- Power on Reset
- Soft Reset

The DWC_xpcs initiates the auto-negotiation on the following events:
- When the link partner requests auto-negotiation by transmitting
  configuration code groups.
- When the receive data path loses code group synchronization for more
  than 10 ms (in 1000BASE-X mode) or 1.6 ms (in SGMII mode).
- When an error condition is detected while receiving the /C/ or /I/
  order sets.
- When the host or software requests auto-negotiation by setting Bit 9
  in the SR_MII_CTRL Register.

The following list explains the auto-negotiation process:

1. The DWC_xpcs starts auto-negotiation by first transmitting the
   configuration words with all zeroes for 10 ms (1.6 ms for the SGMII
   interface).

2. The SR_MII_AN_ADV Register content is transmitted in the
   configuration words in the 1000BASE-X mode. In the SGMII mode, the
   values given in the "Config_Reg Bits in the 1000BASE-X and SGMII Mode"
   table are transmitted in the configuration word. The auto-negotiation
   is complete when both link partners have exchanged their base pages.

3. DWC_xpcs generates an interrupt on completion (sbd_intr_o) of
   auto-negotiation when Bit 0 of VR_MII_AN_CTRL Register is set to 1.

4. The auto-negotiation completion is indicated in the VR_MII_AN_INTR_STS
   register.

Note: In configurations with MDIO interface, you must read the
      VR_MII_AN_INTR_STS register after the auto-negotiation is complete.
      Auto-negotiation Status reads incorrect value when Status is not
      read in the previous Auto-negotiation session and MDC clock is not
      active when the Management is not accessing the PHY/PCS registers.
      Because of this limitation, Management may get incorrect
      information of Link partner and this can cause link failure.
      However, this limitation is not visible if the programming
      guidelines are followed as mentioned in the databook. This is
      because the minimum time between two successive auto-negotiations
      has at least 3.2ms and Management or Host is expected to read the
      current status of the Auto-negotiation before the next auto-negotiation
      is completed.

5. In the MAC attached to the DWC_xpcs, the Transmit and Receive Flow
   Control mode is determined based on the capabilities of the partner
   (given in SR_MII_LP_BABL) and the half-duplex or full-duplex
   operating mode.

   In SGMII auto-negotiation, the received link status such as speed,
   duplex mode is also given in the VR_MII_AN_INTR_STS Register.

   When Clause 37 auto-negotiation is complete, DWC_xpcs automatically
   resolves the duplex mode by selecting the duplex mode of its link
   partner. If auto-negotiation is disabled, the DWC_xpcs selects the
   duplex mode defined in Bit 8 of SR_MII_CTRL Register.

Nothing, in my reading, suggests to me that DW_VR_MII_TX_CONFIG_PHY_SIDE_SGMII
would be considered by the XPCS when operating in 1000Base-X mode
(DW_VR_MII_PCS_MODE_MASK == DW_VR_MII_PCS_MODE_C37_1000BASEX).
Tristram.Ha@microchip.com Jan. 29, 2025, 12:31 a.m. UTC | #11
> On Tue, Jan 28, 2025 at 12:33:11PM +0000, Russell King (Oracle) wrote:
> > I wonder what the original Synopsys documentation says about the AN
> > control register.
> 
> My private XPCS databook 2017-12.pdf, the applicability of which I cannot
> vouch for, has a section with programming guidelines for SGMII
> Auto-Negotiation. I quote:
> 
> | Clause 37 auto-negotiation can be performed in the SGMII mode by
> | programming various registers as follows:
> |
> | 1. In configurations with both 10G and 1G mode speed mode, switch
> |    DWC_xpcs to 1G speed mode by following the steps described in
> |    "Switching to 1G or KX Mode and 10 or 100 Mbps SGMII Speed".
> |
> | 2. In Backplane Ethernet PCS configurations, program bit[12] (AN_EN) of
> |    SR_AN_CTRL Register to 0 and bit[12] (CL37_BP) of VR_XS_PCS_DIG_CTRL1
> |    Register to 1.
> |
> | 3. Disable Clause 37 auto-negotiation by programming bit [12]
> |    (AN_ENABLE) of SR_MII_CTRL Register to 0 (in case it is already
> |    enabled).
> |
> | 4. Program various fields of VR_MII_AN_CTRL Register appropriately as
> |    follows:
> |    - Program PCS_MODE to 2’b10
> |    - Program TX_CONFIG to 1 (PHY side SGMII) or 0 (MAC side SGMII) based
> |      on your requirement
> |    - Program MII_AN_INTR_EN to 1, to enable auto-negotiation complete
> |      interrupt
> |    - If TX_CONFIG is set to 1 and bit[0] of VR_MII_DIG_CTRL1 Register is
> |      set to 0, program SGMII_LINK_STS to indicate the link status to the
> |      MAC side SGMII.
> |    - Program MII_CTRL to 0 or 1, as per your requirement.
> |
> | 5. If DWC_xpcs is configured as PHY-side SGMII in the above step, you
> |    can program bit [0] of VR_MII_DIG_CTRL1 Register to 1, if you wish to
> |    use the values of the xpcs_sgmii_link_sts_i input ports.
> |    xpcs_sgmii_full_duplex_i and xpcs_sgmii_link_speed_i as the
> |    transmitted configuration word.
> |
> | 6. If DWC_xpcs is configured as PHY-side SGMII and if bit[0] of
> |    VR_MII_DIG_CTRL1 Register is set to 0,
> |    - Program SS13 and SS6 bits of SR_MII_CTRL Register to the required
> |      SGMII Speed
> |    - Program bit [5] (FD) of SR_MII_AN_ADV to the desired mode. This
> |      step is mandatory even if you wish to leave the FD register bit to
> |      its default value.
> |
> | 7. If DWC_xpcs is configured as MAC-side SGMII in step 4, program bit[9]
> |    of VR_MII_DIG_CTRL1 Register to 1, for DWC_xpcs to automatically
> |    switch to the negotiated link-speed, after the completion of
> |    auto-negotiation.
> |
> | 8. Enable CL37 Auto-negotiation, by programming bit[12] of the
> |    SR_MII_CTRL Register to 1.
> 
> In my reading of these steps, writing to DW_VR_MII_AN_CTRL does not
> depend on a subsequent write to SR_MII_AN_ADV to take effect.
> But there is this additional note in the databook:
> 
> | If TX_CONFIG=1 and Bit[0] (SGMII_PHY_MODE_CTRL) of VR_MII_DIG_CTRL1 = 0,
> | program the SR_MII_AN_ADV only after programming 'SGMII_LINK_STS' bit
> | (of VR_MII_AN_CTRL) and SS13 and SS6 bits (of SR_MII_CTRL)
> 
> So my understanding is that SR_MII_AN_ADV needs to be written only if
> TX_CONFIG=1 (SJA1105 calls this AUTONEG_CONTROL[PHY_MODE]). That's quite
> different, and that will make sense when you consider that there's also
> a table with the places the autoneg code word gets its info from:
> 
> Config_Reg Bits in the 1000BASE-X and SGMII Mode
> 
>  +----------------+-------------------+--------------------+-----------------------------------------
> ---+
>  | Config_Reg bit | 1000Base-X mode   | SGMII mode value   | SGMII mode value
> |
>  |                |                   | when TX_CONFIG = 0 | when TX_CONFIG = 1                         |
>  +----------------+-------------------+--------------------+-----------------------------------------
> ---+
>  | 15             | Next page support | 0                  | Link up or down.                           |
>  |                |                   |                    | If DW_VR_MII_DIG_CTRL1_PHY_MODE_CTRL ==
> 0, |
>  |                |                   |                    | this bit is derived from Bit 4             |
>  |                |                   |                    | (SGMII_LINK_STS) of the VR_MII_AN_CTRL.    |
>  |                |                   |                    | If DW_VR_MII_DIG_CTRL1_PHY_MODE_CTRL ==
> 1, |
>  |                |                   |                    | this bit is derived from the input port    |
>  |                |                   |                    | 'xpcs_sgmii_link_sts_i'                    |
>  +----------------+-------------------+--------------------+-----------------------------------------
> ---+
>  | 14             | ACK               | 1                  | 1                                          |
>  +----------------+-------------------+--------------------+-----------------------------------------
> ---+
>  | 13             | RF[1]             | 0                  | 0                                          |
>  +----------------+-------------------+--------------------+-----------------------------------------
> ---+
>  | 12             | RF[0]             | 0                  | FULL_DUPLEX                                |
>  |                |                   |                    | If DW_VR_MII_DIG_CTRL1_PHY_MODE_CTRL ==
> 0, |
>  |                |                   |                    | this bit is derived from Bit 5 (FD) of     |
>  |                |                   |                    | the SR_MII_AN_ADV.                         |
>  |                |                   |                    | If DW_VR_MII_DIG_CTRL1_PHY_MODE_CTRL ==
> 1, |
>  |                |                   |                    | this bit is derived from the input port    |
>  |                |                   |                    | 'xpcs_sgmii_full_duplex_i'                 |
>  +----------------+-------------------+--------------------+-----------------------------------------
> ---+
>  | 11:10          | Reserved          | 0                  | SPEED                                      |
>  |                |                   |                    | If DW_VR_MII_DIG_CTRL1_PHY_MODE_CTRL ==
> 0, |
>  |                |                   |                    | these bits are derived from Bit 13 (SS13)  |
>  |                |                   |                    | and Bit 6 (SS6) of the SR_MII_CTRL.        |
>  |                |                   |                    | If DW_VR_MII_DIG_CTRL1_PHY_MODE_CTRL ==
> 1, |
>  |                |                   |                    | this bit is derived from the input port    |
>  |                |                   |                    | 'xpcs_sgmii_link_speed_i[1:0]'             |
>  +----------------+-------------------+--------------------+-----------------------------------------
> ---+
>  | 9              | Reserved          | 0                  | 0                                          |
>  +----------------+-------------------+--------------------+-----------------------------------------
> ---+
>  | 8:7            | PAUSE[1:0]        | 0                  | 0                                          |
>  +----------------+-------------------+--------------------+-----------------------------------------
> ---+
>  | 6              | HALF_DUPLEX       | 0                  | 0                                          |
>  +----------------+-------------------+--------------------+-----------------------------------------
> ---+
>  | 5              | FULL_DUPLEX       | 0                  | 0                                          |
>  +----------------+-------------------+--------------------+-----------------------------------------
> ---+
>  | 4:1            | Reserved          | 0                  | 0                                          |
>  +----------------+-------------------+--------------------+-----------------------------------------
> ---+
>  | 0              | Reserved          | 1                  | 1                                          |
>  +----------------+-------------------+--------------------+-----------------------------------------
> ---+
> 
> I haven't figured out either what might be going on with the KSZ9477
> integration, I just made a quick refresher and I thought this might be
> useful for you as well, Russell. I do notice Tristram does force
> TX_CONFIG=1 (DW_VR_MII_TX_CONFIG_PHY_SIDE_SGMII), but I don't understand
> what's truly behind that. Hopefully just a misunderstanding.
> 
> Tristram, why do you set this field to 1? Linux only supports the
> configuration where a MAC behaves like a MAC. There needs to be an
> entire discussion if you want to configure a MAC to be a SGMII autoneg
> master (like a PHY), how to model that.
> 
> Could you confirm that the requirement to program the SGMII
> Auto-Negotiation Advertisement Register only exists if the Transmit
> Configuration Master field is programmed to 1, like you are doing?

The default value of DW_VR_MII_AN_CTRL is DW_VR_MII_PCS_MODE_C37_SGMII
(0x04).  When a SGMII SFP is used the SGMII port works without any
programming.  So for example network communication can be done in U-Boot
through the SGMII port.  When a 1000BaseX SFP is used that register needs
to be programmed (DW_VR_MII_SGMII_LINK_STS |
DW_VR_MII_TX_CONFIG_PHY_SIDE_SGMII | DW_VR_MII_PCS_MODE_C37_1000BASEX)
(0x18) for it to work.

(DW_VR_MII_TX_CONFIG_PHY_SIDE_SGMII has to be used together with
DW_VR_MII_TX_CONFIG_MASK to mean 0x08.  Likewise for
DW_VR_MII_PCS_MODE_C37_SGMII and DW_VR_MII_PCS_MODE_MASK to mean 0x04.
It is a little difficult to just use those names to indicate the actual
value.)

DW_VR_MII_DIG_CTRL1 is never touched.  DW_VR_MII_DIG_CTRL1_MAC_AUTO_SW
does not exist in KSZ9477 implementation.  As setting that bit does not
have any effect I did not do anything about it.

It does have the intended effect of separating SGMII and 1000BaseX modes
in later versions.  And DW_VR_MII_DIG_CTRL1_PHY_MODE_CTRL is used along
with it.  They are mutually exclusive.  For SGMII SFP
DW_VR_MII_DIG_CTRL1_MAC_AUTO_SW is set; for 1000BaseX SFP
DW_VR_MII_DIG_CTRL1_PHY_MODE_CTRL is set.

KSZ9477 errata module 7 indicates the MII_ADVERTISE register needs to be
set 0x01A0.  This is done with phylink_mii_c22_pcs_encode_advertisement()
but only for 1000BaseX mode.  I probably need to add that code in SGMII
configuration.  The default value of this register is 0x20.  This update
depends on SFP.  So far I did not find a SGMII SFP that requires this
setting.  This issue is more like the hardware did not set the default
value properly.  As I said, the SGMII port works with SGMII SFP after
power up without programming anything.

I am always confused by the master/slave - phy/mac nature of the SFP.
The hardware designers seem to think the SGMII module is acting as a
master then the slave is on the other side, like physically outside the
chip.  I generally think of the slave is inside the SFP, as every board
is programmed that way.

The original instruction was to set 0x18 for SerDes mode, which is used
for 1000BaseX.  Even though the bits have SGMII names they do not mean
SGMII operation, although I do not know the exact definition of SGMII.

Note the evaluation board never has SFP cage logic so I never knew there
is a PHY inside the SGMII SFP.  For SFPs with label 10/100/1000Base-T
they start in SGMII mode.  For SFPs with just 1000Base-T they start in
1000BaseX mode and needs 0x18 value to work.  In Linux the PHY inside the
SFP can switch back to SGMII mode and so the SGMII setting is used
because the EEPROM says SGMII mode is supported.  There are some SFPs
that will use only 1000BaseX mode.  I wonder why the SFP manufacturers do
that.  It seems the PHY access is also not reliable as some registers
always have 0xff value in lower part of the 16-bit value.  That may be
the reason that there is a special Marvell PHY id just for Finisar.
Tristram.Ha@microchip.com Jan. 29, 2025, 12:31 a.m. UTC | #12
> On Mon, Jan 27, 2025 at 07:32:25PM -0800, Tristram.Ha@microchip.com wrote:
> > However SGMII mode in KSZ9477 has a bug in which the current speed
> > needs to be set in MII_BMCR to pass traffic.  The current driver code
> > does not do anything when link is up with auto-negotiation enabled, so
> > that code needs to be changed for KSZ9477.
> 
> Does this claimed SGMII bug have an erratum number like Microchip usually
> assign for something this serious? Is it something we can look up?

KSZ9477 errata module 7 indicates the MII_ADVERTISE register needs to be
set 0x01A0.  This is done with phylink_mii_c22_pcs_encode_advertisement()
but only for 1000BaseX mode.  I probably need to add that code in SGMII
configuration.  The default value of this register is 0x20.  This update
depends on SFP.  So far I did not find a SGMII SFP that requires this
setting.  This issue is more like the hardware did not set the default
value properly.

KSZ9477 errata module 8 indicates the MII_BMCR register needs to be
updated with the correct speed setting.
Vladimir Oltean Jan. 29, 2025, 9:12 p.m. UTC | #13
On Wed, Jan 29, 2025 at 12:31:09AM +0000, Tristram.Ha@microchip.com wrote:
> The default value of DW_VR_MII_AN_CTRL is DW_VR_MII_PCS_MODE_C37_SGMII
> (0x04).  When a SGMII SFP is used the SGMII port works without any
> programming.  So for example network communication can be done in U-Boot
> through the SGMII port.  When a 1000BaseX SFP is used that register needs
> to be programmed (DW_VR_MII_SGMII_LINK_STS |
> DW_VR_MII_TX_CONFIG_PHY_SIDE_SGMII | DW_VR_MII_PCS_MODE_C37_1000BASEX)
> (0x18) for it to work.

Can it be that DW_VR_MII_PCS_MODE_C37_1000BASEX is the important setting
when writing 0x18, and the rest is just irrelevant and bogus? If not,
could you please explain what is the role of DW_VR_MII_SGMII_LINK_STS |
DW_VR_MII_TX_CONFIG_PHY_SIDE_SGMII for 1000Base-X? The XPCS data book
does not suggest they would be considered for 1000Base-X operation. Are
you suggesting for KSZ9477 that is different? If so, please back that
statement up.

> (DW_VR_MII_TX_CONFIG_PHY_SIDE_SGMII has to be used together with
> DW_VR_MII_TX_CONFIG_MASK to mean 0x08.  Likewise for
> DW_VR_MII_PCS_MODE_C37_SGMII and DW_VR_MII_PCS_MODE_MASK to mean 0x04.
> It is a little difficult to just use those names to indicate the actual
> value.)
> 
> DW_VR_MII_DIG_CTRL1 is never touched.  DW_VR_MII_DIG_CTRL1_MAC_AUTO_SW
> does not exist in KSZ9477 implementation.  As setting that bit does not
> have any effect I did not do anything about it.

Never touched by whom? xpcs_config_aneg_c37_sgmii() surely tries to
touch it... Don't you think that the absence of this bit from the
KSZ9477 implementation might have something to do with KSZ9477's unique
need to force the link speed when in in-band mode?

Here's a paragraph about this from the data book:

DWC_xpcs supports automatic reconfiguration of SGMII speed/duplex mode
based on the outcome of auto-negotiation. This feature can be enabled by
programming bit[9] (MAC_AUTO_SW) of VR_MII_DIG_CTRL1 when operating in
SGMII MAC mode. DWC_xpcs is initially configured in the speed/duplex
mode as programmed in the SR_MII_CTRL. If MAC_AUTO_SW bit is enabled,
DWC_xpcs automatically switches to the negotiated speed mode after the
completion of CL37 Auto-negotiation. This eliminates the software
overhead of reading CL37 AN SGMII Status from VR_MII_AN_INTR_STS and
then programming SS13 and SS6 speed-selection bits of SR_MII_CTRL
appropriately.

> It does have the intended effect of separating SGMII and 1000BaseX modes
> in later versions.  And DW_VR_MII_DIG_CTRL1_PHY_MODE_CTRL is used along
> with it.  They are mutually exclusive.  For SGMII SFP
> DW_VR_MII_DIG_CTRL1_MAC_AUTO_SW is set; for 1000BaseX SFP
> DW_VR_MII_DIG_CTRL1_PHY_MODE_CTRL is set.

It's difficult for me to understand what you are trying to communicate here.

> KSZ9477 errata module 7 indicates the MII_ADVERTISE register needs to be
> set 0x01A0.  This is done with phylink_mii_c22_pcs_encode_advertisement()
> but only for 1000BaseX mode.  I probably need to add that code in SGMII
> configuration.  The default value of this register is 0x20.  This update
> depends on SFP.  So far I did not find a SGMII SFP that requires this
> setting.  This issue is more like the hardware did not set the default
> value properly.  As I said, the SGMII port works with SGMII SFP after
> power up without programming anything.
> 
> I am always confused by the master/slave - phy/mac nature of the SFP.
> The hardware designers seem to think the SGMII module is acting as a
> master then the slave is on the other side, like physically outside the
> chip.  I generally think of the slave is inside the SFP, as every board
> is programmed that way.
> 
> The original instruction was to set 0x18 for SerDes mode, which is used
> for 1000BaseX.  Even though the bits have SGMII names they do not mean
> SGMII operation, although I do not know the exact definition of SGMII.

It sounds like you should run "sgmii spec" by your favorite search
engine and give it a read, it's a freely available PDF only several
pages long, and it will be worth spending your time.

> Note the evaluation board never has SFP cage logic so I never knew there
> is a PHY inside the SGMII SFP.

What kind of SFP cage logic does the evaluation board have?

> For SFPs with label 10/100/1000Base-T
> they start in SGMII mode.  For SFPs with just 1000Base-T they start in
> 1000BaseX mode and needs 0x18 value to work.  In Linux the PHY inside the
> SFP can switch back to SGMII mode and so the SGMII setting is used
> because the EEPROM says SGMII mode is supported.  There are some SFPs
> that will use only 1000BaseX mode.  I wonder why the SFP manufacturers do
> that.  It seems the PHY access is also not reliable as some registers
> always have 0xff value in lower part of the 16-bit value.  That may be
> the reason that there is a special Marvell PHY id just for Finisar.
Russell King (Oracle) Jan. 29, 2025, 10:05 p.m. UTC | #14
(To Tristram as well - I've added a workaround for your company mail
sewers that don't accept bounces from emails that have left your
organisation - in other words, once they have left your companies
mail servers, you have no idea whether they reached their final
recipient. You only get to know if your email servers can't send it
to the very _next_ email server.)

On Wed, Jan 29, 2025 at 11:12:26PM +0200, Vladimir Oltean wrote:
> On Wed, Jan 29, 2025 at 12:31:09AM +0000, Tristram.Ha@microchip.com wrote:
> > The default value of DW_VR_MII_AN_CTRL is DW_VR_MII_PCS_MODE_C37_SGMII
> > (0x04).  When a SGMII SFP is used the SGMII port works without any
> > programming.  So for example network communication can be done in U-Boot
> > through the SGMII port.  When a 1000BaseX SFP is used that register needs
> > to be programmed (DW_VR_MII_SGMII_LINK_STS |
> > DW_VR_MII_TX_CONFIG_PHY_SIDE_SGMII | DW_VR_MII_PCS_MODE_C37_1000BASEX)
> > (0x18) for it to work.
> 
> Can it be that DW_VR_MII_PCS_MODE_C37_1000BASEX is the important setting
> when writing 0x18, and the rest is just irrelevant and bogus? If not,
> could you please explain what is the role of DW_VR_MII_SGMII_LINK_STS |
> DW_VR_MII_TX_CONFIG_PHY_SIDE_SGMII for 1000Base-X? The XPCS data book
> does not suggest they would be considered for 1000Base-X operation. Are
> you suggesting for KSZ9477 that is different? If so, please back that
> statement up.

Agreed. I know that the KSZ9477 information says differently, but if
the implementation is the Synopsys DesignWare XPCS, then surely the
register details in Synopsys' documentation should apply... so I'm
also interested to know why KSZ9477 seems to deviate from Synopsys'
implementation on this need.

I've been wondering whether setting DW_VR_MII_SGMII_LINK_STS and
DW_VR_MII_TX_CONFIG_PHY_SIDE_SGMII in 1000base-X mode is something
that should be done anyway, but from what Vladimir is saying, there's
nothing in Synopsys' documentation that supports it.

The next question would be whether it's something that we _could_
always do - if it has no effect for anyone else, then removing a
conditional simplifies the code.

> > (DW_VR_MII_TX_CONFIG_PHY_SIDE_SGMII has to be used together with
> > DW_VR_MII_TX_CONFIG_MASK to mean 0x08.  Likewise for
> > DW_VR_MII_PCS_MODE_C37_SGMII and DW_VR_MII_PCS_MODE_MASK to mean 0x04.
> > It is a little difficult to just use those names to indicate the actual
> > value.)
> > 
> > DW_VR_MII_DIG_CTRL1 is never touched.  DW_VR_MII_DIG_CTRL1_MAC_AUTO_SW
> > does not exist in KSZ9477 implementation.  As setting that bit does not
> > have any effect I did not do anything about it.
> 
> Never touched by whom? xpcs_config_aneg_c37_sgmii() surely tries to
> touch it... Don't you think that the absence of this bit from the
> KSZ9477 implementation might have something to do with KSZ9477's unique
> need to force the link speed when in in-band mode?

I think Tristram is talking about xpcs_config_aneg_c37_1000basex()
here, not SGMII.

Tristram, as a general note: there is a reason I utterly hate the term
"SGMII" - and the above illustrates exactly why. There is Cisco SGMII
(the modified 1000base-X protocol for use with PHYs.) Then there is the
"other" SGMII that manufacturers like to band about because they want
to describe their "Serial Gigabit Media Independent Interface" and they
use it to describe an interface that supports both 1000base-X and Cisco
SGMII.

This overloading of "SGMII" leads to nothing but confusion - please be
specific about whether you are talking about 1000base-X or Cisco SGMII,
and please please please avoid using "SGMII".

However, in the kernel code, we use "SGMII" exclusively to mean Cisco
SGMII.


> > It does have the intended effect of separating SGMII and 1000BaseX modes
> > in later versions.  And DW_VR_MII_DIG_CTRL1_PHY_MODE_CTRL is used along
> > with it.  They are mutually exclusive.  For SGMII SFP
> > DW_VR_MII_DIG_CTRL1_MAC_AUTO_SW is set; for 1000BaseX SFP
> > DW_VR_MII_DIG_CTRL1_PHY_MODE_CTRL is set.
> 
> It's difficult for me to understand what you are trying to communicate here.

I think it makes sense - MAC_AUTO_SW is meaningless in 1000base-X mode
because the speed is fixed at 1G, whereas in Cisco SGMII MAC mode this
bit allows the PCS to change its speed setting according to the AN
result.

> > KSZ9477 errata module 7 indicates the MII_ADVERTISE register needs to be
> > set 0x01A0.  This is done with phylink_mii_c22_pcs_encode_advertisement()
> > but only for 1000BaseX mode.  I probably need to add that code in SGMII
> > configuration.

Hang on one moment... I think we're going off to another problem.

For 1000base-X, we do use phylink_mii_c22_pcs_encode_advertisement()
which will generate the advertisement word for 1000base-X.

For Cisco SGMII, it will generate the tx_config word for a MAC-side
setup (which is basically the fixed 0x4001 value.) From what I read
in KSZ9477, this value would be unsuitable for a case where the
following register values are:

	DW_VR_MII_PCS_MODE_C37_SGMII set
	DW_VR_MII_TX_CONFIG_PHY_SIDE_SGMII set
	DW_VR_MII_DIG_CTRL1_PHY_MODE_CTRL clear

meaning that we're generating a SGMII PHY-side word indicating the
parameters to be used from the registers rather than hardware signals.

> > The default value of this register is 0x20.  This update
> > depends on SFP.  So far I did not find a SGMII SFP that requires this
> > setting.  This issue is more like the hardware did not set the default
> > value properly.  As I said, the SGMII port works with SGMII SFP after
> > power up without programming anything.
> > 
> > I am always confused by the master/slave - phy/mac nature of the SFP.
> > The hardware designers seem to think the SGMII module is acting as a
> > master then the slave is on the other side, like physically outside the
> > chip.  I generally think of the slave is inside the SFP, as every board
> > is programmed that way.

I think you're getting confused by microchip terminology.

Cisco SGMII is an asymmetric protocol. Cisco invented it as a way of:
1. supporting 10M and 100M speeds over a single data pair in each
   direction.
2. sending the parameters of that link from the PHY to the MAC/PCS over
   that single data pair.

They took the IEEE 1000base-X specification as a basis (which is
symmetric negotiation via a 16-bit word).

The Cisco SGMII configuration word from the PHY to the PCS/MAC
contains:

	bit 15 - link status
	bit 14 - (reserved for AN acknowledge as per 1000base-X)
	bit 13 - reserved (zero)
	bit 12 - duplex mode
	bit 11, 10 - speed
	bits 9..1 - reserved (zero)
	bit 0 - set to 1

This is "PHY" mode, or in Microchip parlence "master" mode - because
the PHY is dictating what the other end should be doing.

When the PCS/MAC receives this, the PCS/MAC is expected to respond
with a configuration word containing:

	bit 15 - zero
	bit 14 - set to 1 (indicating acknowledge)
	bit 13..1 - zero
	bit 0 - set to 1

This is MAC mode, or in Microchip parlence "slave" mode - because the
MAC is being told what it should do.

So, for a Cisco SGMII link with a SFP module which has a PHY embedded
inside, you definitely want to be using MAC mode, because the PHY on
the SFP module will be dictating the speed and duplex to the components
outside of the SFP - in other words the PCS and MAC.

> > For SFPs with label 10/100/1000Base-T
> > they start in SGMII mode.  For SFPs with just 1000Base-T they start in
> > 1000BaseX mode and needs 0x18 value to work.  In Linux the PHY inside the
> > SFP can switch back to SGMII mode and so the SGMII setting is used
> > because the EEPROM says SGMII mode is supported.

Ummm.... not quite. The SFP module EEPROM actually says absolutely
nothing about whether 1000base-X or Cisco SGMII should be used
with a module. The Linux SFP code (which I wrote, so I've torn my hair
out over this issue) does a best guess based on what it finds - but
ultimately, what it comes down to is... if we find a PHY that we can
access, it is a gigabit PHY, and we have a driver for it, then we
(re)program the PHY to use Cisco SGMII.

If we don't find a PHY, and it doesn't look like it's a copper module,
then we use 1000base-X (because SFPs were originally for fibre.)

We do have quirks to cope with weirdness, particularly with GPON
modules.

> > There are some SFPs
> > that will use only 1000BaseX mode.  I wonder why the SFP manufacturers do
> > that.  It seems the PHY access is also not reliable as some registers
> > always have 0xff value in lower part of the 16-bit value.  That may be
> > the reason that there is a special Marvell PHY id just for Finisar.

I don't have any modules that have a Finisar PHY rather than a Marvell
PHY. I wonder if the problem is that the Finisar module doesn't like
multi-byte I2C accesses to the PHY.

Another thing - make sure that the I2C bus to the SFP cage is running
at 100kHz, not 400kHz.

For Vladimir: I've added four hacky patches that build on top of the
large RFC series I sent earlier which add probably saner configuration
for the SGMII code, hopefully making it more understandable in light
of Wangxun's TXGBE using PHY mode there (they were adamant that their
hardware requires it.) These do not address Tristram's issue.
Vladimir Oltean Jan. 29, 2025, 11:05 p.m. UTC | #15
On Wed, Jan 29, 2025 at 10:05:20PM +0000, Russell King (Oracle) wrote:
> > > It does have the intended effect of separating SGMII and 1000BaseX modes
> > > in later versions.  And DW_VR_MII_DIG_CTRL1_PHY_MODE_CTRL is used along
> > > with it.  They are mutually exclusive.  For SGMII SFP
> > > DW_VR_MII_DIG_CTRL1_MAC_AUTO_SW is set; for 1000BaseX SFP
> > > DW_VR_MII_DIG_CTRL1_PHY_MODE_CTRL is set.
> > 
> > It's difficult for me to understand what you are trying to communicate here.
> 
> I think it makes sense - MAC_AUTO_SW is meaningless in 1000base-X mode
> because the speed is fixed at 1G, whereas in Cisco SGMII MAC mode this
> bit allows the PCS to change its speed setting according to the AN
> result.

The bit which you've just explained is the only portion that made some
sense to me. What did not make sense was:

- What is the subject of the first sentence? "it has the intended effect
  of separating SGMII and 1000BaseX modes" <- who?

- "For 1000BaseX SFP, PHY_MODE_CTRL is set"? How come, and according to whom?
  PHY_MODE_CTRL, as I've previously pasted from the XPCS data book in a
  previous table, is a field which selects, while in SGMII PHY mode,
  whether the contents of the auto-negotiation code word comes from
  wires (when set to 1) or from registers (when set to 0).

For this second reply, I even went back to triple-check this, and I am
copying this additional sentence about PHY_MODE_CTRL.

| Note: This bit should be set only when XPCS is configured as
| SGMII/QSGMII PHY i.e., TX_CONFIG=1
| In other configurations, this field is reserved and read-only.

So it is very confusing to me that Tristram would be talking about
PHY_MODE_CTRL in the context of 1000Base-X. I don't know what this
denotes, but it just makes me question whether whatever he's been
calling 1000Base-X all along is something else entirely. This
"guessing what Tristram is trying to say" game is hard.

> For Vladimir: I've added four hacky patches that build on top of the
> large RFC series I sent earlier which add probably saner configuration
> for the SGMII code, hopefully making it more understandable in light
> of Wangxun's TXGBE using PHY mode there (they were adamant that their
> hardware requires it.) These do not address Tristram's issue.

Ok, let's sidetrack Tristram's thread, sure.

Patch 2: correct but

+	/* PHY_MODE_CTRL only applies for PHY-side SGMII. When PHY_MODE_CTRL
+	 * is set, the SGMII tx_config register bits 15 (link), 12 (duplex)
+	 * and 11:10 (speed) sent is derived from hardware inputs to the XPCS.
+	 * When clear, bit 15 comes from DW_VR_MII_AN_CTRL bit 4, bit 12 from
+	 * MII_ADVERTISE bit 5, and bits 11:10 from MII_BMCR speed bits. In
+	 * the latter case, some implementation documentatoin states that
                                 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                                 integration documentation
+	 * MII_ADVERTISE must be written last.
+	 */

Patch 3: "DW_XPCS_SGMII_10_100_UNCHANGED" instead of "UNSET", maybe?
Maybe it's just me, but "unset" sounds like "set to 0"/"cleared".

Patch 4: same "documentatoin" typo.

Otherwise I think there is value in these clarification patches.
Tristram.Ha@microchip.com Jan. 30, 2025, 4:50 a.m. UTC | #16
> On Wed, Jan 29, 2025 at 12:31:09AM +0000, Tristram.Ha@microchip.com wrote:
> > The default value of DW_VR_MII_AN_CTRL is DW_VR_MII_PCS_MODE_C37_SGMII
> > (0x04).  When a SGMII SFP is used the SGMII port works without any
> > programming.  So for example network communication can be done in U-Boot
> > through the SGMII port.  When a 1000BaseX SFP is used that register needs
> > to be programmed (DW_VR_MII_SGMII_LINK_STS |
> > DW_VR_MII_TX_CONFIG_PHY_SIDE_SGMII |
> DW_VR_MII_PCS_MODE_C37_1000BASEX)
> > (0x18) for it to work.
> 
> Can it be that DW_VR_MII_PCS_MODE_C37_1000BASEX is the important setting
> when writing 0x18, and the rest is just irrelevant and bogus? If not,
> could you please explain what is the role of DW_VR_MII_SGMII_LINK_STS |
> DW_VR_MII_TX_CONFIG_PHY_SIDE_SGMII for 1000Base-X? The XPCS data book
> does not suggest they would be considered for 1000Base-X operation. Are
> you suggesting for KSZ9477 that is different? If so, please back that
> statement up.

As I mentioned before, the IP used in KSZ9477 is old and so may not match
the behavior in current DesignWare specifications.  I experimented with
different settings and observed these behaviors.

DW_VR_MII_DIG_CTRL1 has default value 0x2200.  Setting MAC_AUTO_SW (9)
has no effect as the value read back does not retain the bit.  Setting
PHY_MODE_CTRL (0) retains the bit but does not seem to have any effect
and it is not required for operation.  So we can ignore this register
for KSZ9477.

DW_VR_MII_AN_CTRL has default value 0x0004, which means C37_SGMII is
enabled.  Plugging in a 10/100/1000Base-T SFP will work without doing
anything.

Setting TX_CONFIG_PHY_SIDE_SGMII (3) requires auto-negotiation to be
disabled in MII_BMCR for the port to work.

SGMII_LINK_STS (4) depends on TX_CONFIG_PHY_SIDE_SGMII.  If that bit is
not set then this bit has no effect.  The result of setting this bit is
auto-negotiation can be enabled in MII_BMCR.

So C37_SGMII mode can still work with TX_CONFIG_PHY_SIDE_SGMII on and
auto-negotiation disabled.  But the problem is when the cable is
unplugged and plugged the port does not work as the module cannot detect
the link.  Enabling auto-negotiation and then disabling it will cause
the port to work again.

Now for 1000BaseX mode C37_1000BASEX is used and when auto-negotiation
is disabled the port works.  For the XPCS driver this can be done by
setting neg_mode to false at the beginning.  Problem is this flag can
only be set once at driver initialization.  When auto-negotiation is not
used then SFP using SGMII mode does not work properly.

So for 1000BaseX mode TX_CONFIG_PHY_SIDE_SGMII can be turned on and then
SGMII_LINK_STS allows auto-negotiation to be enabled all the time for
both SGMII and 1000BaseX modes to work.

C37_SGMII working:

Auto-negotiation enabled

Auto-negotiation disabled
TX_CONFIG_PHY_SIDE_SGMII on
(stop working after cable is unplugged and re-plugged)

C37_1000BASEX working:

Auto-negotiation disabled

Auto-negotiation disabled
TX_CONFIG_PHY_SIDE_SGMII on

Auto-negotiation enabled
TX_CONFIG_PHY_SIDE_SGMII on
SGMII_LINK_STS on

Note this behavior for 1000BaseX mode only occurs in KSZ9477, so we can
stop finding the reasons with current specs.

Microchip has another chip with newer IP version that does not have this
behavior for 1000BaseX mode.  That is, it does not require
auto-negotiation to be disabled for the port to work.  However, that chip
has major issues when using 2.5G mode so I do not know how reliable it is
when using 1G mode.

> > (DW_VR_MII_TX_CONFIG_PHY_SIDE_SGMII has to be used together with
> > DW_VR_MII_TX_CONFIG_MASK to mean 0x08.  Likewise for
> > DW_VR_MII_PCS_MODE_C37_SGMII and DW_VR_MII_PCS_MODE_MASK to mean
> 0x04.
> > It is a little difficult to just use those names to indicate the actual
> > value.)
> >
> > DW_VR_MII_DIG_CTRL1 is never touched.
> DW_VR_MII_DIG_CTRL1_MAC_AUTO_SW
> > does not exist in KSZ9477 implementation.  As setting that bit does not
> > have any effect I did not do anything about it.
> 
> Never touched by whom? xpcs_config_aneg_c37_sgmii() surely tries to
> touch it... Don't you think that the absence of this bit from the
> KSZ9477 implementation might have something to do with KSZ9477's unique
> need to force the link speed when in in-band mode?
> 
> Here's a paragraph about this from the data book:
> 
> DWC_xpcs supports automatic reconfiguration of SGMII speed/duplex mode
> based on the outcome of auto-negotiation. This feature can be enabled by
> programming bit[9] (MAC_AUTO_SW) of VR_MII_DIG_CTRL1 when operating in
> SGMII MAC mode. DWC_xpcs is initially configured in the speed/duplex
> mode as programmed in the SR_MII_CTRL. If MAC_AUTO_SW bit is enabled,
> DWC_xpcs automatically switches to the negotiated speed mode after the
> completion of CL37 Auto-negotiation. This eliminates the software
> overhead of reading CL37 AN SGMII Status from VR_MII_AN_INTR_STS and
> then programming SS13 and SS6 speed-selection bits of SR_MII_CTRL
> appropriately.
> 
> > It does have the intended effect of separating SGMII and 1000BaseX modes
> > in later versions.  And DW_VR_MII_DIG_CTRL1_PHY_MODE_CTRL is used along
> > with it.  They are mutually exclusive.  For SGMII SFP
> > DW_VR_MII_DIG_CTRL1_MAC_AUTO_SW is set; for 1000BaseX SFP
> > DW_VR_MII_DIG_CTRL1_PHY_MODE_CTRL is set.
> 
> It's difficult for me to understand what you are trying to communicate here.

The new chip has MAC_AUTO_SW bit in DIG_CTRL1 register and PHY_MODE_CTRL
is required to operate the port.  However I am not sure about the
MAC_AUTO_SW bit as it is not required for SGMII operation.  When it is on
PHY_MODE_CTRL becomes don't care and can be turned off.  It is primarily
used to detect the type of SFPs at the beginning.  Note this chip does
not use Linux so I cannot verify its function with the XPCS driver, but
the 2.5G code in that driver is so simple I wonder if it is complete.

One major difference is the 2G5_EN bit, which seems to be just to enable
2.5G mode, but it is not so in Microchip implementation as the bit means
2.5G mode is being used and so hardware needs to do something special to
manage the bandwidth.  This bit is not required to be turned off for 1G
mode.  There are other registers to set to change to 2.5G or 1G mode.
This is a little out of topic.

> > Note the evaluation board never has SFP cage logic so I never knew there
> > is a PHY inside the SGMII SFP.
> 
> What kind of SFP cage logic does the evaluation board have?

The original KSZ9477 evaluation board never has this SFP cage logic and
during development we never used that to verify SGMII function.  There is
a way to detect which type of SFP is used so reading its EEPROM is never
required.  There may be exceptions.

Microchip had to build a new board to submit this DSA driver patch for
SGMII support.
Tristram.Ha@microchip.com Jan. 30, 2025, 4:50 a.m. UTC | #17
> (To Tristram as well - I've added a workaround for your company mail
> sewers that don't accept bounces from emails that have left your
> organisation - in other words, once they have left your companies
> mail servers, you have no idea whether they reached their final
> recipient. You only get to know if your email servers can't send it
> to the very _next_ email server.)
> 
> On Wed, Jan 29, 2025 at 11:12:26PM +0200, Vladimir Oltean wrote:
> > On Wed, Jan 29, 2025 at 12:31:09AM +0000, Tristram.Ha@microchip.com wrote:
> > > The default value of DW_VR_MII_AN_CTRL is
> DW_VR_MII_PCS_MODE_C37_SGMII
> > > (0x04).  When a SGMII SFP is used the SGMII port works without any
> > > programming.  So for example network communication can be done in U-Boot
> > > through the SGMII port.  When a 1000BaseX SFP is used that register needs
> > > to be programmed (DW_VR_MII_SGMII_LINK_STS |
> > > DW_VR_MII_TX_CONFIG_PHY_SIDE_SGMII |
> DW_VR_MII_PCS_MODE_C37_1000BASEX)
> > > (0x18) for it to work.
> >
> > Can it be that DW_VR_MII_PCS_MODE_C37_1000BASEX is the important setting
> > when writing 0x18, and the rest is just irrelevant and bogus? If not,
> > could you please explain what is the role of DW_VR_MII_SGMII_LINK_STS |
> > DW_VR_MII_TX_CONFIG_PHY_SIDE_SGMII for 1000Base-X? The XPCS data book
> > does not suggest they would be considered for 1000Base-X operation. Are
> > you suggesting for KSZ9477 that is different? If so, please back that
> > statement up.
> 
> Agreed. I know that the KSZ9477 information says differently, but if
> the implementation is the Synopsys DesignWare XPCS, then surely the
> register details in Synopsys' documentation should apply... so I'm
> also interested to know why KSZ9477 seems to deviate from Synopsys'
> implementation on this need.
> 
> I've been wondering whether setting DW_VR_MII_SGMII_LINK_STS and
> DW_VR_MII_TX_CONFIG_PHY_SIDE_SGMII in 1000base-X mode is something
> that should be done anyway, but from what Vladimir is saying, there's
> nothing in Synopsys' documentation that supports it.
> 
> The next question would be whether it's something that we _could_
> always do - if it has no effect for anyone else, then removing a
> conditional simplifies the code.

I explained in the other email that this SGMII_LINK_STS |
TX_CONFIG_PHY_SIDE_SGMII setting is only required for 1000BASEX where
C37_1000BASEX is used instead of C37_SGMII and auto-negotiation is
enabled.

This behavior only occurs in KSZ9477 with old IP and so may not reflect
in current specs.  If neg_mode can be set in certain way that disables
auto-negotiation in 1000BASEX mode but enables auto-negotiation in SGMII
mode then this setting is not required.

> > Never touched by whom? xpcs_config_aneg_c37_sgmii() surely tries to
> > touch it... Don't you think that the absence of this bit from the
> > KSZ9477 implementation might have something to do with KSZ9477's unique
> > need to force the link speed when in in-band mode?
> 
> I think Tristram is talking about xpcs_config_aneg_c37_1000basex()
> here, not SGMII.
> 
> Tristram, as a general note: there is a reason I utterly hate the term
> "SGMII" - and the above illustrates exactly why. There is Cisco SGMII
> (the modified 1000base-X protocol for use with PHYs.) Then there is the
> "other" SGMII that manufacturers like to band about because they want
> to describe their "Serial Gigabit Media Independent Interface" and they
> use it to describe an interface that supports both 1000base-X and Cisco
> SGMII.
> 
> This overloading of "SGMII" leads to nothing but confusion - please be
> specific about whether you are talking about 1000base-X or Cisco SGMII,
> and please please please avoid using "SGMII".
> 
> However, in the kernel code, we use "SGMII" exclusively to mean Cisco
> SGMII.

I use the terms SGMII and 1000BASEX just like in Linux driver where there
are PHY_INTERFACE_MODE_SGMII and PHY_INTERFACE_MODE_1000BASEX, and it is
also how the SGMII module operates where there are two register settings
to use on these modes.  What is confusing is how to call the SFPs using
which mode.

All the fiber transceivers like 1000Base-SX and 1000Base-LX operate in
1000BASEX mode.

All 10/100/1000Base-T SFPs I tested operate in SGMII mode.

All 1000Base-T SFPs with RJ45 connector operate in 1000BASEX mode at the
beginning.  If a PHY is found inside (typically Marvell) that PHY driver
can change the mode to SGMII.  If that PHY driver is forced to not change
it to SGMII mode then 1000BASEX mode can still be used.

The major difference between 1000BASEX and SGMII modes in KSZ9477 is
there are link up and link down interrupts in SGMII mode but only link up
interrupt in 1000BASEX mode.  The phylink code can use the SFP cage logic
to detect the fiber cable is unplugged and say the link is down, so that
may be why the implementation behaves like that, but that does not work
for 1000Base-T SFP that operates in 1000BASEX mode.
 
> > > KSZ9477 errata module 7 indicates the MII_ADVERTISE register needs to be
> > > set 0x01A0.  This is done with phylink_mii_c22_pcs_encode_advertisement()
> > > but only for 1000BaseX mode.  I probably need to add that code in SGMII
> > > configuration.
> 
> Hang on one moment... I think we're going off to another problem.
> 
> For 1000base-X, we do use phylink_mii_c22_pcs_encode_advertisement()
> which will generate the advertisement word for 1000base-X.
> 
> For Cisco SGMII, it will generate the tx_config word for a MAC-side
> setup (which is basically the fixed 0x4001 value.) From what I read
> in KSZ9477, this value would be unsuitable for a case where the
> following register values are:
> 
>         DW_VR_MII_PCS_MODE_C37_SGMII set
>         DW_VR_MII_TX_CONFIG_PHY_SIDE_SGMII set
>         DW_VR_MII_DIG_CTRL1_PHY_MODE_CTRL clear
> 
> meaning that we're generating a SGMII PHY-side word indicating the
> parameters to be used from the registers rather than hardware signals.
> 
> > > The default value of this register is 0x20.  This update
> > > depends on SFP.  So far I did not find a SGMII SFP that requires this
> > > setting.  This issue is more like the hardware did not set the default
> > > value properly.  As I said, the SGMII port works with SGMII SFP after
> > > power up without programming anything.

TX_CONFIG_PHY_SIDE_SGMII is never set for C37_SGMII mode.
Again I am not sure how this problem can be triggered.  I was just told
to set this value.  And a different chip with new IP has this value by
default.

> > > I am always confused by the master/slave - phy/mac nature of the SFP.
> > > The hardware designers seem to think the SGMII module is acting as a
> > > master then the slave is on the other side, like physically outside the
> > > chip.  I generally think of the slave is inside the SFP, as every board
> > > is programmed that way.
> 
> I think you're getting confused by microchip terminology.
> 
> Cisco SGMII is an asymmetric protocol. Cisco invented it as a way of:
> 1. supporting 10M and 100M speeds over a single data pair in each
>    direction.
> 2. sending the parameters of that link from the PHY to the MAC/PCS over
>    that single data pair.
> 
> They took the IEEE 1000base-X specification as a basis (which is
> symmetric negotiation via a 16-bit word).
> 
> The Cisco SGMII configuration word from the PHY to the PCS/MAC
> contains:
> 
>         bit 15 - link status
>         bit 14 - (reserved for AN acknowledge as per 1000base-X)
>         bit 13 - reserved (zero)
>         bit 12 - duplex mode
>         bit 11, 10 - speed
>         bits 9..1 - reserved (zero)
>         bit 0 - set to 1
> 
> This is "PHY" mode, or in Microchip parlence "master" mode - because
> the PHY is dictating what the other end should be doing.
> 
> When the PCS/MAC receives this, the PCS/MAC is expected to respond
> with a configuration word containing:
> 
>         bit 15 - zero
>         bit 14 - set to 1 (indicating acknowledge)
>         bit 13..1 - zero
>         bit 0 - set to 1
> 
> This is MAC mode, or in Microchip parlence "slave" mode - because the
> MAC is being told what it should do.
> 
> So, for a Cisco SGMII link with a SFP module which has a PHY embedded
> inside, you definitely want to be using MAC mode, because the PHY on
> the SFP module will be dictating the speed and duplex to the components
> outside of the SFP - in other words the PCS and MAC.

I do not know the internal working in the SGMII module where the
registers may have incorrect values.  I only verify the SGMII port is
working by sending and receiving traffic after changing some registers.

> > > There are some SFPs
> > > that will use only 1000BaseX mode.  I wonder why the SFP manufacturers do
> > > that.  It seems the PHY access is also not reliable as some registers
> > > always have 0xff value in lower part of the 16-bit value.  That may be
> > > the reason that there is a special Marvell PHY id just for Finisar.
> 
> I don't have any modules that have a Finisar PHY rather than a Marvell
> PHY. I wonder if the problem is that the Finisar module doesn't like
> multi-byte I2C accesses to the PHY.
> 
> Another thing - make sure that the I2C bus to the SFP cage is running
> at 100kHz, not 400kHz.

What I meant is this Marvell PHY ID 0x01ff0cc0.  Basically it is
0x01410cc0 for Marvell 88E1111 with 0x41 replaced with 0xff.  Some
registers like the status register even has 0xff value all the time so
the PHY can never report the link is down.
diff mbox series

Patch

diff --git a/drivers/net/pcs/pcs-xpcs.c b/drivers/net/pcs/pcs-xpcs.c
index 1faa37f0e7b9..ddf6cd4b37a7 100644
--- a/drivers/net/pcs/pcs-xpcs.c
+++ b/drivers/net/pcs/pcs-xpcs.c
@@ -768,6 +768,14 @@  static int xpcs_config_aneg_c37_1000basex(struct dw_xpcs *xpcs,
 		val |= DW_VR_MII_AN_INTR_EN;
 	}
 
+	if (xpcs->quirk == DW_XPCS_QUIRK_MICROCHIP_KSZ &&
+	    neg_mode == PHYLINK_PCS_NEG_INBAND_ENABLED) {
+		mask |= DW_VR_MII_SGMII_LINK_STS | DW_VR_MII_TX_CONFIG_MASK;
+		val |= FIELD_PREP(DW_VR_MII_TX_CONFIG_MASK,
+				  DW_VR_MII_TX_CONFIG_PHY_SIDE_SGMII);
+		val |= DW_VR_MII_SGMII_LINK_STS;
+	}
+
 	ret = xpcs_modify(xpcs, MDIO_MMD_VEND2, DW_VR_MII_AN_CTRL, mask, val);
 	if (ret < 0)
 		return ret;
@@ -982,6 +990,15 @@  static int xpcs_get_state_c37_sgmii(struct dw_xpcs *xpcs,
 	if (ret < 0)
 		return ret;
 
+	/* DW_VR_MII_AN_STS_C37_ANCMPLT_INTR just means link change in SGMII
+	 * mode, so needs to be cleared.  It can appear just by itself, which
+	 * does not mean the link is up.
+	 */
+	if (xpcs->quirk == DW_XPCS_QUIRK_MICROCHIP_KSZ &&
+	    (ret & DW_VR_MII_AN_STS_C37_ANCMPLT_INTR)) {
+		ret &= ~DW_VR_MII_AN_STS_C37_ANCMPLT_INTR;
+		xpcs_write(xpcs, MDIO_MMD_VEND2, DW_VR_MII_AN_INTR_STS, 0);
+	}
 	if (ret & DW_VR_MII_C37_ANSGM_SP_LNKSTS) {
 		int speed_value;
 
@@ -1037,6 +1054,18 @@  static int xpcs_get_state_c37_1000basex(struct dw_xpcs *xpcs,
 {
 	int lpa, bmsr;
 
+	/* DW_VR_MII_AN_STS_C37_ANCMPLT_INTR just means link change, so needs
+	 * to be cleared.  If polling is not used then it is cleared by
+	 * following code.
+	 */
+	if (xpcs->quirk == DW_XPCS_QUIRK_MICROCHIP_KSZ && xpcs->pcs.poll) {
+		int val;
+
+		val = xpcs_read(xpcs, MDIO_MMD_VEND2, DW_VR_MII_AN_INTR_STS);
+		if (val & DW_VR_MII_AN_STS_C37_ANCMPLT_INTR)
+			xpcs_write(xpcs, MDIO_MMD_VEND2, DW_VR_MII_AN_INTR_STS,
+				   0);
+	}
 	if (linkmode_test_bit(ETHTOOL_LINK_MODE_Autoneg_BIT,
 			      state->advertising)) {
 		/* Reset link state */
@@ -1138,9 +1167,14 @@  static void xpcs_link_up_sgmii_1000basex(struct dw_xpcs *xpcs,
 					 phy_interface_t interface,
 					 int speed, int duplex)
 {
+	u16 val = 0;
 	int ret;
 
-	if (neg_mode == PHYLINK_PCS_NEG_INBAND_ENABLED)
+	/* Microchip KSZ SGMII implementation has a bug that needs MII_BMCR
+	 * register to be updated with current speed to pass traffic.
+	 */
+	if (xpcs->quirk != DW_XPCS_QUIRK_MICROCHIP_KSZ &&
+	    neg_mode == PHYLINK_PCS_NEG_INBAND_ENABLED)
 		return;
 
 	if (interface == PHY_INTERFACE_MODE_1000BASEX) {
@@ -1155,10 +1189,18 @@  static void xpcs_link_up_sgmii_1000basex(struct dw_xpcs *xpcs,
 			dev_err(&xpcs->mdiodev->dev,
 				"%s: half duplex not supported\n",
 				__func__);
+
+		/* No need to update MII_BMCR register if not in SGMII mode. */
+		if (xpcs->quirk == DW_XPCS_QUIRK_MICROCHIP_KSZ &&
+		    neg_mode == PHYLINK_PCS_NEG_INBAND_ENABLED)
+			return;
 	}
 
+	if (xpcs->quirk == DW_XPCS_QUIRK_MICROCHIP_KSZ &&
+	    neg_mode == PHYLINK_PCS_NEG_INBAND_ENABLED)
+		val = BMCR_ANENABLE;
 	ret = xpcs_write(xpcs, MDIO_MMD_VEND2, MII_BMCR,
-			 mii_bmcr_encode_fixed(speed, duplex));
+			 val | mii_bmcr_encode_fixed(speed, duplex));
 	if (ret)
 		dev_err(&xpcs->mdiodev->dev, "%s: xpcs_write returned %pe\n",
 			__func__, ERR_PTR(ret));
@@ -1563,5 +1605,11 @@  void xpcs_destroy_pcs(struct phylink_pcs *pcs)
 }
 EXPORT_SYMBOL_GPL(xpcs_destroy_pcs);
 
+void xpcs_set_quirk(struct dw_xpcs *xpcs, int quirk)
+{
+	xpcs->quirk = quirk;
+}
+EXPORT_SYMBOL_GPL(xpcs_set_quirk);
+
 MODULE_DESCRIPTION("Synopsys DesignWare XPCS library");
 MODULE_LICENSE("GPL v2");
diff --git a/drivers/net/pcs/pcs-xpcs.h b/drivers/net/pcs/pcs-xpcs.h
index adc5a0b3c883..1348a9a05ca6 100644
--- a/drivers/net/pcs/pcs-xpcs.h
+++ b/drivers/net/pcs/pcs-xpcs.h
@@ -73,6 +73,7 @@ 
 
 /* VR_MII_AN_CTRL */
 #define DW_VR_MII_AN_CTRL_8BIT			BIT(8)
+#define DW_VR_MII_SGMII_LINK_STS		BIT(4)
 #define DW_VR_MII_TX_CONFIG_MASK		BIT(3)
 #define DW_VR_MII_TX_CONFIG_PHY_SIDE_SGMII	0x1
 #define DW_VR_MII_TX_CONFIG_MAC_SIDE_SGMII	0x0
@@ -122,6 +123,7 @@  struct dw_xpcs {
 	struct phylink_pcs pcs;
 	phy_interface_t interface;
 	bool need_reset;
+	int quirk;
 };
 
 int xpcs_read(struct dw_xpcs *xpcs, int dev, u32 reg);
diff --git a/include/linux/pcs/pcs-xpcs.h b/include/linux/pcs/pcs-xpcs.h
index 733f4ddd2ef1..7fccbff2383d 100644
--- a/include/linux/pcs/pcs-xpcs.h
+++ b/include/linux/pcs/pcs-xpcs.h
@@ -41,6 +41,10 @@  enum dw_xpcs_pma_id {
 	WX_TXGBE_XPCS_PMA_10G_ID = 0x0018fc80,
 };
 
+enum dw_xpcs_quirks {
+	DW_XPCS_QUIRK_MICROCHIP_KSZ = 1,
+};
+
 struct dw_xpcs_info {
 	u32 pcs;
 	u32 pma;
@@ -59,4 +63,6 @@  void xpcs_destroy(struct dw_xpcs *xpcs);
 struct phylink_pcs *xpcs_create_pcs_mdiodev(struct mii_bus *bus, int addr);
 void xpcs_destroy_pcs(struct phylink_pcs *pcs);
 
+void xpcs_set_quirk(struct dw_xpcs *xpcs, int quirk);
+
 #endif /* __LINUX_PCS_XPCS_H */