diff mbox series

[iwl-next,v2] ice: Disable Cage Max Power override

Message ID 20230824085459.35998-1-wojciech.drewek@intel.com (mailing list archive)
State Changes Requested
Delegated to: Netdev Maintainers
Headers show
Series [iwl-next,v2] ice: Disable Cage Max Power override | expand

Checks

Context Check Description
netdev/series_format warning Single patches do not need cover letters; Target tree name not specified in the subject
netdev/tree_selection success Guessed tree name to be net-next
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 1330 this patch: 1330
netdev/cc_maintainers warning 6 maintainers not CCed: kuba@kernel.org jesse.brandeburg@intel.com davem@davemloft.net anthony.l.nguyen@intel.com pabeni@redhat.com edumazet@google.com
netdev/build_clang success Errors and warnings before: 1353 this patch: 1353
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 1353 this patch: 1353
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 79 lines checked
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Wojciech Drewek Aug. 24, 2023, 8:54 a.m. UTC
NVM module called "Cage Max Power override" allows to
change max power in the cage. This can be achieved
using external tools. The responsibility of the ice driver is to
go back to the default settings whenever port split is done.
This is achieved by clearing Override Enable bit in the
NVM module. Override of the max power is disabled so the
default value will be used.

Signed-off-by: Wojciech Drewek <wojciech.drewek@intel.com>
---
v2: Move ICE_NUM_OF_CAGES to ice_adminq_cmd.h,
    ice_devlink_aq_clear_cmpo doc changes
---
 .../net/ethernet/intel/ice/ice_adminq_cmd.h   | 11 +++++++
 drivers/net/ethernet/intel/ice/ice_devlink.c  | 32 +++++++++++++++++++
 drivers/net/ethernet/intel/ice/ice_nvm.c      |  2 +-
 drivers/net/ethernet/intel/ice/ice_nvm.h      |  4 +++
 4 files changed, 48 insertions(+), 1 deletion(-)

Comments

Przemek Kitszel Aug. 24, 2023, 9:38 a.m. UTC | #1
On 8/24/23 10:54, Wojciech Drewek wrote:
> NVM module called "Cage Max Power override" allows to
> change max power in the cage. This can be achieved
> using external tools. The responsibility of the ice driver is to
> go back to the default settings whenever port split is done.
> This is achieved by clearing Override Enable bit in the
> NVM module. Override of the max power is disabled so the
> default value will be used.
> 
> Signed-off-by: Wojciech Drewek <wojciech.drewek@intel.com>
> ---
> v2: Move ICE_NUM_OF_CAGES to ice_adminq_cmd.h,
>      ice_devlink_aq_clear_cmpo doc changes
> ---
>   .../net/ethernet/intel/ice/ice_adminq_cmd.h   | 11 +++++++
>   drivers/net/ethernet/intel/ice/ice_devlink.c  | 32 +++++++++++++++++++
>   drivers/net/ethernet/intel/ice/ice_nvm.c      |  2 +-
>   drivers/net/ethernet/intel/ice/ice_nvm.h      |  4 +++
>   4 files changed, 48 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
> index ffbe9d3a5d77..01eadeb46db2 100644
> --- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
> +++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
> @@ -1569,6 +1569,17 @@ struct ice_aqc_nvm {
>   	__le32 addr_low;
>   };
>   
> +#define ICE_AQC_NVM_CMPO_MOD_ID			0x153
> +
> +#define ICE_NUM_OF_CAGES 8
> +
> +/* Cage Max Power override NVM module */
> +struct ice_aqc_nvm_cmpo {
> +	__le16 length;
> +#define ICE_AQC_NVM_CMPO_ENABLE	BIT(8)
> +	__le16 cages_cfg[ICE_NUM_OF_CAGES];
> +};
> +
>   #define ICE_AQC_NVM_START_POINT			0
>   
>   /* NVM Checksum Command (direct, 0x0706) */
> diff --git a/drivers/net/ethernet/intel/ice/ice_devlink.c b/drivers/net/ethernet/intel/ice/ice_devlink.c
> index 80dc5445b50d..2bd570073bdc 100644
> --- a/drivers/net/ethernet/intel/ice/ice_devlink.c
> +++ b/drivers/net/ethernet/intel/ice/ice_devlink.c
> @@ -591,6 +591,33 @@ static void ice_devlink_port_options_print(struct ice_pf *pf)
>   	kfree(options);
>   }
>   
> +/**
> + * ice_devlink_aq_clear_cmpo - clear Cage Max Power override
> + * @hw: pointer to the HW struct
> + *
> + * Clear Cage Max Power override enable bit for each of the cages
> + */
> +static int
> +ice_devlink_aq_clear_cmpo(struct ice_hw *hw)
> +{
> +	struct ice_aqc_nvm_cmpo data;
> +	int ret, i;
> +
> +	/* Read Cage Max Power override NVM module */
> +	ret = ice_aq_read_nvm(hw, ICE_AQC_NVM_CMPO_MOD_ID, 0, sizeof(data),
> +			      &data, true, false, NULL);
> +	if (ret)
> +		return ret;
> +
> +	for (i = 0; i < ICE_NUM_OF_CAGES; i++)
> +		data.cages_cfg[i] &= ~cpu_to_le16(ICE_AQC_NVM_CMPO_ENABLE);
> +
> +	/* Do not update the length word since it is not permitted */
> +	return ice_aq_update_nvm(hw, ICE_AQC_NVM_CMPO_MOD_ID, 2,
> +				 sizeof(data.cages_cfg), data.cages_cfg,
> +				 false, 0, NULL);
> +}
> +
>   /**
>    * ice_devlink_aq_set_port_option - Send set port option admin queue command
>    * @pf: the PF to print split port options
> @@ -623,6 +650,11 @@ ice_devlink_aq_set_port_option(struct ice_pf *pf, u8 option_idx,
>   		return -EIO;
>   	}
>   
> +	status = ice_devlink_aq_clear_cmpo(&pf->hw);
> +	if (status)
> +		dev_dbg(dev, "Failed to clear Cage Max Power override, err %d aq_err %d\n",
> +			status, pf->hw.adminq.sq_last_status);
> +
>   	status = ice_nvm_write_activate(&pf->hw, ICE_AQC_NVM_ACTIV_REQ_EMPR, NULL);
>   	if (status) {
>   		dev_dbg(dev, "ice_nvm_write_activate failed, err %d aq_err %d\n",
> diff --git a/drivers/net/ethernet/intel/ice/ice_nvm.c b/drivers/net/ethernet/intel/ice/ice_nvm.c
> index f6f52a248066..745f2459943f 100644
> --- a/drivers/net/ethernet/intel/ice/ice_nvm.c
> +++ b/drivers/net/ethernet/intel/ice/ice_nvm.c
> @@ -18,7 +18,7 @@
>    *
>    * Read the NVM using the admin queue commands (0x0701)
>    */
> -static int
> +int
>   ice_aq_read_nvm(struct ice_hw *hw, u16 module_typeid, u32 offset, u16 length,
>   		void *data, bool last_command, bool read_shadow_ram,
>   		struct ice_sq_cd *cd)
> diff --git a/drivers/net/ethernet/intel/ice/ice_nvm.h b/drivers/net/ethernet/intel/ice/ice_nvm.h
> index 774c2317967d..90f36e19e06b 100644
> --- a/drivers/net/ethernet/intel/ice/ice_nvm.h
> +++ b/drivers/net/ethernet/intel/ice/ice_nvm.h
> @@ -15,6 +15,10 @@ struct ice_orom_civd_info {
>   int ice_acquire_nvm(struct ice_hw *hw, enum ice_aq_res_access_type access);
>   void ice_release_nvm(struct ice_hw *hw);
>   int
> +ice_aq_read_nvm(struct ice_hw *hw, u16 module_typeid, u32 offset, u16 length,
> +		void *data, bool last_command, bool read_shadow_ram,
> +		struct ice_sq_cd *cd);

@cd param is always NULL, we could drop it
(not necessarily in this series)

> +int
>   ice_read_flat_nvm(struct ice_hw *hw, u32 offset, u32 *length, u8 *data,
>   		  bool read_shadow_ram);
>   int


Thanks,
Reviewed-by: Przemek Kitszel <przemyslaw.kitszel@intel.com>
Jakub Kicinski Aug. 24, 2023, 3:32 p.m. UTC | #2
On Thu, 24 Aug 2023 10:54:59 +0200 Wojciech Drewek wrote:
> NVM module called "Cage Max Power override" allows to
> change max power in the cage. This can be achieved
> using external tools. The responsibility of the ice driver is to
> go back to the default settings whenever port split is done.
> This is achieved by clearing Override Enable bit in the
> NVM module. Override of the max power is disabled so the
> default value will be used.

Can you say more? We have ETHTOOL_MSG_MODULE_GET / SET, sounds like
something we could quite easily get ethtool to support?
Wojciech Drewek Aug. 25, 2023, 11:01 a.m. UTC | #3
CC: Ido

> -----Original Message-----
> From: Jakub Kicinski <kuba@kernel.org>
> Sent: czwartek, 24 sierpnia 2023 17:32
> To: Drewek, Wojciech <wojciech.drewek@intel.com>
> Cc: intel-wired-lan@lists.osuosl.org; netdev@vger.kernel.org; Kitszel, Przemyslaw <przemyslaw.kitszel@intel.com>
> Subject: Re: [PATCH iwl-next v2] ice: Disable Cage Max Power override
> 
> On Thu, 24 Aug 2023 10:54:59 +0200 Wojciech Drewek wrote:
> > NVM module called "Cage Max Power override" allows to
> > change max power in the cage. This can be achieved
> > using external tools. The responsibility of the ice driver is to
> > go back to the default settings whenever port split is done.
> > This is achieved by clearing Override Enable bit in the
> > NVM module. Override of the max power is disabled so the
> > default value will be used.
> 
> Can you say more? We have ETHTOOL_MSG_MODULE_GET / SET, sounds like
> something we could quite easily get ethtool to support?

So you're suggesting that ethtool could support setting the maximum power in the cage? 
Something like:
 - new "--set-module" parameter called "power-max"
 - new "--get-module" parameters: "power-max-allowed", "power-min-allowed" indicating limitations reported by the HW.

About the patch itself, it's only about restoration of the default settings upon port split. Those might be overwritten by 
Intel's external tools.
Jakub Kicinski Aug. 26, 2023, 12:54 a.m. UTC | #4
On Fri, 25 Aug 2023 11:01:07 +0000 Drewek, Wojciech wrote:
> > Can you say more? We have ETHTOOL_MSG_MODULE_GET / SET, sounds like
> > something we could quite easily get ethtool to support?  
> 
> So you're suggesting that ethtool could support setting the maximum power in the cage? 
> Something like:
>  - new "--set-module" parameter called "power-max"
>  - new "--get-module" parameters: "power-max-allowed",
>   "power-min-allowed" indicating limitations reported by the HW.

Yup. Oh, nice you even CCed Ido already :)

> About the patch itself, it's only about restoration of the default
> settings upon port split. Those might be overwritten by Intel's
> external tools.

I guess, the thing that sent me down the path of putting the control
in hands of the user more directly was the question of "why do we need
to reset on port split"? Why is that a policy the driver is supposed
to follow?
Ido Schimmel Aug. 27, 2023, 8:47 a.m. UTC | #5
On Fri, Aug 25, 2023 at 11:01:07AM +0000, Drewek, Wojciech wrote:
> CC: Ido
> 
> > -----Original Message-----
> > From: Jakub Kicinski <kuba@kernel.org>
> > Sent: czwartek, 24 sierpnia 2023 17:32
> > To: Drewek, Wojciech <wojciech.drewek@intel.com>
> > Cc: intel-wired-lan@lists.osuosl.org; netdev@vger.kernel.org; Kitszel, Przemyslaw <przemyslaw.kitszel@intel.com>
> > Subject: Re: [PATCH iwl-next v2] ice: Disable Cage Max Power override
> > 
> > On Thu, 24 Aug 2023 10:54:59 +0200 Wojciech Drewek wrote:
> > > NVM module called "Cage Max Power override" allows to
> > > change max power in the cage. This can be achieved
> > > using external tools. The responsibility of the ice driver is to
> > > go back to the default settings whenever port split is done.
> > > This is achieved by clearing Override Enable bit in the
> > > NVM module. Override of the max power is disabled so the
> > > default value will be used.
> > 
> > Can you say more? We have ETHTOOL_MSG_MODULE_GET / SET, sounds like
> > something we could quite easily get ethtool to support?
> 
> So you're suggesting that ethtool could support setting the maximum power in the cage? 
> Something like:
>  - new "--set-module" parameter called "power-max"
>  - new "--get-module" parameters: "power-max-allowed", "power-min-allowed" indicating limitations reported by the HW.
> 
> About the patch itself, it's only about restoration of the default settings upon port split. Those might be overwritten by 
> Intel's external tools.

Can you please explain why this setting needs to be changed in the first
place and why it needs to be restored to the default on port split?
Wojciech Drewek Aug. 29, 2023, 9:12 a.m. UTC | #6
> -----Original Message-----
> From: Ido Schimmel <idosch@idosch.org>
> Sent: niedziela, 27 sierpnia 2023 10:47
> To: Drewek, Wojciech <wojciech.drewek@intel.com>
> Cc: Jakub Kicinski <kuba@kernel.org>; intel-wired-lan@lists.osuosl.org; netdev@vger.kernel.org; Kitszel, Przemyslaw
> <przemyslaw.kitszel@intel.com>; idosch@nvidia.com
> Subject: Re: [PATCH iwl-next v2] ice: Disable Cage Max Power override
> 
> On Fri, Aug 25, 2023 at 11:01:07AM +0000, Drewek, Wojciech wrote:
> > CC: Ido
> >
> > > -----Original Message-----
> > > From: Jakub Kicinski <kuba@kernel.org>
> > > Sent: czwartek, 24 sierpnia 2023 17:32
> > > To: Drewek, Wojciech <wojciech.drewek@intel.com>
> > > Cc: intel-wired-lan@lists.osuosl.org; netdev@vger.kernel.org; Kitszel, Przemyslaw <przemyslaw.kitszel@intel.com>
> > > Subject: Re: [PATCH iwl-next v2] ice: Disable Cage Max Power override
> > >
> > > On Thu, 24 Aug 2023 10:54:59 +0200 Wojciech Drewek wrote:
> > > > NVM module called "Cage Max Power override" allows to
> > > > change max power in the cage. This can be achieved
> > > > using external tools. The responsibility of the ice driver is to
> > > > go back to the default settings whenever port split is done.
> > > > This is achieved by clearing Override Enable bit in the
> > > > NVM module. Override of the max power is disabled so the
> > > > default value will be used.
> > >
> > > Can you say more? We have ETHTOOL_MSG_MODULE_GET / SET, sounds like
> > > something we could quite easily get ethtool to support?
> >
> > So you're suggesting that ethtool could support setting the maximum power in the cage?
> > Something like:
> >  - new "--set-module" parameter called "power-max"
> >  - new "--get-module" parameters: "power-max-allowed", "power-min-allowed" indicating limitations reported by the HW.
> >
> > About the patch itself, it's only about restoration of the default settings upon port split. Those might be overwritten by
> > Intel's external tools.
> 
> Can you please explain why this setting needs to be changed in the first
> place and why it needs to be restored to the default on port split?

In some cases users are trying to use media with power exceeding max allowed value.
Port split require system reboot so it feels natural to me to restore default settings.
Ido Schimmel Aug. 30, 2023, 3:17 p.m. UTC | #7
On Tue, Aug 29, 2023 at 09:12:22AM +0000, Drewek, Wojciech wrote:
> In some cases users are trying to use media with power exceeding max allowed value.
> Port split require system reboot so it feels natural to me to restore default settings.

I don't believe it's the kernel's responsibility to undo changes done by
external tools. Given that the tool is able to change this setting, I
assume it can also restore it back to default.

Moreover, it doesn't sound like port split won't work without this
change, so placing this change there only because we assume that a
reboot will follow seems random.

I think the best way forward is to extend ethtool as was already
suggested. It should allow you to avoid the split brain situation where
the hardware is configured by both the kernel and an external tool.
Wojciech Drewek Sept. 1, 2023, 1:34 p.m. UTC | #8
> -----Original Message-----
> From: Ido Schimmel <idosch@idosch.org>
> Sent: środa, 30 sierpnia 2023 17:17
> To: Drewek, Wojciech <wojciech.drewek@intel.com>
> Cc: Jakub Kicinski <kuba@kernel.org>; intel-wired-lan@lists.osuosl.org;
> netdev@vger.kernel.org; Kitszel, Przemyslaw <przemyslaw.kitszel@intel.com>;
> idosch@nvidia.com
> Subject: Re: [PATCH iwl-next v2] ice: Disable Cage Max Power override
> 
> On Tue, Aug 29, 2023 at 09:12:22AM +0000, Drewek, Wojciech wrote:
> > In some cases users are trying to use media with power exceeding max
> allowed value.
> > Port split require system reboot so it feels natural to me to restore default
> settings.
> 
> I don't believe it's the kernel's responsibility to undo changes done by
> external tools. Given that the tool is able to change this setting, I
> assume it can also restore it back to default.

I agree with that, but we can end up with no link if we don't restore
default settings. Let me explain how.

> 
> Moreover, it doesn't sound like port split won't work without this
> change, so placing this change there only because we assume that a
> reboot will follow seems random.

After port split, we might end up with no link in one of the ports.
In dual port card if we increase max pwr on the 1st cage the 2nd one
will have max pwr decreased automatically. This might be useful if we have port
option with count 1, the second cage is not used in this case. If we then split and
use two ports now, the second port will use second cage which has decreased max pwr, default module
used there will not work.

So, should we leave the restoration of the default settings to the user?

> 
> I think the best way forward is to extend ethtool as was already
> suggested. It should allow you to avoid the split brain situation where
> the hardware is configured by both the kernel and an external tool.

I'll try to follow up with the ethtool extension.
Ido Schimmel Sept. 12, 2023, 2:26 p.m. UTC | #9
On Fri, Sep 01, 2023 at 01:34:04PM +0000, Drewek, Wojciech wrote:
> 
> 
> > -----Original Message-----
> > From: Ido Schimmel <idosch@idosch.org>
> > Sent: środa, 30 sierpnia 2023 17:17
> > To: Drewek, Wojciech <wojciech.drewek@intel.com>
> > Cc: Jakub Kicinski <kuba@kernel.org>; intel-wired-lan@lists.osuosl.org;
> > netdev@vger.kernel.org; Kitszel, Przemyslaw <przemyslaw.kitszel@intel.com>;
> > idosch@nvidia.com
> > Subject: Re: [PATCH iwl-next v2] ice: Disable Cage Max Power override
> > 
> > On Tue, Aug 29, 2023 at 09:12:22AM +0000, Drewek, Wojciech wrote:
> > > In some cases users are trying to use media with power exceeding max
> > allowed value.
> > > Port split require system reboot so it feels natural to me to restore default
> > settings.
> > 
> > I don't believe it's the kernel's responsibility to undo changes done by
> > external tools. Given that the tool is able to change this setting, I
> > assume it can also restore it back to default.
> 
> I agree with that, but we can end up with no link if we don't restore
> default settings. Let me explain how.
> 
> > 
> > Moreover, it doesn't sound like port split won't work without this
> > change, so placing this change there only because we assume that a
> > reboot will follow seems random.
> 
> After port split, we might end up with no link in one of the ports.
> In dual port card if we increase max pwr on the 1st cage the 2nd one
> will have max pwr decreased automatically. This might be useful if we have port
> option with count 1, the second cage is not used in this case. If we then split and
> use two ports now, the second port will use second cage which has decreased max pwr, default module
> used there will not work.

Not sure I understand how it's related to port split. You have a dual
port card with two netdevs (e.g., eth0 and eth1) and two cages. You used
some tool to increase the max power on the first cage which means that
the second cage will have its max power decreased. Now you split the
first port:

# devlink port split eth0 count 2

eth0s0 and eth0s1 correspond to the first cage. Why are they affected by
the second cage?

I have a feeling we mean different things by "port split". As far as I'm
concerned, you split a port in order to connect a splitter cable to the
cage. For example:
https://network.nvidia.com/related-docs/prod_cables/PB_MCP7H50-Vxxxyzz_200GbE_QSFP56_to_2x100GbE_QSFP56_DAC.pdf

> 
> So, should we leave the restoration of the default settings to the user?

Let's first clear up the above. BTW, if a port doesn't come up because
of power issues you can try communicating it to user space using
'ETHTOOL_LINK_EXT_STATE_POWER_BUDGET_EXCEEDED'.

> 
> > 
> > I think the best way forward is to extend ethtool as was already
> > suggested. It should allow you to avoid the split brain situation where
> > the hardware is configured by both the kernel and an external tool.
> 
> I'll try to follow up with the ethtool extension.
Wojciech Drewek Sept. 15, 2023, 1:15 p.m. UTC | #10
> -----Original Message-----
> From: Ido Schimmel <idosch@idosch.org>
> Sent: Tuesday, September 12, 2023 4:27 PM
> To: Drewek, Wojciech <wojciech.drewek@intel.com>
> Cc: Jakub Kicinski <kuba@kernel.org>; intel-wired-lan@lists.osuosl.org;
> netdev@vger.kernel.org; Kitszel, Przemyslaw <przemyslaw.kitszel@intel.com>;
> idosch@nvidia.com
> Subject: Re: [PATCH iwl-next v2] ice: Disable Cage Max Power override
> 
> On Fri, Sep 01, 2023 at 01:34:04PM +0000, Drewek, Wojciech wrote:
> >
> >
> > > -----Original Message-----
> > > From: Ido Schimmel <idosch@idosch.org>
> > > Sent: środa, 30 sierpnia 2023 17:17
> > > To: Drewek, Wojciech <wojciech.drewek@intel.com>
> > > Cc: Jakub Kicinski <kuba@kernel.org>; intel-wired-lan@lists.osuosl.org;
> > > netdev@vger.kernel.org; Kitszel, Przemyslaw
> <przemyslaw.kitszel@intel.com>;
> > > idosch@nvidia.com
> > > Subject: Re: [PATCH iwl-next v2] ice: Disable Cage Max Power override
> > >
> > > On Tue, Aug 29, 2023 at 09:12:22AM +0000, Drewek, Wojciech wrote:
> > > > In some cases users are trying to use media with power exceeding max
> > > allowed value.
> > > > Port split require system reboot so it feels natural to me to restore default
> > > settings.
> > >
> > > I don't believe it's the kernel's responsibility to undo changes done by
> > > external tools. Given that the tool is able to change this setting, I
> > > assume it can also restore it back to default.
> >
> > I agree with that, but we can end up with no link if we don't restore
> > default settings. Let me explain how.
> >
> > >
> > > Moreover, it doesn't sound like port split won't work without this
> > > change, so placing this change there only because we assume that a
> > > reboot will follow seems random.
> >
> > After port split, we might end up with no link in one of the ports.
> > In dual port card if we increase max pwr on the 1st cage the 2nd one
> > will have max pwr decreased automatically. This might be useful if we have port
> > option with count 1, the second cage is not used in this case. If we then split and
> > use two ports now, the second port will use second cage which has decreased
> max pwr, default module
> > used there will not work.
> 
> Not sure I understand how it's related to port split. You have a dual
> port card with two netdevs (e.g., eth0 and eth1) and two cages. You used
> some tool to increase the max power on the first cage which means that
> the second cage will have its max power decreased. Now you split the
> first port:
> 
> # devlink port split eth0 count 2
> 
> eth0s0 and eth0s1 correspond to the first cage. Why are they affected by
> the second cage?
> 
> I have a feeling we mean different things by "port split". As far as I'm
> concerned, you split a port in order to connect a splitter cable to the
> cage. For example:
> https://network.nvidia.com/related-docs/prod_cables/PB_MCP7H50-
> Vxxxyzz_200GbE_QSFP56_to_2x100GbE_QSFP56_DAC.pdf

In ice driver port split works per device not per port. According to
/Documentation/networking/devlink/ice.rst, section "Port split":
	The "ice" driver supports port splitting only for port 0, as the FW has
	a predefined set of available port split options for the whole device.
And if you look at available port options (same file) you'll see that in case of "Split count" 1
only quad 1 is working. And in case of "Split count" 2 the second quad might be used. So, if we
increase max_pwr in the first quad in case of "Split count" 1 and then switch to "Split count" 2,
the second quad might end up with no link (because it will have decreased max_pwr).

> 
> >
> > So, should we leave the restoration of the default settings to the user?
> 
> Let's first clear up the above. BTW, if a port doesn't come up because
> of power issues you can try communicating it to user space using
> 'ETHTOOL_LINK_EXT_STATE_POWER_BUDGET_EXCEEDED'.

Thanks for suggestion.

PS I'll be on holidays for two weeks so sorry for lack of reply in advance :)

> 
> >
> > >
> > > I think the best way forward is to extend ethtool as was already
> > > suggested. It should allow you to avoid the split brain situation where
> > > the hardware is configured by both the kernel and an external tool.
> >
> > I'll try to follow up with the ethtool extension.
Ido Schimmel Sept. 21, 2023, 12:09 p.m. UTC | #11
On Fri, Sep 15, 2023 at 01:15:01PM +0000, Drewek, Wojciech wrote:
> In ice driver port split works per device not per port. According to
> /Documentation/networking/devlink/ice.rst, section "Port split":
> 	The "ice" driver supports port splitting only for port 0, as the FW has
> 	a predefined set of available port split options for the whole device.
> And if you look at available port options (same file) you'll see that in case of "Split count" 1
> only quad 1 is working. And in case of "Split count" 2 the second quad might be used. So, if we
> increase max_pwr in the first quad in case of "Split count" 1 and then switch to "Split count" 2,
> the second quad might end up with no link (because it will have decreased max_pwr).

But there's also an option where the second cage isn't actually used.
Anyway, my suggestion is to allow user space to set / get the max power
using ethtool and give user space visibility about link down reason via
the ethtool extended state.
Wojciech Drewek Oct. 5, 2023, 1:58 p.m. UTC | #12
> -----Original Message-----
> From: Intel-wired-lan <intel-wired-lan-bounces@osuosl.org> On Behalf Of Ido
> Schimmel
> Sent: Thursday, September 21, 2023 2:09 PM
> To: Drewek, Wojciech <wojciech.drewek@intel.com>
> Cc: Jakub Kicinski <kuba@kernel.org>; idosch@nvidia.com; intel-wired-
> lan@lists.osuosl.org; Kitszel, Przemyslaw <przemyslaw.kitszel@intel.com>;
> netdev@vger.kernel.org
> Subject: Re: [Intel-wired-lan] [PATCH iwl-next v2] ice: Disable Cage Max Power
> override
> 
> On Fri, Sep 15, 2023 at 01:15:01PM +0000, Drewek, Wojciech wrote:
> > In ice driver port split works per device not per port. According to
> > /Documentation/networking/devlink/ice.rst, section "Port split":
> > 	The "ice" driver supports port splitting only for port 0, as the FW has
> > 	a predefined set of available port split options for the whole device.
> > And if you look at available port options (same file) you'll see that in case of
> "Split count" 1
> > only quad 1 is working. And in case of "Split count" 2 the second quad might
> be used. So, if we
> > increase max_pwr in the first quad in case of "Split count" 1 and then switch
> to "Split count" 2,
> > the second quad might end up with no link (because it will have decreased
> max_pwr).
> 
> But there's also an option where the second cage isn't actually used.
> Anyway, my suggestion is to allow user space to set / get the max power
> using ethtool and give user space visibility about link down reason via
> the ethtool extended state.

Thanks for discussion Ido, we will follow your suggestion.

> _______________________________________________
> Intel-wired-lan mailing list
> Intel-wired-lan@osuosl.org
> https://lists.osuosl.org/mailman/listinfo/intel-wired-lan
diff mbox series

Patch

diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
index ffbe9d3a5d77..01eadeb46db2 100644
--- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
+++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
@@ -1569,6 +1569,17 @@  struct ice_aqc_nvm {
 	__le32 addr_low;
 };
 
+#define ICE_AQC_NVM_CMPO_MOD_ID			0x153
+
+#define ICE_NUM_OF_CAGES 8
+
+/* Cage Max Power override NVM module */
+struct ice_aqc_nvm_cmpo {
+	__le16 length;
+#define ICE_AQC_NVM_CMPO_ENABLE	BIT(8)
+	__le16 cages_cfg[ICE_NUM_OF_CAGES];
+};
+
 #define ICE_AQC_NVM_START_POINT			0
 
 /* NVM Checksum Command (direct, 0x0706) */
diff --git a/drivers/net/ethernet/intel/ice/ice_devlink.c b/drivers/net/ethernet/intel/ice/ice_devlink.c
index 80dc5445b50d..2bd570073bdc 100644
--- a/drivers/net/ethernet/intel/ice/ice_devlink.c
+++ b/drivers/net/ethernet/intel/ice/ice_devlink.c
@@ -591,6 +591,33 @@  static void ice_devlink_port_options_print(struct ice_pf *pf)
 	kfree(options);
 }
 
+/**
+ * ice_devlink_aq_clear_cmpo - clear Cage Max Power override
+ * @hw: pointer to the HW struct
+ *
+ * Clear Cage Max Power override enable bit for each of the cages
+ */
+static int
+ice_devlink_aq_clear_cmpo(struct ice_hw *hw)
+{
+	struct ice_aqc_nvm_cmpo data;
+	int ret, i;
+
+	/* Read Cage Max Power override NVM module */
+	ret = ice_aq_read_nvm(hw, ICE_AQC_NVM_CMPO_MOD_ID, 0, sizeof(data),
+			      &data, true, false, NULL);
+	if (ret)
+		return ret;
+
+	for (i = 0; i < ICE_NUM_OF_CAGES; i++)
+		data.cages_cfg[i] &= ~cpu_to_le16(ICE_AQC_NVM_CMPO_ENABLE);
+
+	/* Do not update the length word since it is not permitted */
+	return ice_aq_update_nvm(hw, ICE_AQC_NVM_CMPO_MOD_ID, 2,
+				 sizeof(data.cages_cfg), data.cages_cfg,
+				 false, 0, NULL);
+}
+
 /**
  * ice_devlink_aq_set_port_option - Send set port option admin queue command
  * @pf: the PF to print split port options
@@ -623,6 +650,11 @@  ice_devlink_aq_set_port_option(struct ice_pf *pf, u8 option_idx,
 		return -EIO;
 	}
 
+	status = ice_devlink_aq_clear_cmpo(&pf->hw);
+	if (status)
+		dev_dbg(dev, "Failed to clear Cage Max Power override, err %d aq_err %d\n",
+			status, pf->hw.adminq.sq_last_status);
+
 	status = ice_nvm_write_activate(&pf->hw, ICE_AQC_NVM_ACTIV_REQ_EMPR, NULL);
 	if (status) {
 		dev_dbg(dev, "ice_nvm_write_activate failed, err %d aq_err %d\n",
diff --git a/drivers/net/ethernet/intel/ice/ice_nvm.c b/drivers/net/ethernet/intel/ice/ice_nvm.c
index f6f52a248066..745f2459943f 100644
--- a/drivers/net/ethernet/intel/ice/ice_nvm.c
+++ b/drivers/net/ethernet/intel/ice/ice_nvm.c
@@ -18,7 +18,7 @@ 
  *
  * Read the NVM using the admin queue commands (0x0701)
  */
-static int
+int
 ice_aq_read_nvm(struct ice_hw *hw, u16 module_typeid, u32 offset, u16 length,
 		void *data, bool last_command, bool read_shadow_ram,
 		struct ice_sq_cd *cd)
diff --git a/drivers/net/ethernet/intel/ice/ice_nvm.h b/drivers/net/ethernet/intel/ice/ice_nvm.h
index 774c2317967d..90f36e19e06b 100644
--- a/drivers/net/ethernet/intel/ice/ice_nvm.h
+++ b/drivers/net/ethernet/intel/ice/ice_nvm.h
@@ -15,6 +15,10 @@  struct ice_orom_civd_info {
 int ice_acquire_nvm(struct ice_hw *hw, enum ice_aq_res_access_type access);
 void ice_release_nvm(struct ice_hw *hw);
 int
+ice_aq_read_nvm(struct ice_hw *hw, u16 module_typeid, u32 offset, u16 length,
+		void *data, bool last_command, bool read_shadow_ram,
+		struct ice_sq_cd *cd);
+int
 ice_read_flat_nvm(struct ice_hw *hw, u32 offset, u32 *length, u8 *data,
 		  bool read_shadow_ram);
 int