diff mbox series

[1/3] net: mana: Add speed support in mana_get_link_ksettings

Message ID 1742473341-15262-2-git-send-email-ernis@linux.microsoft.com (mailing list archive)
State New
Headers show
Series Add support for speed in MANA ethtool | expand

Commit Message

Erni Sri Satya Vennela March 20, 2025, 12:22 p.m. UTC
Add support for speed in mana ethtool get_link_ksettings
operation. This feature is not supported by all hardware.

Signed-off-by: Erni Sri Satya Vennela <ernis@linux.microsoft.com>
Reviewed-by: Haiyang Zhang <haiyangz@microsoft.com>
Reviewed-by: Shradha Gupta <shradhagupta@linux.microsoft.com>
---
 drivers/net/ethernet/microsoft/mana/mana_en.c | 42 +++++++++++++++++++
 .../ethernet/microsoft/mana/mana_ethtool.c    |  6 +++
 include/net/mana/mana.h                       | 17 ++++++++
 3 files changed, 65 insertions(+)

Comments

Andrew Lunn March 20, 2025, 1:37 p.m. UTC | #1
> +int mana_query_link_cfg(struct mana_port_context *apc)
> +{
> +	struct net_device *ndev = apc->ndev;
> +	struct mana_query_link_config_req req = {};
> +	struct mana_query_link_config_resp resp = {};
> +	int err;
> +
> +	mana_gd_init_req_hdr(&req.hdr, MANA_QUERY_LINK_CONFIG,
> +			     sizeof(req), sizeof(resp));
> +
> +	req.vport = apc->port_handle;
> +
> +	err = mana_send_request(apc->ac, &req, sizeof(req), &resp,
> +				sizeof(resp));
> +
> +	if (err) {
> +		netdev_err(ndev, "Failed to query link config: %d\n", err);
> +		goto out;
> +	}
> +
> +	err = mana_verify_resp_hdr(&resp.hdr, MANA_QUERY_LINK_CONFIG,
> +				   sizeof(resp));
> +
> +	if (err || resp.hdr.status) {
> +		netdev_err(ndev, "Failed to query link config: %d, 0x%x\n", err,
> +			   resp.hdr.status);
> +		if (!err)
> +			err = -EPROTO;
> +		goto out;
> +	}
> +
> +	if (resp.qos_unconfigured) {
> +		err = -EINVAL;
> +		goto out;
> +	}
> +	apc->speed = resp.link_speed;

Might be worth adding a comment that the firmware is returning speed
in Mbps.

Or name the struct member link_speed_mbps.

	Andrew
Erni Sri Satya Vennela March 24, 2025, 5:43 p.m. UTC | #2
On Thu, Mar 20, 2025 at 02:37:47PM +0100, Andrew Lunn wrote:
> > +int mana_query_link_cfg(struct mana_port_context *apc)
> > +{
> > +	struct net_device *ndev = apc->ndev;
> > +	struct mana_query_link_config_req req = {};
> > +	struct mana_query_link_config_resp resp = {};
> > +	int err;
> > +
> > +	mana_gd_init_req_hdr(&req.hdr, MANA_QUERY_LINK_CONFIG,
> > +			     sizeof(req), sizeof(resp));
> > +
> > +	req.vport = apc->port_handle;
> > +
> > +	err = mana_send_request(apc->ac, &req, sizeof(req), &resp,
> > +				sizeof(resp));
> > +
> > +	if (err) {
> > +		netdev_err(ndev, "Failed to query link config: %d\n", err);
> > +		goto out;
> > +	}
> > +
> > +	err = mana_verify_resp_hdr(&resp.hdr, MANA_QUERY_LINK_CONFIG,
> > +				   sizeof(resp));
> > +
> > +	if (err || resp.hdr.status) {
> > +		netdev_err(ndev, "Failed to query link config: %d, 0x%x\n", err,
> > +			   resp.hdr.status);
> > +		if (!err)
> > +			err = -EPROTO;
> > +		goto out;
> > +	}
> > +
> > +	if (resp.qos_unconfigured) {
> > +		err = -EINVAL;
> > +		goto out;
> > +	}
> > +	apc->speed = resp.link_speed;
> 
> Might be worth adding a comment that the firmware is returning speed
> in Mbps.
> 
> Or name the struct member link_speed_mbps.
> 
Thank you for your suggestion. I'll make this change for the next
version of this patchset.
> 	Andrew
Andrew Lunn March 24, 2025, 6:44 p.m. UTC | #3
On Mon, Mar 24, 2025 at 10:43:39AM -0700, Erni Sri Satya Vennela wrote:
> On Thu, Mar 20, 2025 at 02:37:47PM +0100, Andrew Lunn wrote:
> > > +int mana_query_link_cfg(struct mana_port_context *apc)
> > > +{
> > > +	struct net_device *ndev = apc->ndev;
> > > +	struct mana_query_link_config_req req = {};
> > > +	struct mana_query_link_config_resp resp = {};
> > > +	int err;
> > > +
> > > +	mana_gd_init_req_hdr(&req.hdr, MANA_QUERY_LINK_CONFIG,
> > > +			     sizeof(req), sizeof(resp));
> > > +
> > > +	req.vport = apc->port_handle;
> > > +
> > > +	err = mana_send_request(apc->ac, &req, sizeof(req), &resp,
> > > +				sizeof(resp));
> > > +
> > > +	if (err) {
> > > +		netdev_err(ndev, "Failed to query link config: %d\n", err);
> > > +		goto out;
> > > +	}
> > > +
> > > +	err = mana_verify_resp_hdr(&resp.hdr, MANA_QUERY_LINK_CONFIG,
> > > +				   sizeof(resp));
> > > +
> > > +	if (err || resp.hdr.status) {
> > > +		netdev_err(ndev, "Failed to query link config: %d, 0x%x\n", err,
> > > +			   resp.hdr.status);
> > > +		if (!err)
> > > +			err = -EPROTO;
> > > +		goto out;
> > > +	}
> > > +
> > > +	if (resp.qos_unconfigured) {
> > > +		err = -EINVAL;
> > > +		goto out;
> > > +	}
> > > +	apc->speed = resp.link_speed;
> > 
> > Might be worth adding a comment that the firmware is returning speed
> > in Mbps.
> > 
> > Or name the struct member link_speed_mbps.
> > 
> Thank you for your suggestion. I'll make this change for the next
> version of this patchset.

Please answer my other questions before posting the next version of
the patch. I'm really questioning if this is the correct uAPI to be
using. You have a very poor description of what you are trying to
do. Maybe TC is the better fit? Does this speed apply to ingress and
egress? If so, there are two leaky buckets, so why only one
configuration value? Or can you only configure ingress?

Also, if i understand correctly MANA is a virtual device and this is
the VM side of it. If this is used for bandwidth limitation, why is
the VM controlling this, not the hypervisor? What is the security
model?

	Andrew
Erni Sri Satya Vennela March 25, 2025, 4:25 p.m. UTC | #4
On Mon, Mar 24, 2025 at 07:44:39PM +0100, Andrew Lunn wrote:
> On Mon, Mar 24, 2025 at 10:43:39AM -0700, Erni Sri Satya Vennela wrote:
> > On Thu, Mar 20, 2025 at 02:37:47PM +0100, Andrew Lunn wrote:
> > > > +int mana_query_link_cfg(struct mana_port_context *apc)
> > > > +{
> > > > +	struct net_device *ndev = apc->ndev;
> > > > +	struct mana_query_link_config_req req = {};
> > > > +	struct mana_query_link_config_resp resp = {};
> > > > +	int err;
> > > > +
> > > > +	mana_gd_init_req_hdr(&req.hdr, MANA_QUERY_LINK_CONFIG,
> > > > +			     sizeof(req), sizeof(resp));
> > > > +
> > > > +	req.vport = apc->port_handle;
> > > > +
> > > > +	err = mana_send_request(apc->ac, &req, sizeof(req), &resp,
> > > > +				sizeof(resp));
> > > > +
> > > > +	if (err) {
> > > > +		netdev_err(ndev, "Failed to query link config: %d\n", err);
> > > > +		goto out;
> > > > +	}
> > > > +
> > > > +	err = mana_verify_resp_hdr(&resp.hdr, MANA_QUERY_LINK_CONFIG,
> > > > +				   sizeof(resp));
> > > > +
> > > > +	if (err || resp.hdr.status) {
> > > > +		netdev_err(ndev, "Failed to query link config: %d, 0x%x\n", err,
> > > > +			   resp.hdr.status);
> > > > +		if (!err)
> > > > +			err = -EPROTO;
> > > > +		goto out;
> > > > +	}
> > > > +
> > > > +	if (resp.qos_unconfigured) {
> > > > +		err = -EINVAL;
> > > > +		goto out;
> > > > +	}
> > > > +	apc->speed = resp.link_speed;
> > > 
> > > Might be worth adding a comment that the firmware is returning speed
> > > in Mbps.
> > > 
> > > Or name the struct member link_speed_mbps.
> > > 
> > Thank you for your suggestion. I'll make this change for the next
> > version of this patchset.
> 
> Please answer my other questions before posting the next version of
> the patch. I'm really questioning if this is the correct uAPI to be
> using. You have a very poor description of what you are trying to
> do. Maybe TC is the better fit? Does this speed apply to ingress and
> egress? If so, there are two leaky buckets, so why only one
> configuration value? Or can you only configure ingress?
> 
The QoS control is at the hardware/firmware level and applies to the
egress rate.
> Also, if i understand correctly MANA is a virtual device and this is
> the VM side of it. If this is used for bandwidth limitation, why is
> the VM controlling this, not the hypervisor? What is the security
> model?
> 
In certain cluster and hardware versions, Azure allows this API to
restrict the bandwidth limit to a lesser value than what was configured
by the Azure control plane. The device will not allow a higher limit
than what was configured through the Azure control plane to be set by
the VM through this API.
> 	Andrew
Andrew Lunn March 25, 2025, 4:43 p.m. UTC | #5
> The QoS control is at the hardware/firmware level and applies to the
> egress rate.

egress relative to the VM? So what the VM sends to the hypervisor.
There is no restriction the other way, hypervisor to the VM?

That is not what link modes do. 10Mbps is the limit in both
directions.

> > Also, if i understand correctly MANA is a virtual device and this is
> > the VM side of it. If this is used for bandwidth limitation, why is
> > the VM controlling this, not the hypervisor? What is the security
> > model?
> > 
> In certain cluster and hardware versions, Azure allows this API to
> restrict the bandwidth limit to a lesser value than what was configured
> by the Azure control plane. The device will not allow a higher limit
> than what was configured through the Azure control plane to be set by
> the VM through this API.

So all this information needs adding to the commit message. When you
are using an API in a strange way, you have to expect questions to be
asked, and you can save a lot of time by answering those questions in
the commit message, before they are even asked.

So, i think this is the wrong API.

Please implement it as a TC offload. I'm not an TC expert, but htb
might work.

	Andrew
diff mbox series

Patch

diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
index aa1e47233fe5..5fa8e1e2ff9a 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_en.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
@@ -1161,6 +1161,48 @@  static int mana_cfg_vport_steering(struct mana_port_context *apc,
 	return err;
 }
 
+int mana_query_link_cfg(struct mana_port_context *apc)
+{
+	struct net_device *ndev = apc->ndev;
+	struct mana_query_link_config_req req = {};
+	struct mana_query_link_config_resp resp = {};
+	int err;
+
+	mana_gd_init_req_hdr(&req.hdr, MANA_QUERY_LINK_CONFIG,
+			     sizeof(req), sizeof(resp));
+
+	req.vport = apc->port_handle;
+
+	err = mana_send_request(apc->ac, &req, sizeof(req), &resp,
+				sizeof(resp));
+
+	if (err) {
+		netdev_err(ndev, "Failed to query link config: %d\n", err);
+		goto out;
+	}
+
+	err = mana_verify_resp_hdr(&resp.hdr, MANA_QUERY_LINK_CONFIG,
+				   sizeof(resp));
+
+	if (err || resp.hdr.status) {
+		netdev_err(ndev, "Failed to query link config: %d, 0x%x\n", err,
+			   resp.hdr.status);
+		if (!err)
+			err = -EPROTO;
+		goto out;
+	}
+
+	if (resp.qos_unconfigured) {
+		err = -EINVAL;
+		goto out;
+	}
+	apc->speed = resp.link_speed;
+	return 0;
+
+out:
+	return err;
+}
+
 int mana_create_wq_obj(struct mana_port_context *apc,
 		       mana_handle_t vport,
 		       u32 wq_type, struct mana_obj_spec *wq_spec,
diff --git a/drivers/net/ethernet/microsoft/mana/mana_ethtool.c b/drivers/net/ethernet/microsoft/mana/mana_ethtool.c
index c419626073f5..48234a738d26 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_ethtool.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_ethtool.c
@@ -427,6 +427,12 @@  static int mana_set_ringparam(struct net_device *ndev,
 static int mana_get_link_ksettings(struct net_device *ndev,
 				   struct ethtool_link_ksettings *cmd)
 {
+	struct mana_port_context *apc = netdev_priv(ndev);
+	int err;
+
+	err = mana_query_link_cfg(apc);
+
+	cmd->base.speed = (err) ? SPEED_UNKNOWN : apc->speed;
 	cmd->base.duplex = DUPLEX_FULL;
 	cmd->base.port = PORT_OTHER;
 
diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h
index 0d00b24eacaf..5f039ce99ade 100644
--- a/include/net/mana/mana.h
+++ b/include/net/mana/mana.h
@@ -468,6 +468,8 @@  struct mana_port_context {
 
 	u16 port_idx;
 
+	u32 speed;
+
 	bool port_is_up;
 	bool port_st_save; /* Saved port state */
 
@@ -497,6 +499,7 @@  struct bpf_prog *mana_xdp_get(struct mana_port_context *apc);
 void mana_chn_setxdp(struct mana_port_context *apc, struct bpf_prog *prog);
 int mana_bpf(struct net_device *ndev, struct netdev_bpf *bpf);
 void mana_query_gf_stats(struct mana_port_context *apc);
+int mana_query_link_cfg(struct mana_port_context *apc);
 int mana_pre_alloc_rxbufs(struct mana_port_context *apc, int mtu, int num_queues);
 void mana_pre_dealloc_rxbufs(struct mana_port_context *apc);
 
@@ -523,6 +526,7 @@  enum mana_command_code {
 	MANA_FENCE_RQ		= 0x20006,
 	MANA_CONFIG_VPORT_RX	= 0x20007,
 	MANA_QUERY_VPORT_CONFIG	= 0x20008,
+	MANA_QUERY_LINK_CONFIG	= 0x2000A,
 
 	/* Privileged commands for the PF mode */
 	MANA_REGISTER_FILTER	= 0x28000,
@@ -531,6 +535,19 @@  enum mana_command_code {
 	MANA_DEREGISTER_HW_PORT	= 0x28004,
 };
 
+/* Query Link Configuration*/
+struct mana_query_link_config_req {
+	struct gdma_req_hdr hdr;
+	mana_handle_t vport;
+}; /* HW DATA */
+
+struct mana_query_link_config_resp {
+	struct gdma_resp_hdr hdr;
+	u32 link_speed;
+	bool qos_unconfigured;
+	u8 reserved[3];
+}; /* HW DATA */
+
 /* Query Device Configuration */
 struct mana_query_device_cfg_req {
 	struct gdma_req_hdr hdr;