diff mbox series

[net-next,v2] ethtool: add netlink based get rxfh support

Message ID 20221104234244.242527-1-sudheer.mogilappagari@intel.com (mailing list archive)
State Changes Requested
Delegated to: Netdev Maintainers
Headers show
Series [net-next,v2] ethtool: add netlink based get rxfh support | expand

Checks

Context Check Description
netdev/tree_selection success Clearly marked for net-next
netdev/fixes_present success Fixes tag not required for -next series
netdev/subject_prefix success Link
netdev/cover_letter success Single patches do not need cover letters
netdev/patch_count success Link
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 2 this patch: 2
netdev/cc_maintainers warning 10 maintainers not CCed: wangjie125@huawei.com huangguangbin2@huawei.com pabeni@redhat.com chenhao288@hisilicon.com davem@davemloft.net edumazet@google.com sbhatta@marvell.com linux@rempel-privat.de linux-doc@vger.kernel.org bagasdotme@gmail.com
netdev/build_clang success Errors and warnings before: 5 this patch: 5
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 2 this patch: 2
netdev/checkpatch warning WARNING: added, moved or deleted file(s), does MAINTAINERS need updating? WARNING: line length of 85 exceeds 80 columns
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Mogilappagari, Sudheer Nov. 4, 2022, 11:42 p.m. UTC
Implement RXFH_GET request to get RSS table, hash key
and hash function of an interface. This is netlink
equivalent implementation of ETHTOOL_GRSSH ioctl request.

Signed-off-by: Sudheer Mogilappagari <sudheer.mogilappagari@intel.com>
---
v2: Fix cleanup in error path instead of returning.
---
 Documentation/networking/ethtool-netlink.rst |  28 +++-
 include/uapi/linux/ethtool_netlink.h         |  15 ++
 net/ethtool/Makefile                         |   2 +-
 net/ethtool/netlink.c                        |  10 ++
 net/ethtool/netlink.h                        |   2 +
 net/ethtool/rxfh.c                           | 158 +++++++++++++++++++
 6 files changed, 213 insertions(+), 2 deletions(-)
 create mode 100644 net/ethtool/rxfh.c

Comments

Jakub Kicinski Nov. 8, 2022, 2:25 a.m. UTC | #1
On Fri,  4 Nov 2022 16:42:44 -0700 Sudheer Mogilappagari wrote:
> Implement RXFH_GET request to get RSS table, hash key
> and hash function of an interface. This is netlink
> equivalent implementation of ETHTOOL_GRSSH ioctl request.

Motivation would be good to have, if any. Are you going to add new
fields or is it simply to fill in the implementation gap we have in
the netlink version?

> +RXFH_GET
> +========
> +
> +Get RSS table, hash key and hash function info like ``ETHTOOL_GRSSH``
> +ioctl request.


Can we describe in more detail which commands are reimplemented?
Otherwise calling the command RXFH makes little sense.
We may be better off using RSS in the name in the first place.

> +Request contents:
> +
> +=====================================  ======  ==========================
> +  ``ETHTOOL_A_RXFH_HEADER``            nested  request header
> +  ``ETHTOOL_A_RXFH_RSS_CONTEXT``       u32     context number
> + ====================================  ======  ==========================
> +
> +Kernel response contents:
> +
> +=====================================  ======  ==========================
> +  ``ETHTOOL_A_RXFH_HEADER``            nested  reply header
> +  ``ETHTOOL_A_RXFH_RSS_CONTEXT``       u32     RSS context number
> +  ``ETHTOOL_A_RXFH_INDIR_SIZE``        u32     RSS Indirection table size
> +  ``ETHTOOL_A_RXFH_KEY_SIZE``          u32     RSS hash key size
> +  ``ETHTOOL_A_RXFH_HFUNC``             u32     RSS hash func

This is u8 in the implementation, please make the implementation u32 as
documented.

> +  ``ETHTOOL_A_RXFH_RSS_CONFIG``        u32     Indir table and hkey bytes

These should really be separate attributes.

Do we even need the indir_size and key_size given every attribute has 
a length so user can just look at the length of the attrs to see the
length?

> +static int rxfh_prepare_data(const struct ethnl_req_info *req_base,
> +			     struct ethnl_reply_data *reply_base,
> +			     struct genl_info *info)
> +{
> +	struct rxfh_reply_data *data = RXFH_REPDATA(reply_base);
> +	struct net_device *dev = reply_base->dev;
> +	struct ethtool_rxfh *rxfh = &data->rxfh;
> +	struct ethnl_req_info req_info = {};
> +	struct nlattr **tb = info->attrs;
> +	u32 indir_size = 0, hkey_size = 0;
> +	const struct ethtool_ops *ops;
> +	u32 total_size, indir_bytes;
> +	bool mod = false;
> +	u8 dev_hfunc = 0;
> +	u8 *hkey = NULL;
> +	u8 *rss_config;
> +	int ret;
> +
> +	ops = dev->ethtool_ops;
> +	if (!ops->get_rxfh)
> +		return -EOPNOTSUPP;
> +
> +	ret = ethnl_parse_header_dev_get(&req_info,
> +					 tb[ETHTOOL_A_RXFH_HEADER],
> +					 genl_info_net(info), info->extack,
> +					 true);

Why are you parsing again?

You hook in ethnl_default_doit() and ethnl_default_dumpit(),
which should call the parsing for you already.

> +	if (ret < 0)
> +		return ret;
> +
> +	ethnl_update_u32(&rxfh->rss_context, tb[ETHTOOL_A_RXFH_RSS_CONTEXT],
> +			 &mod);

ethnl_update_u32() is for when you're updating the config.
You can use plain netlink helpers to get request arguments.

> +	/* Some drivers don't handle rss_context */
> +	if (rxfh->rss_context && !ops->get_rxfh_context) {
> +		ret = -EOPNOTSUPP;
> +		goto out_dev;
> +	}
> +
> +	ret = ethnl_ops_begin(dev);
> +	if (ret < 0)
> +		goto out_dev;
> +
> +	if (ops->get_rxfh_indir_size)
> +		indir_size = ops->get_rxfh_indir_size(dev);
> +	if (ops->get_rxfh_key_size)
> +		hkey_size = ops->get_rxfh_key_size(dev);
> +
> +	indir_bytes = indir_size * sizeof(rxfh->rss_config[0]);
> +	total_size = indir_bytes + hkey_size;
> +	rss_config = kzalloc(total_size, GFP_USER);

GFP_KERNEL is enough here

> +	if (!rss_config) {
> +		ret = -ENOMEM;
> +		goto out_ops;
> +	}
> +
> +	if (indir_size) {
> +		data->rss_config = (u32 *)rss_config;
> +		rxfh->indir_size = indir_size;
> +	}
> +
> +	if (hkey_size) {
> +		hkey = rss_config + indir_bytes;
> +		rxfh->key_size = hkey_size;
> +	}
> +
> +	if (rxfh->rss_context)
> +		ret = ops->get_rxfh_context(dev, data->rss_config, hkey,
> +					    &dev_hfunc, rxfh->rss_context);
> +	else
> +		ret = ops->get_rxfh(dev, data->rss_config, hkey, &dev_hfunc);

This will not be sufficient for dump, I'm afraid.

For dump we need to find a way to dump all contexts in all devices.
Which may require extending the driver API. Maybe drop the dump
implementation for now?

> +	rxfh->hfunc = dev_hfunc;
> +
> +out_ops:
> +	ethnl_ops_complete(dev);
> +out_dev:
> +	ethnl_parse_header_dev_put(&req_info);
> +	return ret;
> +}
Mogilappagari, Sudheer Nov. 10, 2022, 12:26 a.m. UTC | #2
> -----Original Message-----
> From: Jakub Kicinski <kuba@kernel.org>
> Sent: Monday, November 7, 2022 6:26 PM
> Subject: Re: [PATCH net-next v2] ethtool: add netlink based get rxfh
> support
> 
> On Fri,  4 Nov 2022 16:42:44 -0700 Sudheer Mogilappagari wrote:
> > Implement RXFH_GET request to get RSS table, hash key and hash
> > function of an interface. This is netlink equivalent implementation
> of
> > ETHTOOL_GRSSH ioctl request.
> 
> Motivation would be good to have, if any. Are you going to add new
> fields or is it simply to fill in the implementation gap we have in the
> netlink version?
> 

Will add more explanation here. Goal was to implement existing
functionality first and then extend by adding new context 
specific parameters.   

> > +RXFH_GET
> > +========
> > +
> > +Get RSS table, hash key and hash function info like
> ``ETHTOOL_GRSSH``
> > +ioctl request.
> 
> 
> Can we describe in more detail which commands are reimplemented?
> Otherwise calling the command RXFH makes little sense.
> We may be better off using RSS in the name in the first place.

This is the ethtool command being reimplemented.
ethtool -x|--show-rxfh-indir DEVNAME   Show Rx flow hash indirection table and/or RSS hash key
        [ context %d ]

Picked RXFH based on existing function names and ethtool_rxfh
structure. If it needs to change, how about RSS_CTX or just RSS ? 

> > +  ``ETHTOOL_A_RXFH_HEADER``            nested  reply header
> > +  ``ETHTOOL_A_RXFH_RSS_CONTEXT``       u32     RSS context number
> > +  ``ETHTOOL_A_RXFH_INDIR_SIZE``        u32     RSS Indirection table
> size
> > +  ``ETHTOOL_A_RXFH_KEY_SIZE``          u32     RSS hash key size
> > +  ``ETHTOOL_A_RXFH_HFUNC``             u32     RSS hash func
> 
> This is u8 in the implementation, please make the implementation u32 as
> documented.

This should have been u8 instead. Will make it consistent.

> 
> > +  ``ETHTOOL_A_RXFH_RSS_CONFIG``        u32     Indir table and hkey
> bytes
> 
> These should really be separate attributes.
> 
> Do we even need the indir_size and key_size given every attribute has a
> length so user can just look at the length of the attrs to see the
> length?

We can split indir table and hkey in netlink implementation and sizes 
won't be needed anymore. Above format is based on ethtool_rxfh 
structure where indir table and hkey come together as last member of
structure. Will update it in next version.

> 
> > +static int rxfh_prepare_data(const struct ethnl_req_info *req_base,
> > +			     struct ethnl_reply_data *reply_base,
> > +			     struct genl_info *info)
> > +{
> > +	struct rxfh_reply_data *data = RXFH_REPDATA(reply_base);
> > +	struct net_device *dev = reply_base->dev;
> > +	struct ethtool_rxfh *rxfh = &data->rxfh;
> > +	struct ethnl_req_info req_info = {};
> > +	struct nlattr **tb = info->attrs;
> > +	u32 indir_size = 0, hkey_size = 0;
> > +	const struct ethtool_ops *ops;
> > +	u32 total_size, indir_bytes;
> > +	bool mod = false;
> > +	u8 dev_hfunc = 0;
> > +	u8 *hkey = NULL;
> > +	u8 *rss_config;
> > +	int ret;
> > +
> > +	ops = dev->ethtool_ops;
> > +	if (!ops->get_rxfh)
> > +		return -EOPNOTSUPP;
> > +
> > +	ret = ethnl_parse_header_dev_get(&req_info,
> > +					 tb[ETHTOOL_A_RXFH_HEADER],
> > +					 genl_info_net(info), info->extack,
> > +					 true);
> 
> Why are you parsing again?
> 
> You hook in ethnl_default_doit() and ethnl_default_dumpit(), which
> should call the parsing for you already.
> 

My bad. Had used other netlink get command implementation as reference
and overlooked request_ops->parse_request. 

> > +	if (ret < 0)
> > +		return ret;
> > +
> > +	ethnl_update_u32(&rxfh->rss_context,
> tb[ETHTOOL_A_RXFH_RSS_CONTEXT],
> > +			 &mod);
> 
> ethnl_update_u32() is for when you're updating the config.
> You can use plain netlink helpers to get request arguments.

Ack.

> > +	/* Some drivers don't handle rss_context */
> > +	if (rxfh->rss_context && !ops->get_rxfh_context) {
> > +		ret = -EOPNOTSUPP;
> > +		goto out_dev;
> > +	}
> > +
> > +	ret = ethnl_ops_begin(dev);
> > +	if (ret < 0)
> > +		goto out_dev;
> > +
> > +	if (ops->get_rxfh_indir_size)
> > +		indir_size = ops->get_rxfh_indir_size(dev);
> > +	if (ops->get_rxfh_key_size)
> > +		hkey_size = ops->get_rxfh_key_size(dev);
> > +
> > +	indir_bytes = indir_size * sizeof(rxfh->rss_config[0]);
> > +	total_size = indir_bytes + hkey_size;
> > +	rss_config = kzalloc(total_size, GFP_USER);
> 
> GFP_KERNEL is enough here
> 

Will fix in next version.

> > +	if (!rss_config) {
> > +		ret = -ENOMEM;
> > +		goto out_ops;
> > +	}
> > +
> > +	if (indir_size) {
> > +		data->rss_config = (u32 *)rss_config;
> > +		rxfh->indir_size = indir_size;
> > +	}
> > +
> > +	if (hkey_size) {
> > +		hkey = rss_config + indir_bytes;
> > +		rxfh->key_size = hkey_size;
> > +	}
> > +
> > +	if (rxfh->rss_context)
> > +		ret = ops->get_rxfh_context(dev, data->rss_config, hkey,
> > +					    &dev_hfunc, rxfh->rss_context);
> > +	else
> > +		ret = ops->get_rxfh(dev, data->rss_config, hkey,
> &dev_hfunc);
> 
> This will not be sufficient for dump, I'm afraid.
> 
> For dump we need to find a way to dump all contexts in all devices.
> Which may require extending the driver API. Maybe drop the dump
> implementation for now?
> 

Agree. Will remove dumpit for this command.

> > +	rxfh->hfunc = dev_hfunc;
> > +
> > +out_ops:
> > +	ethnl_ops_complete(dev);
> > +out_dev:
> > +	ethnl_parse_header_dev_put(&req_info);
> > +	return ret;
> > +}
Jakub Kicinski Nov. 10, 2022, 12:46 a.m. UTC | #3
On Thu, 10 Nov 2022 00:26:23 +0000 Mogilappagari, Sudheer wrote:
> > Can we describe in more detail which commands are reimplemented?
> > Otherwise calling the command RXFH makes little sense.
> > We may be better off using RSS in the name in the first place.  
> 
> This is the ethtool command being reimplemented.
> ethtool -x|--show-rxfh-indir DEVNAME   Show Rx flow hash indirection table and/or RSS hash key
>         [ context %d ]
> 
> Picked RXFH based on existing function names and ethtool_rxfh
> structure. If it needs to change, how about RSS_CTX or just RSS ? 

I vote for just RSS.

> > > +  ``ETHTOOL_A_RXFH_HEADER``            nested  reply header
> > > +  ``ETHTOOL_A_RXFH_RSS_CONTEXT``       u32     RSS context number
> > > +  ``ETHTOOL_A_RXFH_INDIR_SIZE``        u32     RSS Indirection table size  
> > > +  ``ETHTOOL_A_RXFH_KEY_SIZE``          u32     RSS hash key size
> > > +  ``ETHTOOL_A_RXFH_HFUNC``             u32     RSS hash func  
> > 
> > This is u8 in the implementation, please make the implementation u32 as
> > documented.  
> 
> This should have been u8 instead. Will make it consistent.

u32 is better, data sizes in netlink are rounded up to 4 bytes anyway,
so u8 is 1 usable byte and 3 bytes of padding. Much better to use u32.

> > > +static int rxfh_prepare_data(const struct ethnl_req_info *req_base,
> > > +			     struct ethnl_reply_data *reply_base,
> > > +			     struct genl_info *info)
> > > +{
> > > +	struct rxfh_reply_data *data = RXFH_REPDATA(reply_base);
> > > +	struct net_device *dev = reply_base->dev;
> > > +	struct ethtool_rxfh *rxfh = &data->rxfh;
> > > +	struct ethnl_req_info req_info = {};
> > > +	struct nlattr **tb = info->attrs;
> > > +	u32 indir_size = 0, hkey_size = 0;
> > > +	const struct ethtool_ops *ops;
> > > +	u32 total_size, indir_bytes;
> > > +	bool mod = false;
> > > +	u8 dev_hfunc = 0;
> > > +	u8 *hkey = NULL;
> > > +	u8 *rss_config;
> > > +	int ret;
> > > +
> > > +	ops = dev->ethtool_ops;
> > > +	if (!ops->get_rxfh)
> > > +		return -EOPNOTSUPP;
> > > +
> > > +	ret = ethnl_parse_header_dev_get(&req_info,
> > > +					 tb[ETHTOOL_A_RXFH_HEADER],
> > > +					 genl_info_net(info), info->extack,
> > > +					 true);  
> > 
> > Why are you parsing again?
> > 
> > You hook in ethnl_default_doit() and ethnl_default_dumpit(), which
> > should call the parsing for you already.
> 
> My bad. Had used other netlink get command implementation as reference
> and overlooked request_ops->parse_request. 

Right, as you probably discovered the core ethtool code can do a lot of
work for you if the command doesn't have special requirements and you
can rely on the default doit / dumpit handling.
Samudrala, Sridhar Nov. 10, 2022, 10:08 p.m. UTC | #4
On 11/9/2022 6:46 PM, Jakub Kicinski wrote:
> On Thu, 10 Nov 2022 00:26:23 +0000 Mogilappagari, Sudheer wrote:
>>> Can we describe in more detail which commands are reimplemented?
>>> Otherwise calling the command RXFH makes little sense.
>>> We may be better off using RSS in the name in the first place.
>> This is the ethtool command being reimplemented.
>> ethtool -x|--show-rxfh-indir DEVNAME   Show Rx flow hash indirection table and/or RSS hash key
>>          [ context %d ]
>>
>> Picked RXFH based on existing function names and ethtool_rxfh
>> structure. If it needs to change, how about RSS_CTX or just RSS ?
> I vote for just RSS.

Can we use QGRP as a prefix to indicate that these are per-queue group parameters
and not restricted to RSS related parameters?

   QGRP_CONTEXT
   QGRP_RSS_HFUNC
   QGRP_RSS_KEY
   QGRP_RSS_INDIR_TABLE

In future, we would like to add per-queue group parameters like
   QGRP_INLINE_FLOW_STEERING (Round robin flow steering of TCP flows)

Thanks
Sridhar
Jakub Kicinski Nov. 10, 2022, 10:34 p.m. UTC | #5
On Thu, 10 Nov 2022 16:08:04 -0600 Samudrala, Sridhar wrote:
> Can we use QGRP as a prefix to indicate that these are per-queue group parameters
> and not restricted to RSS related parameters?
> 
>    QGRP_CONTEXT
>    QGRP_RSS_HFUNC
>    QGRP_RSS_KEY
>    QGRP_RSS_INDIR_TABLE
> 
> In future, we would like to add per-queue group parameters like
>    QGRP_INLINE_FLOW_STEERING (Round robin flow steering of TCP flows)

The RSS context thing is a pretty shallow abstraction, I don't think we
should be extending it into "queue groups" or whatnot. We'll probably
need some devlink objects at some point (rate configuration?) and
locking order is devlink > rtnl, so spawning things from within ethtool
will be a pain :S
Samudrala, Sridhar Nov. 10, 2022, 11:24 p.m. UTC | #6
On 11/10/2022 4:34 PM, Jakub Kicinski wrote:
> On Thu, 10 Nov 2022 16:08:04 -0600 Samudrala, Sridhar wrote:
>> Can we use QGRP as a prefix to indicate that these are per-queue group parameters
>> and not restricted to RSS related parameters?
>>
>>     QGRP_CONTEXT
>>     QGRP_RSS_HFUNC
>>     QGRP_RSS_KEY
>>     QGRP_RSS_INDIR_TABLE
>>
>> In future, we would like to add per-queue group parameters like
>>     QGRP_INLINE_FLOW_STEERING (Round robin flow steering of TCP flows)
> The RSS context thing is a pretty shallow abstraction, I don't think we
> should be extending it into "queue groups" or whatnot. We'll probably
> need some devlink objects at some point (rate configuration?) and
> locking order is devlink > rtnl, so spawning things from within ethtool
> will be a pain :S

We are going this path of extending ethtool rss context interface to support
per queue-group parameters based on this feedback.
   https://lore.kernel.org/netdev/20220314131114.635d5acb@kicinski-fedora-pc1c0hjn.dhcp.thefacebook.com/

Per queue-group rate can already be configured when creating queue groups via
tc-mqprio interface using bw_rlimit parameters.
Jakub Kicinski Nov. 11, 2022, 12:12 a.m. UTC | #7
On Thu, 10 Nov 2022 17:24:15 -0600 Samudrala, Sridhar wrote:
> > The RSS context thing is a pretty shallow abstraction, I don't think we
> > should be extending it into "queue groups" or whatnot. We'll probably
> > need some devlink objects at some point (rate configuration?) and
> > locking order is devlink > rtnl, so spawning things from within ethtool
> > will be a pain :S  
> 
> We are going this path of extending ethtool rss context interface to support
> per queue-group parameters based on this feedback.
>    https://lore.kernel.org/netdev/20220314131114.635d5acb@kicinski-fedora-pc1c0hjn.dhcp.thefacebook.com/
> 
> Per queue-group rate can already be configured when creating queue groups via
> tc-mqprio interface using bw_rlimit parameters.

Right, but that's still just flow-director-y/hash-y thing?
Does the name RSS imply purely hash based distribution?

My worry is that if we go with a more broad name like
"queue group" someone may be mislead to adding controls 
unrelated to flow <> queue assignment here.
Samudrala, Sridhar Nov. 14, 2022, 4:23 a.m. UTC | #8
On 11/10/2022 6:12 PM, Jakub Kicinski wrote:
> On Thu, 10 Nov 2022 17:24:15 -0600 Samudrala, Sridhar wrote:
>>> The RSS context thing is a pretty shallow abstraction, I don't think we
>>> should be extending it into "queue groups" or whatnot. We'll probably
>>> need some devlink objects at some point (rate configuration?) and
>>> locking order is devlink > rtnl, so spawning things from within ethtool
>>> will be a pain :S
>> We are going this path of extending ethtool rss context interface to support
>> per queue-group parameters based on this feedback.
>>     https://lore.kernel.org/netdev/20220314131114.635d5acb@kicinski-fedora-pc1c0hjn.dhcp.thefacebook.com/
>>
>> Per queue-group rate can already be configured when creating queue groups via
>> tc-mpqrio interface using bw_rlimit parameters.
> Right, but that's still just flow-director-y/hash-y thing?

Yes. The initial per queue-group parameter we want to add is to make incoming
TCP connections to be distributed in a round-robin manner by over-riding RSS
hash based queue when a SYN is received and let a flow director rule added
when a SYN-ACK is sent out.


> Does the name RSS imply purely hash based distribution?

The name Receive Side Scaling doesn't explicitly say that the distribution
is hash based, but I think it is a general assumption that it is based on hash.
But if you are OK to use RSS prefix for flow-director based distributions,
we will use ethtool rss-context interface for this parameter.

>
> My worry is that if we go with a more broad name like
> "queue group" someone may be mislead to adding controls
> unrelated to flow <> queue assignment here.

Later we would like to add a per queue-group parameter that would allow
reducing/changing the number of napi pollers for a queue group from the default
value equal to the number of queues in the queue group. Are you suggesting
creating a queue-group object and use devlink API to configure such parameters
for a queue-group?
Jakub Kicinski Nov. 14, 2022, 5:15 p.m. UTC | #9
On Sun, 13 Nov 2022 22:23:25 -0600 Samudrala, Sridhar wrote:
> > My worry is that if we go with a more broad name like
> > "queue group" someone may be mislead to adding controls
> > unrelated to flow <> queue assignment here.  
> 
> Later we would like to add a per queue-group parameter that would allow
> reducing/changing the number of napi pollers for a queue group from the default
> value equal to the number of queues in the queue group. Are you suggesting
> creating a queue-group object and use devlink API to configure such parameters
> for a queue-group?

I was thinking devlink because of scheduler/QoS and resource control.
For NAPI config not so sure, but either way RSS will not be a place 
for NAPI/IRQ config.
Samudrala, Sridhar Nov. 15, 2022, 1:46 a.m. UTC | #10
On 11/14/2022 11:15 AM, Jakub Kicinski wrote:
> On Sun, 13 Nov 2022 22:23:25 -0600 Samudrala, Sridhar wrote:
>>> My worry is that if we go with a more broad name like
>>> "queue group" someone may be mislead to adding controls
>>> unrelated to flow <> queue assignment here.
>> Later we would like to add a per queue-group parameter that would allow
>> reducing/changing the number of napi pollers for a queue group from the default
>> value equal to the number of queues in the queue group. Are you suggesting
>> creating a queue-group object and use devlink API to configure such parameters
>> for a queue-group?
> I was thinking devlink because of scheduler/QoS and resource control.
> For NAPI config not so sure, but either way RSS will not be a place
> for NAPI/IRQ config.

Another option we could go with is via sysfs by exposing
   /sys/class/net/<iface>/queue_groups/qg<0-n>/num_pollers

With this option, it may be possible to also consider making per-netdev
napi parameters like threaded and napi_defer_hard_irqs etc to be
per-queue-group parameters.
diff mbox series

Patch

diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst
index d578b8bcd8a4..4420a2dc952e 100644
--- a/Documentation/networking/ethtool-netlink.rst
+++ b/Documentation/networking/ethtool-netlink.rst
@@ -222,6 +222,7 @@  Userspace to kernel:
   ``ETHTOOL_MSG_MODULE_GET``            get transceiver module parameters
   ``ETHTOOL_MSG_PSE_SET``               set PSE parameters
   ``ETHTOOL_MSG_PSE_GET``               get PSE parameters
+  ``ETHTOOL_MSG_RXFH_GET``              get RSS settings
   ===================================== =================================
 
 Kernel to userspace:
@@ -263,6 +264,7 @@  Kernel to userspace:
   ``ETHTOOL_MSG_PHC_VCLOCKS_GET_REPLY``    PHC virtual clocks info
   ``ETHTOOL_MSG_MODULE_GET_REPLY``         transceiver module parameters
   ``ETHTOOL_MSG_PSE_GET_REPLY``            PSE parameters
+  ``ETHTOOL_MSG_RXFH_GET_REPLY``           RSS settings
   ======================================== =================================
 
 ``GET`` requests are sent by userspace applications to retrieve device
@@ -1686,6 +1688,30 @@  to control PoDL PSE Admin functions. This option is implementing
 ``IEEE 802.3-2018`` 30.15.1.2.1 acPoDLPSEAdminControl. See
 ``ETHTOOL_A_PODL_PSE_ADMIN_STATE`` for supported values.
 
+RXFH_GET
+========
+
+Get RSS table, hash key and hash function info like ``ETHTOOL_GRSSH``
+ioctl request.
+
+Request contents:
+
+=====================================  ======  ==========================
+  ``ETHTOOL_A_RXFH_HEADER``            nested  request header
+  ``ETHTOOL_A_RXFH_RSS_CONTEXT``       u32     context number
+=====================================  ======  ==========================
+
+Kernel response contents:
+
+=====================================  ======  ==========================
+  ``ETHTOOL_A_RXFH_HEADER``            nested  reply header
+  ``ETHTOOL_A_RXFH_RSS_CONTEXT``       u32     RSS context number
+  ``ETHTOOL_A_RXFH_INDIR_SIZE``        u32     RSS Indirection table size
+  ``ETHTOOL_A_RXFH_KEY_SIZE``          u32     RSS hash key size
+  ``ETHTOOL_A_RXFH_HFUNC``             u32     RSS hash func
+  ``ETHTOOL_A_RXFH_RSS_CONFIG``        u32     Indir table and hkey bytes
+=====================================  ======  ==========================
+
 Request translation
 ===================
 
@@ -1738,7 +1764,7 @@  are netlink only.
   ``ETHTOOL_SFLAGS``                  ``ETHTOOL_MSG_FEATURES_SET``
   ``ETHTOOL_GPFLAGS``                 ``ETHTOOL_MSG_PRIVFLAGS_GET``
   ``ETHTOOL_SPFLAGS``                 ``ETHTOOL_MSG_PRIVFLAGS_SET``
-  ``ETHTOOL_GRXFH``                   n/a
+  ``ETHTOOL_GRXFH``                   ``ETHTOOL_MSG_RXFH_GET``
   ``ETHTOOL_SRXFH``                   n/a
   ``ETHTOOL_GGRO``                    ``ETHTOOL_MSG_FEATURES_GET``
   ``ETHTOOL_SGRO``                    ``ETHTOOL_MSG_FEATURES_SET``
diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h
index bb57084ac524..a5ce6fadcd05 100644
--- a/include/uapi/linux/ethtool_netlink.h
+++ b/include/uapi/linux/ethtool_netlink.h
@@ -51,6 +51,7 @@  enum {
 	ETHTOOL_MSG_MODULE_SET,
 	ETHTOOL_MSG_PSE_GET,
 	ETHTOOL_MSG_PSE_SET,
+	ETHTOOL_MSG_RXFH_GET,
 
 	/* add new constants above here */
 	__ETHTOOL_MSG_USER_CNT,
@@ -97,6 +98,7 @@  enum {
 	ETHTOOL_MSG_MODULE_GET_REPLY,
 	ETHTOOL_MSG_MODULE_NTF,
 	ETHTOOL_MSG_PSE_GET_REPLY,
+	ETHTOOL_MSG_RXFH_GET_REPLY,
 
 	/* add new constants above here */
 	__ETHTOOL_MSG_KERNEL_CNT,
@@ -879,6 +881,19 @@  enum {
 	ETHTOOL_A_PSE_MAX = (__ETHTOOL_A_PSE_CNT - 1)
 };
 
+enum {
+	ETHTOOL_A_RXFH_UNSPEC,
+	ETHTOOL_A_RXFH_HEADER,
+	ETHTOOL_A_RXFH_RSS_CONTEXT,		/* u32 */
+	ETHTOOL_A_RXFH_INDIR_SIZE,		/* u32 */
+	ETHTOOL_A_RXFH_KEY_SIZE,		/* u32 */
+	ETHTOOL_A_RXFH_HFUNC,			/* u8 */
+	ETHTOOL_A_RXFH_RSS_CONFIG,
+
+	__ETHTOOL_A_RXFH_CNT,
+	ETHTOOL_A_RXFH_MAX = (__ETHTOOL_A_RXFH_CNT - 1),
+};
+
 /* generic netlink info */
 #define ETHTOOL_GENL_NAME "ethtool"
 #define ETHTOOL_GENL_VERSION 1
diff --git a/net/ethtool/Makefile b/net/ethtool/Makefile
index 72ab0944262a..234a063008e6 100644
--- a/net/ethtool/Makefile
+++ b/net/ethtool/Makefile
@@ -4,7 +4,7 @@  obj-y				+= ioctl.o common.o
 
 obj-$(CONFIG_ETHTOOL_NETLINK)	+= ethtool_nl.o
 
-ethtool_nl-y	:= netlink.o bitset.o strset.o linkinfo.o linkmodes.o \
+ethtool_nl-y	:= netlink.o bitset.o strset.o linkinfo.o linkmodes.o rxfh.o \
 		   linkstate.o debug.o wol.o features.o privflags.o rings.o \
 		   channels.o coalesce.o pause.o eee.o tsinfo.o cabletest.o \
 		   tunnels.o fec.o eeprom.o stats.o phc_vclocks.o module.o \
diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c
index 1a4c11356c96..2265a835330b 100644
--- a/net/ethtool/netlink.c
+++ b/net/ethtool/netlink.c
@@ -287,6 +287,7 @@  ethnl_default_requests[__ETHTOOL_MSG_USER_CNT] = {
 	[ETHTOOL_MSG_PHC_VCLOCKS_GET]	= &ethnl_phc_vclocks_request_ops,
 	[ETHTOOL_MSG_MODULE_GET]	= &ethnl_module_request_ops,
 	[ETHTOOL_MSG_PSE_GET]		= &ethnl_pse_request_ops,
+	[ETHTOOL_MSG_RXFH_GET]		= &ethnl_rxfh_request_ops,
 };
 
 static struct ethnl_dump_ctx *ethnl_dump_context(struct netlink_callback *cb)
@@ -1040,6 +1041,15 @@  static const struct genl_ops ethtool_genl_ops[] = {
 		.policy = ethnl_pse_set_policy,
 		.maxattr = ARRAY_SIZE(ethnl_pse_set_policy) - 1,
 	},
+	{
+		.cmd	= ETHTOOL_MSG_RXFH_GET,
+		.doit	= ethnl_default_doit,
+		.start	= ethnl_default_start,
+		.dumpit	= ethnl_default_dumpit,
+		.done	= ethnl_default_done,
+		.policy = ethnl_rxfh_get_policy,
+		.maxattr = ARRAY_SIZE(ethnl_rxfh_get_policy) - 1,
+	},
 };
 
 static const struct genl_multicast_group ethtool_nl_mcgrps[] = {
diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h
index 1bfd374f9718..1ec92da7b173 100644
--- a/net/ethtool/netlink.h
+++ b/net/ethtool/netlink.h
@@ -346,6 +346,7 @@  extern const struct ethnl_request_ops ethnl_stats_request_ops;
 extern const struct ethnl_request_ops ethnl_phc_vclocks_request_ops;
 extern const struct ethnl_request_ops ethnl_module_request_ops;
 extern const struct ethnl_request_ops ethnl_pse_request_ops;
+extern const struct ethnl_request_ops ethnl_rxfh_request_ops;
 
 extern const struct nla_policy ethnl_header_policy[ETHTOOL_A_HEADER_FLAGS + 1];
 extern const struct nla_policy ethnl_header_policy_stats[ETHTOOL_A_HEADER_FLAGS + 1];
@@ -386,6 +387,7 @@  extern const struct nla_policy ethnl_module_get_policy[ETHTOOL_A_MODULE_HEADER +
 extern const struct nla_policy ethnl_module_set_policy[ETHTOOL_A_MODULE_POWER_MODE_POLICY + 1];
 extern const struct nla_policy ethnl_pse_get_policy[ETHTOOL_A_PSE_HEADER + 1];
 extern const struct nla_policy ethnl_pse_set_policy[ETHTOOL_A_PSE_MAX + 1];
+extern const struct nla_policy ethnl_rxfh_get_policy[ETHTOOL_A_RXFH_RSS_CONTEXT + 1];
 
 int ethnl_set_linkinfo(struct sk_buff *skb, struct genl_info *info);
 int ethnl_set_linkmodes(struct sk_buff *skb, struct genl_info *info);
diff --git a/net/ethtool/rxfh.c b/net/ethtool/rxfh.c
new file mode 100644
index 000000000000..136e3e8ad7d4
--- /dev/null
+++ b/net/ethtool/rxfh.c
@@ -0,0 +1,158 @@ 
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include "netlink.h"
+#include "common.h"
+
+struct rxfh_req_info {
+	struct ethnl_req_info		base;
+};
+
+struct rxfh_reply_data {
+	struct ethnl_reply_data		base;
+	struct ethtool_rxfh		rxfh;
+	u32				*rss_config;
+};
+
+#define RXFH_REPDATA(__reply_base) \
+	container_of(__reply_base, struct rxfh_reply_data, base)
+
+const struct nla_policy ethnl_rxfh_get_policy[] = {
+	[ETHTOOL_A_RXFH_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy),
+	[ETHTOOL_A_RXFH_RSS_CONTEXT] = { .type = NLA_U32 },
+};
+
+static int rxfh_prepare_data(const struct ethnl_req_info *req_base,
+			     struct ethnl_reply_data *reply_base,
+			     struct genl_info *info)
+{
+	struct rxfh_reply_data *data = RXFH_REPDATA(reply_base);
+	struct net_device *dev = reply_base->dev;
+	struct ethtool_rxfh *rxfh = &data->rxfh;
+	struct ethnl_req_info req_info = {};
+	struct nlattr **tb = info->attrs;
+	u32 indir_size = 0, hkey_size = 0;
+	const struct ethtool_ops *ops;
+	u32 total_size, indir_bytes;
+	bool mod = false;
+	u8 dev_hfunc = 0;
+	u8 *hkey = NULL;
+	u8 *rss_config;
+	int ret;
+
+	ops = dev->ethtool_ops;
+	if (!ops->get_rxfh)
+		return -EOPNOTSUPP;
+
+	ret = ethnl_parse_header_dev_get(&req_info,
+					 tb[ETHTOOL_A_RXFH_HEADER],
+					 genl_info_net(info), info->extack,
+					 true);
+	if (ret < 0)
+		return ret;
+
+	ethnl_update_u32(&rxfh->rss_context, tb[ETHTOOL_A_RXFH_RSS_CONTEXT],
+			 &mod);
+
+	/* Some drivers don't handle rss_context */
+	if (rxfh->rss_context && !ops->get_rxfh_context) {
+		ret = -EOPNOTSUPP;
+		goto out_dev;
+	}
+
+	ret = ethnl_ops_begin(dev);
+	if (ret < 0)
+		goto out_dev;
+
+	if (ops->get_rxfh_indir_size)
+		indir_size = ops->get_rxfh_indir_size(dev);
+	if (ops->get_rxfh_key_size)
+		hkey_size = ops->get_rxfh_key_size(dev);
+
+	indir_bytes = indir_size * sizeof(rxfh->rss_config[0]);
+	total_size = indir_bytes + hkey_size;
+	rss_config = kzalloc(total_size, GFP_USER);
+	if (!rss_config) {
+		ret = -ENOMEM;
+		goto out_ops;
+	}
+
+	if (indir_size) {
+		data->rss_config = (u32 *)rss_config;
+		rxfh->indir_size = indir_size;
+	}
+
+	if (hkey_size) {
+		hkey = rss_config + indir_bytes;
+		rxfh->key_size = hkey_size;
+	}
+
+	if (rxfh->rss_context)
+		ret = ops->get_rxfh_context(dev, data->rss_config, hkey,
+					    &dev_hfunc, rxfh->rss_context);
+	else
+		ret = ops->get_rxfh(dev, data->rss_config, hkey, &dev_hfunc);
+
+	rxfh->hfunc = dev_hfunc;
+
+out_ops:
+	ethnl_ops_complete(dev);
+out_dev:
+	ethnl_parse_header_dev_put(&req_info);
+	return ret;
+}
+
+static int rxfh_reply_size(const struct ethnl_req_info *req_base,
+			   const struct ethnl_reply_data *reply_base)
+{
+	const struct rxfh_reply_data *data = RXFH_REPDATA(reply_base);
+	const struct ethtool_rxfh *rxfh = &data->rxfh;
+	int len;
+
+	len =  nla_total_size(sizeof(u32)) +	/* _RSS_CONTEXT */
+	       nla_total_size(sizeof(u32)) +	/* _RXFH_INDIR_SIZE */
+	       nla_total_size(sizeof(u32)) +	/* _RXFH_KEY_SIZE */
+	       nla_total_size(sizeof(u8));	/* _RXFH_HFUNC */
+	len += nla_total_size(sizeof(u32)) * rxfh->indir_size +
+	       rxfh->key_size;
+
+	return len;
+}
+
+static int rxfh_fill_reply(struct sk_buff *skb,
+			   const struct ethnl_req_info *req_base,
+			   const struct ethnl_reply_data *reply_base)
+{
+	const struct rxfh_reply_data *data = RXFH_REPDATA(reply_base);
+	const struct ethtool_rxfh *rxfh = &data->rxfh;
+
+	if (nla_put_u32(skb, ETHTOOL_A_RXFH_RSS_CONTEXT, rxfh->rss_context) ||
+	    nla_put_u32(skb, ETHTOOL_A_RXFH_INDIR_SIZE, rxfh->indir_size) ||
+	    nla_put_u32(skb, ETHTOOL_A_RXFH_KEY_SIZE, rxfh->key_size) ||
+	    nla_put_u8(skb, ETHTOOL_A_RXFH_HFUNC, rxfh->hfunc) ||
+	    nla_put(skb, ETHTOOL_A_RXFH_RSS_CONFIG,
+		    sizeof(u32) * rxfh->indir_size + rxfh->key_size,
+		    data->rss_config))
+		return -EMSGSIZE;
+
+	return 0;
+}
+
+static void rxfh_cleanup_data(struct ethnl_reply_data *reply_base)
+{
+	const struct rxfh_reply_data *data = RXFH_REPDATA(reply_base);
+
+	kfree(data->rss_config);
+}
+
+const struct ethnl_request_ops ethnl_rxfh_request_ops = {
+	.request_cmd		= ETHTOOL_MSG_RXFH_GET,
+	.reply_cmd		= ETHTOOL_MSG_RXFH_GET_REPLY,
+	.hdr_attr		= ETHTOOL_A_RXFH_HEADER,
+	.req_info_size		= sizeof(struct rxfh_req_info),
+	.reply_data_size	= sizeof(struct rxfh_reply_data),
+
+	.prepare_data		= rxfh_prepare_data,
+	.reply_size		= rxfh_reply_size,
+	.fill_reply		= rxfh_fill_reply,
+	.cleanup_data		= rxfh_cleanup_data,
+};