Message ID | 53077d35a1183d5c1110076a07d73940bb2a55f3.1724944117.git.pabeni@redhat.com (mailing list archive) |
---|---|
State | Superseded |
Delegated to: | Netdev Maintainers |
Headers | show |
Series | net: introduce TX H/W shaping API | expand |
On Thu, 29 Aug 2024 17:16:55 +0200 Paolo Abeni wrote: > + xa_for_each_range(&data->shapers, index, shaper, ctx->start_index, > + U32_MAX) { > + net_shaper_index_to_handle(index, &handle); > + ret = net_shaper_fill_one(skb, binding, &handle, shaper, info); > + if (ret) > + return ret; > + > + ctx->start_index = index; 75% sure this should say index + 1, or the xa_for_each.. should use start_index + 1. start position is inclusive. XA iterators are hard to get right in netlink, I'd suggest using the same form of iteration as for_each_netdev_dump()..
On Thu, 29 Aug 2024 17:16:55 +0200 Paolo Abeni wrote: > +static int net_shaper_fill_handle(struct sk_buff *msg, > + const struct net_shaper_handle *handle, > + u32 type) > +{ > + struct nlattr *handle_attr; > + > + if (handle->scope == NET_SHAPER_SCOPE_UNSPEC) > + return 0; > + > + handle_attr = nla_nest_start_noflag(msg, type); _noflag() is deprecated > + if (!handle_attr) > + return -EMSGSIZE; > + > + if (nla_put_u32(msg, NET_SHAPER_A_HANDLE_SCOPE, handle->scope) || > + (handle->scope >= NET_SHAPER_SCOPE_QUEUE && > + nla_put_u32(msg, NET_SHAPER_A_HANDLE_ID, handle->id))) > + goto handle_nest_cancel; > + > + nla_nest_end(msg, handle_attr); > + return 0; > + > +handle_nest_cancel: > + nla_nest_cancel(msg, handle_attr); > + return -EMSGSIZE; > +} > +/* Initialize the context fetching the relevant device and > + * acquiring a reference to it. > + */ > +static int net_shaper_ctx_init(const struct genl_info *info, int type, > + struct net_shaper_nl_ctx *ctx) > +{ > + struct net *ns = genl_info_net(info); > + struct net_device *dev; > + int ifindex; > + > + memset(ctx, 0, sizeof(*ctx)); > + if (GENL_REQ_ATTR_CHECK(info, type)) > + return -EINVAL; > + > + ifindex = nla_get_u32(info->attrs[type]); Let's limit the 'binding' thing to just driver call sites, we can redo the rest easily later. This line and next pretends to take "arbitrary" type but clearly wants a ifindex/netdev, right? > + dev = netdev_get_by_index(ns, ifindex, &ctx->dev_tracker, GFP_KERNEL); > + if (!dev) { > + NL_SET_BAD_ATTR(info->extack, info->attrs[type]); > + return -ENOENT; > + } > +static int net_shaper_parse_handle(const struct nlattr *attr, > + const struct genl_info *info, > + struct net_shaper_handle *handle) > +{ > + struct nlattr *tb[NET_SHAPER_A_HANDLE_MAX + 1]; > + struct nlattr *scope_attr, *id_attr; > + u32 id = 0; > + int ret; > + > + ret = nla_parse_nested(tb, NET_SHAPER_A_HANDLE_MAX, attr, > + net_shaper_handle_nl_policy, info->extack); > + if (ret < 0) > + return ret; > + > + scope_attr = tb[NET_SHAPER_A_HANDLE_SCOPE]; > + if (!scope_attr) { NL_REQ_ATTR_CHECK() > + NL_SET_BAD_ATTR(info->extack, > + tb[NET_SHAPER_A_HANDLE_SCOPE]); > + return -EINVAL; > + } > + > + handle->scope = nla_get_u32(scope_attr); > + > + /* The default id for NODE scope shapers is an invalid one > + * to help the 'group' operation discriminate between new > + * NODE shaper creation (ID_UNSPEC) and reuse of existing > + * shaper (any other value). > + */ > + id_attr = tb[NET_SHAPER_A_HANDLE_ID]; > + if (id_attr) > + id = nla_get_u32(id_attr); > + else if (handle->scope == NET_SHAPER_SCOPE_NODE) > + id = NET_SHAPER_ID_UNSPEC; > + > + handle->id = id; > + return 0; > +} > + > +static int net_shaper_generic_pre(struct genl_info *info, int type) > +{ > + struct net_shaper_nl_ctx *ctx; > + int ret; > + > + ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); Maybe send a patch like this, to avoid having to allocate this space, and special casing dump vs doit: diff --git a/include/net/genetlink.h b/include/net/genetlink.h index 9ab49bfeae78..7658f0885178 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -124,7 +124,8 @@ struct genl_family { * @genlhdr: generic netlink message header * @attrs: netlink attributes * @_net: network namespace - * @user_ptr: user pointers + * @ctx: storage space for the use by the family + * @user_ptr: user pointers (deprecated, use ctx instead) * @extack: extended ACK report struct */ struct genl_info { @@ -135,7 +136,10 @@ struct genl_info { struct genlmsghdr * genlhdr; struct nlattr ** attrs; possible_net_t _net; - void * user_ptr[2]; + union { + u8 ctx[48]; + void * user_ptr[2]; + }; struct netlink_ext_ack *extack; }; diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index feb54c63a116..29387b605f3e 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -997,7 +997,7 @@ static int genl_start(struct netlink_callback *cb) info->info.attrs = attrs; genl_info_net_set(&info->info, sock_net(cb->skb->sk)); info->info.extack = cb->extack; - memset(&info->info.user_ptr, 0, sizeof(info->info.user_ptr)); + memset(&info->info.ctx, 0, sizeof(info->info.ctx)); cb->data = info; if (ops->start) { @@ -1104,7 +1104,7 @@ static int genl_family_rcv_msg_doit(const struct genl_family *family, info.attrs = attrbuf; info.extack = extack; genl_info_net_set(&info, net); - memset(&info.user_ptr, 0, sizeof(info.user_ptr)); + memset(&info.ctx, 0, sizeof(info.ctx)); if (ops->pre_doit) { err = ops->pre_doit(ops, skb, &info); > + if (!ctx) > + return -ENOMEM; > + > + ret = net_shaper_ctx_init(info, type, ctx); > + if (ret) { > + kfree(ctx); > + return ret; > + } > + > + info->user_ptr[0] = ctx; > + return 0; > +} > + > int net_shaper_nl_get_doit(struct sk_buff *skb, struct genl_info *info) > { > - return -EOPNOTSUPP; > + struct net_shaper_binding *binding; > + struct net_shaper_handle handle; > + struct net_shaper_info *shaper; > + struct sk_buff *msg; > + int ret; > + > + if (GENL_REQ_ATTR_CHECK(info, NET_SHAPER_A_HANDLE)) > + return -EINVAL; > + > + binding = net_shaper_binding_from_ctx(info->user_ptr[0]); This 'binding' has the same meaning as 'binding' in TCP ZC? :( > + shaper = net_shaper_cache_lookup(binding, &handle); Why call the stored info "cache"? It's the authoritative version of user configuration, isn't it? > + if (!shaper) { > + NL_SET_BAD_ATTR(info->extack, > + info->attrs[NET_SHAPER_A_HANDLE]); > + return -ENOENT; > + } > + > + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); > + if (!msg) > + return -ENOMEM; > + > + ret = net_shaper_fill_one(msg, binding, &handle, shaper, info); > + if (ret) > + goto free_msg; > + > + ret = genlmsg_reply(msg, info); double space
On 8/30/24 03:20, Jakub Kicinski wrote:>> +/* Initialize the context fetching the relevant device and >> + * acquiring a reference to it. >> + */ >> +static int net_shaper_ctx_init(const struct genl_info *info, int type, >> + struct net_shaper_nl_ctx *ctx) >> +{ >> + struct net *ns = genl_info_net(info); >> + struct net_device *dev; >> + int ifindex; >> + >> + memset(ctx, 0, sizeof(*ctx)); >> + if (GENL_REQ_ATTR_CHECK(info, type)) >> + return -EINVAL; >> + >> + ifindex = nla_get_u32(info->attrs[type]); > > Let's limit the 'binding' thing to just driver call sites, we can > redo the rest easily later. This line and next pretends to take > "arbitrary" type but clearly wants a ifindex/netdev, right? There is a misunderstanding. This helper will be used in a following patch (7/12) with a different 'type' argument: NET_SHAPER_A_BINDING_IFINDEX. I've put a note in the commit message, but was unintentionally dropped in one of the recent refactors. I'll add that note back. I hope you are ok with the struct net_shaper_binding * argument to most helpers? does not add complexity, will help to support devlink objects and swapping back and forth from/to struct net_device* can't be automated. [...] > Maybe send a patch like this, to avoid having to allocate this space, > and special casing dump vs doit: > > diff --git a/include/net/genetlink.h b/include/net/genetlink.h > index 9ab49bfeae78..7658f0885178 100644 > --- a/include/net/genetlink.h > +++ b/include/net/genetlink.h > @@ -124,7 +124,8 @@ struct genl_family { > * @genlhdr: generic netlink message header > * @attrs: netlink attributes > * @_net: network namespace > - * @user_ptr: user pointers > + * @ctx: storage space for the use by the family > + * @user_ptr: user pointers (deprecated, use ctx instead) > * @extack: extended ACK report struct > */ > struct genl_info { > @@ -135,7 +136,10 @@ struct genl_info { > struct genlmsghdr * genlhdr; > struct nlattr ** attrs; > possible_net_t _net; > - void * user_ptr[2]; > + union { > + u8 ctx[48]; > + void * user_ptr[2]; > + }; > struct netlink_ext_ack *extack; > }; Makes sense. Plus likely: #define NETLINK_CTX_SIZE 48 and use such define above and in linux/netlink.h Thanks, Paolo
Hi, Please allow me to put a few high level questions together, to both underline them as most critical, and keep the thread focused. On 8/30/24 03:20, Jakub Kicinski wrote: > This 'binding' has the same meaning as 'binding' in TCP ZC? :( I hope we can agree that good naming is difficult. I thought we agreed on such naming in the past week’s discussion. The term 'binding' is already used in the networking stack in many places to identify different things (i.e. device tree, socket, netfilter.. ). The name prefix avoids any ambiguity and I think this a good name, but if you have any better suggestions, this change should be trivial. [about per device shaper lock] > I've been wondering if we shouldn't move this lock > directly into net_device and combine it with the RSS lock. > Create a "per-netdev" lock, instead of having multiple disparate > mutexes which are hard to allocate? The above looks like a quite unrelated refactor and one I think it will not be worthy. The complexity of locking code in this series is very limited, and self-encapsulated. Different locks for different things increases scalability. Possibly we will not see much contention on the same device, but some years ago we did not think there would be much contention on RTNL... Additionally, if we use a per _network device_ lock, future expansion of the core to support devlink objects will be more difficult. [about separate handle from shaper_info arguments] > Wouldn't it be convenient to store the handle in the "info" > object? AFAIU the handle is forever for an info, so no risk of it > being out of sync… Was that way a couple of iterations ago. Jiri explicitly asked for the separation, I asked for confirmation and nobody objected. Which if the 2 options is acceptable from both of you? [about queue limit and channel reconf] > we probably want to trim the queue shapers on channel reconfig, > then, too? :( what about exposing to the drivers an helper alike: net_shaper_notify_delete(binding, handle); that tells the core the shaper at the given handle just went away in the H/W? The driver will call it in the queue deletion helper, and such helper could be later on used more generically, i.e. for vf/devlink port deletion. [about capabilities support] > It's not just for introspection, it's also for the core to do > error checking. Actually, in the previous discussions it was never mentioned to use capabilities to fully centralize the error checking. This really looks like another feature, and can easily be added in a second time (say, a follow-up series), with no functionality loss. I (or anybody else) can’t keep adding new features at every iteration. At some point we need to draw a line, and we should agree that the scope of this activity has already expanded a lot in the past year. I would like to draw such a line here. Thanks, Paolo
On Fri, 30 Aug 2024 12:55:05 +0200 Paolo Abeni wrote: > On 8/30/24 03:20, Jakub Kicinski wrote:>> +/* Initialize the context > fetching the relevant device and > >> + * acquiring a reference to it. > >> + */ > >> +static int net_shaper_ctx_init(const struct genl_info *info, int type, > >> + struct net_shaper_nl_ctx *ctx) > >> +{ > >> + struct net *ns = genl_info_net(info); > >> + struct net_device *dev; > >> + int ifindex; > >> + > >> + memset(ctx, 0, sizeof(*ctx)); > >> + if (GENL_REQ_ATTR_CHECK(info, type)) > >> + return -EINVAL; > >> + > >> + ifindex = nla_get_u32(info->attrs[type]); > > > > Let's limit the 'binding' thing to just driver call sites, we can > > redo the rest easily later. This line and next pretends to take > > "arbitrary" type but clearly wants a ifindex/netdev, right? > > There is a misunderstanding. This helper will be used in a following > patch (7/12) with a different 'type' argument: > NET_SHAPER_A_BINDING_IFINDEX. I've put a note in the commit message, but > was unintentionally dropped in one of the recent refactors. I'll add > that note back. What I'm saying is that if you want to prep the ground for more "binding" types you should also add: if (type != ...IFINDEX) { /* other binding types are TBD */ return -EINVAL; } > I hope you are ok with the struct net_shaper_binding * argument to most > helpers? does not add complexity, will help to support devlink objects > and swapping back and forth from/to struct net_device* can't be automated. I am "okay" in the American sense of the word which AFAIU is "unhappy but won't complain unless asked". > > Maybe send a patch like this, to avoid having to allocate this space, > > and special casing dump vs doit: > > > > diff --git a/include/net/genetlink.h b/include/net/genetlink.h > > index 9ab49bfeae78..7658f0885178 100644 > > --- a/include/net/genetlink.h > > +++ b/include/net/genetlink.h > > @@ -124,7 +124,8 @@ struct genl_family { > > * @genlhdr: generic netlink message header > > * @attrs: netlink attributes > > * @_net: network namespace > > - * @user_ptr: user pointers > > + * @ctx: storage space for the use by the family > > + * @user_ptr: user pointers (deprecated, use ctx instead) > > * @extack: extended ACK report struct > > */ > > struct genl_info { > > @@ -135,7 +136,10 @@ struct genl_info { > > struct genlmsghdr * genlhdr; > > struct nlattr ** attrs; > > possible_net_t _net; > > - void * user_ptr[2]; > > + union { > > + u8 ctx[48]; > > + void * user_ptr[2]; > > + }; > > struct netlink_ext_ack *extack; > > }; > > Makes sense. Plus likely: > > #define NETLINK_CTX_SIZE 48 > > and use such define above and in linux/netlink.h Aha, would be good to also have a checking macro. Maybe rename NL_ASSERT_DUMP_CTX_FITS() to apply more broadly? or add a new one? Weak preference for former.
On Fri, 30 Aug 2024 17:43:08 +0200 Paolo Abeni wrote: > Please allow me to put a few high level questions together, to both > underline them as most critical, and keep the thread focused. > > On 8/30/24 03:20, Jakub Kicinski wrote: > > This 'binding' has the same meaning as 'binding' in TCP ZC? :( > > I hope we can agree that good naming is difficult. I thought we agreed > on such naming in the past week’s discussion. The term 'binding' is > already used in the networking stack in many places to identify > different things (i.e. device tree, socket, netfilter.. ). The name > prefix avoids any ambiguity and I think this a good name, but if you > have any better suggestions, this change should be trivial. Ack. Maybe we can cut down the number of ambiguous nouns elsewhere: maybe call net_shaper_info -> net_shaper ? maybe net_shaper_data -> net_shaper_hierarchy ? > > I've been wondering if we shouldn't move this lock > > directly into net_device and combine it with the RSS lock. > > Create a "per-netdev" lock, instead of having multiple disparate > > mutexes which are hard to allocate? > > The above looks like a quite unrelated refactor and one I think it will > not be worthy. The complexity of locking code in this series is very > limited, and self-encapsulated. Different locks for different things > increases scalability. Possibly we will not see much contention on the > same device, but some years ago we did not think there would be much > contention on RTNL... We need to do this, anyway. Let me do it myself, then. > Additionally, if we use a per _network device_ lock, future expansion of > the core to support devlink objects will be more difficult. You parse out the binding you can store a pointer to the right mutex. > [about separate handle from shaper_info arguments] > > Wouldn't it be convenient to store the handle in the "info" > > object? AFAIU the handle is forever for an info, so no risk of it > > being out of sync… > > Was that way a couple of iterations ago. Jiri explicitly asked for the > separation, I asked for confirmation and nobody objected. Could you link to that? I must have not read it. You can keep it wrapped in a struct *_handle, that's fine. But it can live inside the shaper object. > Which if the 2 options is acceptable from both of you? > > [about queue limit and channel reconf] > > we probably want to trim the queue shapers on channel reconfig, > > then, too? :( > > what about exposing to the drivers an helper alike: > > net_shaper_notify_delete(binding, handle); > > that tells the core the shaper at the given handle just went away in the > H/W? The driver will call it in the queue deletion helper, and such > helper could be later on used more generically, i.e. for vf/devlink port > deletion. We can either prevent disabling queues which have shapers attached, or auto-removing the shapers. No preference on that. But put the callback in the core, please, netif_set_real_num_rx_queues() ? Why not? > > It's not just for introspection, it's also for the core to do > > error checking. > > Actually, in the previous discussions it was never mentioned to use > capabilities to fully centralize the error checking. > > This really looks like another feature, and can easily be added in a > second time (say, a follow-up series), with no functionality loss. > > I (or anybody else) can’t keep adding new features at every iteration. > At some point we need to draw a line, and we should agree that the scope > of this activity has already expanded a lot in the past year. I would > like to draw such a line here. I can help you. Just tell me which parts you want me to take care of.
On Fri, 30 Aug 2024 11:39:00 -0700 Jakub Kicinski wrote: > > There is a misunderstanding. This helper will be used in a following > > patch (7/12) with a different 'type' argument: > > NET_SHAPER_A_BINDING_IFINDEX. I've put a note in the commit message, but > > was unintentionally dropped in one of the recent refactors. I'll add > > that note back. > > What I'm saying is that if you want to prep the ground for more > "binding" types you should also add: > > if (type != ...IFINDEX) { > /* other binding types are TBD */ > return -EINVAL; > } Ah, the part I missed is that there are two different types for ifindex: NET_SHAPER_A_IFINDEX NET_SHAPER_A_CAPABILITIES_IFINDEX Got it now.
On 8/30/24 21:14, Jakub Kicinski wrote: > On Fri, 30 Aug 2024 17:43:08 +0200 Paolo Abeni wrote: >> Please allow me to put a few high level questions together, to both >> underline them as most critical, and keep the thread focused. >> >> On 8/30/24 03:20, Jakub Kicinski wrote: >> > This 'binding' has the same meaning as 'binding' in TCP ZC? :( >> >> I hope we can agree that good naming is difficult. I thought we agreed >> on such naming in the past week’s discussion. The term 'binding' is >> already used in the networking stack in many places to identify >> different things (i.e. device tree, socket, netfilter.. ). The name >> prefix avoids any ambiguity and I think this a good name, but if you >> have any better suggestions, this change should be trivial. > > Ack. Maybe we can cut down the number of ambiguous nouns elsewhere: > > maybe call net_shaper_info -> net_shaper ? > > maybe net_shaper_data -> net_shaper_hierarchy ? Is everybody fine with the above? >> [about separate handle from shaper_info arguments] >> > Wouldn't it be convenient to store the handle in the "info" >> > object? AFAIU the handle is forever for an info, so no risk of it >> > being out of sync… >> >> Was that way a couple of iterations ago. Jiri explicitly asked for the >> separation, I asked for confirmation and nobody objected. > > Could you link to that? I must have not read it. https://lore.kernel.org/netdev/ZqzIoZaGVb3jIW43@nanopsycho.orion/ search for "I wonder if the handle should be part of this structure" I must admit by wannabe reply on such point never left my outbox. > You can keep it wrapped in a struct *_handle, that's fine. > But it can live inside the shaper object. That is basically the opposite of what Jiri asked. @Jiri would you be ok reverting to such layout? >> Which if the 2 options is acceptable from both of you? >> >> [about queue limit and channel reconf] >> > we probably want to trim the queue shapers on channel reconfig, >> > then, too? :( >> >> what about exposing to the drivers an helper alike: >> >> net_shaper_notify_delete(binding, handle); >> >> that tells the core the shaper at the given handle just went away in the >> H/W? The driver will call it in the queue deletion helper, and such >> helper could be later on used more generically, i.e. for vf/devlink port >> deletion. > > We can either prevent disabling queues which have shapers attached, > or auto-removing the shapers. I think/fear that prevent disabling queues would lead to weird/unexpected results and more difficult administration, I prefer the callback option. > No preference on that. But put the > callback in the core, please, netif_set_real_num_rx_queues() ? > Why not? It makes sense. I'll add a net_shaper_set_real_num_rx_queues() callback there. /P
On 8/30/24 20:39, Jakub Kicinski wrote: > On Fri, 30 Aug 2024 12:55:05 +0200 Paolo Abeni wrote: >> #define NETLINK_CTX_SIZE 48 >> >> and use such define above and in linux/netlink.h > > Aha, would be good to also have a checking macro. Maybe rename > > NL_ASSERT_DUMP_CTX_FITS() > > to apply more broadly? or add a new one? Weak preference for former. I will rename it to NL_ASSERT_CTX_FITS(), in the same patch. /P
On Mon, 2 Sep 2024 12:10:50 +0200 Paolo Abeni wrote: > >> Was that way a couple of iterations ago. Jiri explicitly asked for the > >> separation, I asked for confirmation and nobody objected. > > > > Could you link to that? I must have not read it. > > https://lore.kernel.org/netdev/ZqzIoZaGVb3jIW43@nanopsycho.orion/ > > search for "I wonder if the handle should be part of this structure" > > I must admit by wannabe reply on such point never left my outbox. "I wonder if .." does not sound like a strong preference. And the parent ID remained in the struct, so it still partially records its position in the hierarchy. Since there is no "move" op it's really not worth multiplying arguments to most functions 2x.
diff --git a/Documentation/networking/kapi.rst b/Documentation/networking/kapi.rst index ea55f462cefa..98682b9a13ee 100644 --- a/Documentation/networking/kapi.rst +++ b/Documentation/networking/kapi.rst @@ -104,6 +104,9 @@ Driver Support .. kernel-doc:: include/linux/netdevice.h :internal: +.. kernel-doc:: include/net/net_shaper.h + :internal: + PHY Support ----------- diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index fce70990b209..71bd011fde7b 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1603,6 +1603,14 @@ struct net_device_ops { int (*ndo_hwtstamp_set)(struct net_device *dev, struct kernel_hwtstamp_config *kernel_config, struct netlink_ext_ack *extack); + +#if IS_ENABLED(CONFIG_NET_SHAPER) + /** + * @net_shaper_ops: Device shaping offload operations + * see include/net/net_shapers.h + */ + const struct net_shaper_ops *net_shaper_ops; +#endif }; /** @@ -2383,6 +2391,13 @@ struct net_device { /** @irq_moder: dim parameters used if IS_ENABLED(CONFIG_DIMLIB). */ struct dim_irq_moder *irq_moder; +#if IS_ENABLED(CONFIG_NET_SHAPER) + /** + * @net_shaper_data: data tracking the current shaper status + * see include/net/net_shapers.h + */ + struct net_shaper_data *net_shaper_data; +#endif u8 priv[] ____cacheline_aligned __counted_by(priv_len); } ____cacheline_aligned; diff --git a/include/net/net_shaper.h b/include/net/net_shaper.h new file mode 100644 index 000000000000..52bad5a2f63b --- /dev/null +++ b/include/net/net_shaper.h @@ -0,0 +1,121 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#ifndef _NET_SHAPER_H_ +#define _NET_SHAPER_H_ + +#include <linux/types.h> + +#include <uapi/linux/net_shaper.h> + +struct net_device; +struct devlink; +struct netlink_ext_ack; + +enum net_shaper_binding_type { + NET_SHAPER_BINDING_TYPE_NETDEV, + NET_SHAPER_BINDING_TYPE_DEVLINK_PORT, +}; + +struct net_shaper_binding { + enum net_shaper_binding_type type; + union { + struct net_device *netdev; + struct devlink *devlink; + }; +}; + +struct net_shaper_handle { + enum net_shaper_scope scope; + int id; +}; + +/** + * struct net_shaper_info - represents a shaping node on the NIC H/W + * zeroed field are considered not set. + * @parent: Unique identifier for the shaper parent, usually implied + * @metric: Specify if the rate limits refers to PPS or BPS + * @bw_min: Minimum guaranteed rate for this shaper + * @bw_max: Maximum peak rate allowed for this shaper + * @burst: Maximum burst for the peek rate of this shaper + * @priority: Scheduling priority for this shaper + * @weight: Scheduling weight for this shaper + */ +struct net_shaper_info { + struct net_shaper_handle parent; + enum net_shaper_metric metric; + u64 bw_min; + u64 bw_max; + u64 burst; + u32 priority; + u32 weight; + + /* private: */ + u32 leaves; /* accounted only for NODE scope */ +}; + +/** + * struct net_shaper_ops - Operations on device H/W shapers + * + * The operations applies to either net_device and devlink objects. + * The initial shaping configuration at device initialization is empty: + * does not constraint the rate in any way. + * The network core keeps track of the applied user-configuration in + * the net_device or devlink structure. + * The operations are serialized via a per device lock. + * + * Each shaper is uniquely identified within the device with a 'handle' + * comprising the shaper scope and a scope-specific id. + */ +struct net_shaper_ops { + /** + * @group: create the specified shapers scheduling group + * + * Nest the @leaves shapers identified by @leaves_handles under the + * @root shaper identified by @root_handle. All the shapers belong + * to the network device @dev. The @leaves and @leaves_handles shaper + * arrays size is specified by @leaves_count. + * Create either the @leaves and the @root shaper; or if they already + * exists, links them together in the desired way. + * @leaves scope must be NET_SHAPER_SCOPE_QUEUE. + */ + int (*group)(struct net_shaper_binding *binding, int leaves_count, + const struct net_shaper_handle *leaves_handles, + const struct net_shaper_info *leaves, + const struct net_shaper_handle *root_handle, + const struct net_shaper_info *root, + struct netlink_ext_ack *extack); + + /** + * @set: Updates the specified shaper + * + * Updates or creates the @shaper identified by the provided @handle + * on the given device @dev. + */ + int (*set)(struct net_shaper_binding *binding, + const struct net_shaper_handle *handle, + const struct net_shaper_info *shaper, + struct netlink_ext_ack *extack); + + /** + * @delete: Removes the specified shaper + * + * Removes the shaper configuration as identified by the given @handle + * on the specified device @dev, restoring the default behavior. + */ + int (*delete)(struct net_shaper_binding *binding, + const struct net_shaper_handle *handle, + struct netlink_ext_ack *extack); + + /** + * @capabilities: get the shaper features supported by the device + * + * Fills the bitmask @cap with the supported capabilities for the + * specified @scope and device @dev. + * + * Returns 0 on success or a negative error value otherwise. + */ + int (*capabilities)(struct net_shaper_binding *binding, + enum net_shaper_scope scope, unsigned long *cap); +}; + +#endif diff --git a/net/core/dev.c b/net/core/dev.c index 63987b8b7c85..23629abd3ef7 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -11201,6 +11201,8 @@ void free_netdev(struct net_device *dev) /* Flush device addresses */ dev_addr_flush(dev); + net_shaper_flush_netdev(dev); + list_for_each_entry_safe(p, n, &dev->napi_list, dev_list) netif_napi_del(p); diff --git a/net/core/dev.h b/net/core/dev.h index 5654325c5b71..13c558874af3 100644 --- a/net/core/dev.h +++ b/net/core/dev.h @@ -35,6 +35,12 @@ void dev_addr_flush(struct net_device *dev); int dev_addr_init(struct net_device *dev); void dev_addr_check(struct net_device *dev); +#if IS_ENABLED(CONFIG_NET_SHAPER) +void net_shaper_flush_netdev(struct net_device *dev); +#else +static inline void net_shaper_flush_netdev(struct net_device *dev) {} +#endif + /* sysctls not referred to from outside net/core/ */ extern int netdev_unregister_timeout_secs; extern int weight_p; diff --git a/net/shaper/shaper.c b/net/shaper/shaper.c index a1b20888f502..2ed80df25765 100644 --- a/net/shaper/shaper.c +++ b/net/shaper/shaper.c @@ -1,30 +1,361 @@ // SPDX-License-Identifier: GPL-2.0-or-later +#include <linux/bits.h> +#include <linux/bitfield.h> +#include <linux/idr.h> #include <linux/kernel.h> +#include <linux/netdevice.h> +#include <linux/netlink.h> #include <linux/skbuff.h> +#include <linux/xarray.h> +#include <net/devlink.h> +#include <net/net_shaper.h> #include "shaper_nl_gen.h" +#include "../core/dev.h" + +#define NET_SHAPER_SCOPE_SHIFT 26 +#define NET_SHAPER_ID_MASK GENMASK(NET_SHAPER_SCOPE_SHIFT - 1, 0) +#define NET_SHAPER_SCOPE_MASK GENMASK(31, NET_SHAPER_SCOPE_SHIFT) + +#define NET_SHAPER_ID_UNSPEC NET_SHAPER_ID_MASK + +struct net_shaper_data { + struct xarray shapers; +}; + +struct net_shaper_nl_ctx { + struct net_shaper_binding binding; + netdevice_tracker dev_tracker; + u32 start_index; +}; + +static struct net_shaper_binding *net_shaper_binding_from_ctx(void *ctx) +{ + return &((struct net_shaper_nl_ctx *)ctx)->binding; +} + +static struct net_shaper_data * +net_shaper_binding_data(struct net_shaper_binding *binding) +{ + /* The barrier pairs with cmpxchg on init. */ + if (binding->type == NET_SHAPER_BINDING_TYPE_NETDEV) + return READ_ONCE(binding->netdev->net_shaper_data); + + /* No other type supported yet.*/ + return NULL; +} + +static int net_shaper_fill_binding(struct sk_buff *msg, + const struct net_shaper_binding *binding, + u32 type) +{ + /* Should never happen, as currently only NETDEV is supported */ + if (WARN_ON_ONCE(binding->type != NET_SHAPER_BINDING_TYPE_NETDEV)) + return -EINVAL; + + if (nla_put_u32(msg, type, binding->netdev->ifindex)) + return -EMSGSIZE; + + return 0; +} + +static int net_shaper_fill_handle(struct sk_buff *msg, + const struct net_shaper_handle *handle, + u32 type) +{ + struct nlattr *handle_attr; + + if (handle->scope == NET_SHAPER_SCOPE_UNSPEC) + return 0; + + handle_attr = nla_nest_start_noflag(msg, type); + if (!handle_attr) + return -EMSGSIZE; + + if (nla_put_u32(msg, NET_SHAPER_A_HANDLE_SCOPE, handle->scope) || + (handle->scope >= NET_SHAPER_SCOPE_QUEUE && + nla_put_u32(msg, NET_SHAPER_A_HANDLE_ID, handle->id))) + goto handle_nest_cancel; + + nla_nest_end(msg, handle_attr); + return 0; + +handle_nest_cancel: + nla_nest_cancel(msg, handle_attr); + return -EMSGSIZE; +} + +static int +net_shaper_fill_one(struct sk_buff *msg, + const struct net_shaper_binding *binding, + const struct net_shaper_handle *handle, + const struct net_shaper_info *shaper, + const struct genl_info *info) +{ + void *hdr; + + hdr = genlmsg_iput(msg, info); + if (!hdr) + return -EMSGSIZE; + + if (net_shaper_fill_binding(msg, binding, NET_SHAPER_A_IFINDEX) || + net_shaper_fill_handle(msg, &shaper->parent, + NET_SHAPER_A_PARENT) || + net_shaper_fill_handle(msg, handle, NET_SHAPER_A_HANDLE) || + ((shaper->bw_min || shaper->bw_max || shaper->burst) && + nla_put_u32(msg, NET_SHAPER_A_METRIC, shaper->metric)) || + (shaper->bw_min && + nla_put_uint(msg, NET_SHAPER_A_BW_MIN, shaper->bw_min)) || + (shaper->bw_max && + nla_put_uint(msg, NET_SHAPER_A_BW_MAX, shaper->bw_max)) || + (shaper->burst && + nla_put_uint(msg, NET_SHAPER_A_BURST, shaper->burst)) || + (shaper->priority && + nla_put_u32(msg, NET_SHAPER_A_PRIORITY, shaper->priority)) || + (shaper->weight && + nla_put_u32(msg, NET_SHAPER_A_WEIGHT, shaper->weight))) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + + return 0; + +nla_put_failure: + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; +} + +/* Initialize the context fetching the relevant device and + * acquiring a reference to it. + */ +static int net_shaper_ctx_init(const struct genl_info *info, int type, + struct net_shaper_nl_ctx *ctx) +{ + struct net *ns = genl_info_net(info); + struct net_device *dev; + int ifindex; + + memset(ctx, 0, sizeof(*ctx)); + if (GENL_REQ_ATTR_CHECK(info, type)) + return -EINVAL; + + ifindex = nla_get_u32(info->attrs[type]); + dev = netdev_get_by_index(ns, ifindex, &ctx->dev_tracker, GFP_KERNEL); + if (!dev) { + NL_SET_BAD_ATTR(info->extack, info->attrs[type]); + return -ENOENT; + } + + if (!dev->netdev_ops->net_shaper_ops) { + NL_SET_BAD_ATTR(info->extack, info->attrs[type]); + netdev_put(dev, &ctx->dev_tracker); + return -EOPNOTSUPP; + } + + ctx->binding.type = NET_SHAPER_BINDING_TYPE_NETDEV; + ctx->binding.netdev = dev; + return 0; +} + +static void net_shaper_ctx_cleanup(struct net_shaper_nl_ctx *ctx) +{ + if (ctx->binding.type == NET_SHAPER_BINDING_TYPE_NETDEV) + netdev_put(ctx->binding.netdev, &ctx->dev_tracker); +} + +static u32 net_shaper_handle_to_index(const struct net_shaper_handle *handle) +{ + return FIELD_PREP(NET_SHAPER_SCOPE_MASK, handle->scope) | + FIELD_PREP(NET_SHAPER_ID_MASK, handle->id); +} + +static void net_shaper_index_to_handle(u32 index, + struct net_shaper_handle *handle) +{ + handle->scope = FIELD_GET(NET_SHAPER_SCOPE_MASK, index); + handle->id = FIELD_GET(NET_SHAPER_ID_MASK, index); +} + +/* Lookup the given shaper inside the cache. */ +static struct net_shaper_info * +net_shaper_cache_lookup(struct net_shaper_binding *binding, + const struct net_shaper_handle *handle) +{ + struct net_shaper_data *data = net_shaper_binding_data(binding); + u32 index = net_shaper_handle_to_index(handle); + + return data ? xa_load(&data->shapers, index) : NULL; +} + +static int net_shaper_parse_handle(const struct nlattr *attr, + const struct genl_info *info, + struct net_shaper_handle *handle) +{ + struct nlattr *tb[NET_SHAPER_A_HANDLE_MAX + 1]; + struct nlattr *scope_attr, *id_attr; + u32 id = 0; + int ret; + + ret = nla_parse_nested(tb, NET_SHAPER_A_HANDLE_MAX, attr, + net_shaper_handle_nl_policy, info->extack); + if (ret < 0) + return ret; + + scope_attr = tb[NET_SHAPER_A_HANDLE_SCOPE]; + if (!scope_attr) { + NL_SET_BAD_ATTR(info->extack, + tb[NET_SHAPER_A_HANDLE_SCOPE]); + return -EINVAL; + } + + handle->scope = nla_get_u32(scope_attr); + + /* The default id for NODE scope shapers is an invalid one + * to help the 'group' operation discriminate between new + * NODE shaper creation (ID_UNSPEC) and reuse of existing + * shaper (any other value). + */ + id_attr = tb[NET_SHAPER_A_HANDLE_ID]; + if (id_attr) + id = nla_get_u32(id_attr); + else if (handle->scope == NET_SHAPER_SCOPE_NODE) + id = NET_SHAPER_ID_UNSPEC; + + handle->id = id; + return 0; +} + +static int net_shaper_generic_pre(struct genl_info *info, int type) +{ + struct net_shaper_nl_ctx *ctx; + int ret; + + ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return -ENOMEM; + + ret = net_shaper_ctx_init(info, type, ctx); + if (ret) { + kfree(ctx); + return ret; + } + + info->user_ptr[0] = ctx; + return 0; +} + int net_shaper_nl_pre_doit(const struct genl_split_ops *ops, struct sk_buff *skb, struct genl_info *info) { - return -EOPNOTSUPP; + return net_shaper_generic_pre(info, NET_SHAPER_A_IFINDEX); +} + +static void net_shaper_generic_post(struct genl_info *info) +{ + struct net_shaper_nl_ctx *ctx = info->user_ptr[0]; + + net_shaper_ctx_cleanup(ctx); + kfree(ctx); } void net_shaper_nl_post_doit(const struct genl_split_ops *ops, struct sk_buff *skb, struct genl_info *info) { + net_shaper_generic_post(info); +} + +int net_shaper_nl_pre_dumpit(struct netlink_callback *cb) +{ + struct net_shaper_nl_ctx *ctx = (struct net_shaper_nl_ctx *)cb->ctx; + const struct genl_info *info = genl_info_dump(cb); + + BUILD_BUG_ON(sizeof(*ctx) > sizeof(cb->ctx)); + + return net_shaper_ctx_init(info, NET_SHAPER_A_IFINDEX, ctx); +} + +int net_shaper_nl_post_dumpit(struct netlink_callback *cb) +{ + struct net_shaper_nl_ctx *ctx = (struct net_shaper_nl_ctx *)cb->ctx; + + net_shaper_ctx_cleanup(ctx); + return 0; } int net_shaper_nl_get_doit(struct sk_buff *skb, struct genl_info *info) { - return -EOPNOTSUPP; + struct net_shaper_binding *binding; + struct net_shaper_handle handle; + struct net_shaper_info *shaper; + struct sk_buff *msg; + int ret; + + if (GENL_REQ_ATTR_CHECK(info, NET_SHAPER_A_HANDLE)) + return -EINVAL; + + binding = net_shaper_binding_from_ctx(info->user_ptr[0]); + ret = net_shaper_parse_handle(info->attrs[NET_SHAPER_A_HANDLE], info, + &handle); + if (ret < 0) + return ret; + + shaper = net_shaper_cache_lookup(binding, &handle); + if (!shaper) { + NL_SET_BAD_ATTR(info->extack, + info->attrs[NET_SHAPER_A_HANDLE]); + return -ENOENT; + } + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + ret = net_shaper_fill_one(msg, binding, &handle, shaper, info); + if (ret) + goto free_msg; + + ret = genlmsg_reply(msg, info); + if (ret) + goto free_msg; + + return 0; + +free_msg: + nlmsg_free(msg); + return ret; } int net_shaper_nl_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb) { - return -EOPNOTSUPP; + struct net_shaper_nl_ctx *ctx = (struct net_shaper_nl_ctx *)cb->ctx; + const struct genl_info *info = genl_info_dump(cb); + struct net_shaper_binding *binding; + struct net_shaper_handle handle; + struct net_shaper_info *shaper; + struct net_shaper_data *data; + unsigned long index; + int ret; + + /* Don't error out dumps performed before any set operation. */ + binding = net_shaper_binding_from_ctx(ctx); + data = net_shaper_binding_data(binding); + if (!data) + return 0; + + xa_for_each_range(&data->shapers, index, shaper, ctx->start_index, + U32_MAX) { + net_shaper_index_to_handle(index, &handle); + ret = net_shaper_fill_one(skb, binding, &handle, shaper, info); + if (ret) + return ret; + + ctx->start_index = index; + } + + return 0; } int net_shaper_nl_set_doit(struct sk_buff *skb, struct genl_info *info) @@ -37,14 +368,32 @@ int net_shaper_nl_delete_doit(struct sk_buff *skb, struct genl_info *info) return -EOPNOTSUPP; } -int net_shaper_nl_pre_dumpit(struct netlink_callback *cb) +static void net_shaper_flush(struct net_shaper_binding *binding) { - return -EOPNOTSUPP; + struct net_shaper_data *data = net_shaper_binding_data(binding); + struct net_shaper_info *cur; + unsigned long index; + + if (!data) + return; + + xa_lock(&data->shapers); + xa_for_each(&data->shapers, index, cur) { + __xa_erase(&data->shapers, index); + kfree(cur); + } + xa_unlock(&data->shapers); + kfree(data); } -int net_shaper_nl_post_dumpit(struct netlink_callback *cb) +void net_shaper_flush_netdev(struct net_device *dev) { - return -EOPNOTSUPP; + struct net_shaper_binding binding = { + .type = NET_SHAPER_BINDING_TYPE_NETDEV, + .netdev = dev, + }; + + net_shaper_flush(&binding); } static int __init shaper_init(void)
Introduce the basic infrastructure to implement the net-shaper core functionality. Each network devices carries a net-shaper cache, the NL get() operation fetches the data from such cache. The cache is initially empty, will be fill by the set()/group() operation implemented later and is destroyed at device cleanup time. The net_shaper_ctx_init() and net_shaper_generic_pre() implementations handle generic index type attributes, despite the current caller always pass a constant value to avoid more noise in later patches using them. Signed-off-by: Paolo Abeni <pabeni@redhat.com> --- v4 -> v5: - ops operate on struct binding - replace net_device * with binding* in most helpers - include 'ifindex' in get/dump output - use dev_tracker for real - user pre/post for dump op, too - use NL_SET_BAD_ATTR where applicable - drop redundant/useless kdoc documentation - add type arg to net_shaper_ctx_init() (moved from later patch) - factor out generic pre/post helper for later usage in the series - remove unneeded forward declaration from netdevice.h - dropped 'inline' modifier in .c file - dropped black line at net_shaper.h EoF v3 -> v4: - add scope prefix - use forward declaration in the include - move the handle out of shaper_info RFC v2 -> RFC v3: - dev_put() -> netdev_put() --- Documentation/networking/kapi.rst | 3 + include/linux/netdevice.h | 15 ++ include/net/net_shaper.h | 121 ++++++++++ net/core/dev.c | 2 + net/core/dev.h | 6 + net/shaper/shaper.c | 363 +++++++++++++++++++++++++++++- 6 files changed, 503 insertions(+), 7 deletions(-) create mode 100644 include/net/net_shaper.h