Message ID | 7ed5d9b312ccda58c3400c7ba78bca8e5f8ea853.1722357745.git.pabeni@redhat.com (mailing list archive) |
---|---|
State | Changes Requested |
Delegated to: | Netdev Maintainers |
Headers | show |
Series | net: introduce TX H/W shaping API | expand |
Tue, Jul 30, 2024 at 10:39:46PM CEST, pabeni@redhat.com wrote: >Introduce the basic infrastructure to implement the net-shaper >core functionality. Each network devices carries a net-shaper cache, >the NL get() operation fetches the data from such cache. > >The cache is initially empty, will be fill by the set()/group() >operation implemented later and is destroyed at device cleanup time. > >Signed-off-by: Paolo Abeni <pabeni@redhat.com> >--- >RFC v2 -> RFC v3: > - dev_put() -> netdev_put() >--- > Documentation/networking/kapi.rst | 3 + > include/linux/netdevice.h | 17 ++ > include/net/net_shaper.h | 158 +++++++++++++++++++ > net/core/dev.c | 2 + > net/core/dev.h | 6 + > net/shaper/shaper.c | 251 +++++++++++++++++++++++++++++- > 6 files changed, 435 insertions(+), 2 deletions(-) > create mode 100644 include/net/net_shaper.h > >diff --git a/Documentation/networking/kapi.rst b/Documentation/networking/kapi.rst >index ea55f462cefa..98682b9a13ee 100644 >--- a/Documentation/networking/kapi.rst >+++ b/Documentation/networking/kapi.rst >@@ -104,6 +104,9 @@ Driver Support > .. kernel-doc:: include/linux/netdevice.h > :internal: > >+.. kernel-doc:: include/net/net_shaper.h >+ :internal: >+ > PHY Support > ----------- > >diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h >index 607009150b5f..d3d952be711c 100644 >--- a/include/linux/netdevice.h >+++ b/include/linux/netdevice.h >@@ -81,6 +81,8 @@ struct xdp_frame; > struct xdp_metadata_ops; > struct xdp_md; > struct ethtool_netdev_state; >+struct net_shaper_ops; >+struct net_shaper_data; > > typedef u32 xdp_features_t; > >@@ -1598,6 +1600,14 @@ struct net_device_ops { > int (*ndo_hwtstamp_set)(struct net_device *dev, > struct kernel_hwtstamp_config *kernel_config, > struct netlink_ext_ack *extack); >+ >+#if IS_ENABLED(CONFIG_NET_SHAPER) >+ /** >+ * @net_shaper_ops: Device shaping offload operations >+ * see include/net/net_shapers.h >+ */ >+ const struct net_shaper_ops *net_shaper_ops; >+#endif > }; > > /** >@@ -2408,6 +2418,13 @@ struct net_device { > /** @irq_moder: dim parameters used if IS_ENABLED(CONFIG_DIMLIB). */ > struct dim_irq_moder *irq_moder; > >+#if IS_ENABLED(CONFIG_NET_SHAPER) >+ /** >+ * @net_shaper_data: data tracking the current shaper status >+ * see include/net/net_shapers.h >+ */ >+ struct net_shaper_data *net_shaper_data; >+#endif > u8 priv[] ____cacheline_aligned > __counted_by(priv_len); > } ____cacheline_aligned; >diff --git a/include/net/net_shaper.h b/include/net/net_shaper.h >new file mode 100644 >index 000000000000..8cd65d727e52 >--- /dev/null >+++ b/include/net/net_shaper.h >@@ -0,0 +1,158 @@ >+/* SPDX-License-Identifier: GPL-2.0-or-later */ >+ >+#ifndef _NET_SHAPER_H_ >+#define _NET_SHAPER_H_ >+ >+#include <linux/types.h> >+#include <linux/bits.h> >+#include <linux/bitfield.h> >+#include <linux/netdevice.h> >+#include <linux/netlink.h> >+ >+#include <uapi/linux/net_shaper.h> >+ >+/** >+ * struct net_shaper_info - represents a shaping node on the NIC H/W >+ * zeroed field are considered not set. >+ * @handle: Unique identifier for the shaper, see @net_shaper_make_handle >+ * @parent: Unique identifier for the shaper parent, usually implied. Only >+ * NET_SHAPER_SCOPE_QUEUE, NET_SHAPER_SCOPE_NETDEV and NET_SHAPER_SCOPE_DETACHED >+ * can have the parent handle explicitly set, placing such shaper under >+ * the specified parent. >+ * @metric: Specify if the bw limits refers to PPS or BPS >+ * @bw_min: Minimum guaranteed rate for this shaper >+ * @bw_max: Maximum peak bw allowed for this shaper >+ * @burst: Maximum burst for the peek rate of this shaper >+ * @priority: Scheduling priority for this shaper >+ * @weight: Scheduling weight for this shaper >+ * @children: Number of nested shapers, accounted only for DETACHED scope >+ */ >+struct net_shaper_info { >+ u32 handle; >+ u32 parent; >+ enum net_shaper_metric metric; >+ u64 bw_min; >+ u64 bw_max; >+ u64 burst; >+ u32 priority; >+ u32 weight; >+ u32 children; >+}; >+ >+/** >+ * define NET_SHAPER_SCOPE_VF - Shaper scope >+ * >+ * This shaper scope is not exposed to user-space; the shaper is attached to >+ * the given virtual function. >+ */ >+#define NET_SHAPER_SCOPE_VF __NET_SHAPER_SCOPE_MAX >+ >+/** >+ * struct net_shaper_ops - Operations on device H/W shapers >+ * >+ * The initial shaping configuration ad device initialization is empty/ >+ * a no-op/does not constraint the b/w in any way. >+ * The network core keeps track of the applied user-configuration in >+ * per device storage. >+ * >+ * Each shaper is uniquely identified within the device with an 'handle', >+ * dependent on the shaper scope and other data, see @shaper_make_handle() >+ */ >+struct net_shaper_ops { >+ /** >+ * @group: create the specified shapers group >+ * >+ * Nest the specified @inputs shapers under the given @output shaper >+ * on the network device @dev. The @input shaper array size is specified >+ * by @nr_input. >+ * Create either the @inputs and the @output shaper as needed, >+ * otherwise move them as needed. Can't create @inputs shapers with >+ * NET_SHAPER_SCOPE_DETACHED scope, a separate @group call with such >+ * shaper as @output is needed. >+ * >+ * Returns 0 on group successfully created, otherwise an negative >+ * error value and set @extack to describe the failure's reason. >+ */ >+ int (*group)(struct net_device *dev, int nr_input, >+ const struct net_shaper_info *inputs, >+ const struct net_shaper_info *output, >+ struct netlink_ext_ack *extack); >+ >+ /** >+ * @set: Updates the specified shaper >+ * >+ * Updates or creates the specified @shaper on the given device >+ * @dev. Can't create NET_SHAPER_SCOPE_DETACHED shapers, use @group >+ * instead. >+ * >+ * Returns 0 on group successfully created, otherwise an negative >+ * error value and set @extack to describe the failure's reason. >+ */ >+ int (*set)(struct net_device *dev, >+ const struct net_shaper_info *shaper, >+ struct netlink_ext_ack *extack); >+ >+ /** >+ * @delete: Removes the specified shaper from the NIC >+ * >+ * Removes the shaper configuration as identified by the given @handle >+ * on the specified device @dev, restoring the default behavior. >+ * >+ * Returns 0 on group successfully created, otherwise an negative >+ * error value and set @extack to describe the failure's reason. >+ */ >+ int (*delete)(struct net_device *dev, u32 handle, >+ struct netlink_ext_ack *extack); >+}; >+ >+#define NET_SHAPER_SCOPE_SHIFT 26 >+#define NET_SHAPER_ID_MASK GENMASK(NET_SHAPER_SCOPE_SHIFT - 1, 0) >+#define NET_SHAPER_SCOPE_MASK GENMASK(31, NET_SHAPER_SCOPE_SHIFT) >+ >+#define NET_SHAPER_ID_UNSPEC NET_SHAPER_ID_MASK >+ >+/** >+ * net_shaper_make_handle - creates an unique shaper identifier >+ * @scope: the shaper scope >+ * @id: the shaper id number >+ * >+ * Return: an unique identifier for the shaper >+ * >+ * Combines the specified arguments to create an unique identifier for >+ * the shaper. The @id argument semantic depends on the >+ * specified scope. >+ * For @NET_SHAPER_SCOPE_QUEUE_GROUP, @id is the queue group id >+ * For @NET_SHAPER_SCOPE_QUEUE, @id is the queue number. >+ * For @NET_SHAPER_SCOPE_VF, @id is virtual function number. >+ */ >+static inline u32 net_shaper_make_handle(enum net_shaper_scope scope, >+ int id) >+{ >+ return FIELD_PREP(NET_SHAPER_SCOPE_MASK, scope) | >+ FIELD_PREP(NET_SHAPER_ID_MASK, id); Perhaps some scopes may find only part of u32 as limitting for id in the future? I find it elegant to have it in single u32 though. u64 may be nicer (I know, xarray) :) >+} >+ >+/** >+ * net_shaper_handle_scope - extract the scope from the given handle >+ * @handle: the shaper handle >+ * >+ * Return: the corresponding scope >+ */ >+static inline enum net_shaper_scope net_shaper_handle_scope(u32 handle) >+{ >+ return FIELD_GET(NET_SHAPER_SCOPE_MASK, handle); >+} >+ >+/** >+ * net_shaper_handle_id - extract the id number from the given handle >+ * @handle: the shaper handle >+ * >+ * Return: the corresponding id number >+ */ >+static inline int net_shaper_handle_id(u32 handle) >+{ >+ return FIELD_GET(NET_SHAPER_ID_MASK, handle); >+} >+ >+#endif >+ >diff --git a/net/core/dev.c b/net/core/dev.c >index 6ea1d20676fb..3dc1dd428eda 100644 >--- a/net/core/dev.c >+++ b/net/core/dev.c >@@ -11169,6 +11169,8 @@ void free_netdev(struct net_device *dev) > /* Flush device addresses */ > dev_addr_flush(dev); > >+ dev_shaper_flush(dev); >+ > list_for_each_entry_safe(p, n, &dev->napi_list, dev_list) > netif_napi_del(p); > >diff --git a/net/core/dev.h b/net/core/dev.h >index 5654325c5b71..e376fc1c867b 100644 >--- a/net/core/dev.h >+++ b/net/core/dev.h >@@ -35,6 +35,12 @@ void dev_addr_flush(struct net_device *dev); > int dev_addr_init(struct net_device *dev); > void dev_addr_check(struct net_device *dev); > >+#if IS_ENABLED(CONFIG_NET_SHAPER) >+void dev_shaper_flush(struct net_device *dev); >+#else >+static inline void dev_shaper_flush(struct net_device *dev) {} >+#endif >+ > /* sysctls not referred to from outside net/core/ */ > extern int netdev_unregister_timeout_secs; > extern int weight_p; >diff --git a/net/shaper/shaper.c b/net/shaper/shaper.c >index 49de88c68e2f..5d1d6e600a6a 100644 >--- a/net/shaper/shaper.c >+++ b/net/shaper/shaper.c >@@ -1,19 +1,242 @@ > // SPDX-License-Identifier: GPL-2.0-or-later > > #include <linux/kernel.h> >+#include <linux/idr.h> > #include <linux/skbuff.h> >+#include <linux/xarray.h> >+#include <net/net_shaper.h> > > #include "shaper_nl_gen.h" > >+#include "../core/dev.h" >+ >+struct net_shaper_data { >+ struct xarray shapers; >+ struct idr detached_ids; Hmm, I wonder what this will be used for :) Anyway, could you move to patch which is using it? >+}; >+ >+struct net_shaper_nl_ctx { >+ u32 start_handle; >+}; >+ >+static int fill_handle(struct sk_buff *msg, u32 handle, u32 type, >+ const struct genl_info *info) >+{ >+ struct nlattr *handle_attr; >+ >+ if (!handle) >+ return 0; >+ >+ handle_attr = nla_nest_start_noflag(msg, type); >+ if (!handle_attr) >+ return -EMSGSIZE; >+ >+ if (nla_put_u32(msg, NET_SHAPER_A_SCOPE, >+ net_shaper_handle_scope(handle)) || >+ nla_put_u32(msg, NET_SHAPER_A_ID, >+ net_shaper_handle_id(handle))) >+ goto handle_nest_cancel; >+ >+ nla_nest_end(msg, handle_attr); >+ return 0; >+ >+handle_nest_cancel: >+ nla_nest_cancel(msg, handle_attr); >+ return -EMSGSIZE; >+} >+ >+static int >+net_shaper_fill_one(struct sk_buff *msg, struct net_shaper_info *shaper, >+ const struct genl_info *info) >+{ >+ void *hdr; >+ >+ hdr = genlmsg_iput(msg, info); >+ if (!hdr) >+ return -EMSGSIZE; >+ >+ if (fill_handle(msg, shaper->parent, NET_SHAPER_A_PARENT, info) || >+ fill_handle(msg, shaper->handle, NET_SHAPER_A_HANDLE, info) || >+ nla_put_u32(msg, NET_SHAPER_A_METRIC, shaper->metric) || >+ nla_put_uint(msg, NET_SHAPER_A_BW_MIN, shaper->bw_min) || >+ nla_put_uint(msg, NET_SHAPER_A_BW_MAX, shaper->bw_max) || >+ nla_put_uint(msg, NET_SHAPER_A_BURST, shaper->burst) || >+ nla_put_u32(msg, NET_SHAPER_A_PRIORITY, shaper->priority) || >+ nla_put_u32(msg, NET_SHAPER_A_WEIGHT, shaper->weight)) >+ goto nla_put_failure; >+ >+ genlmsg_end(msg, hdr); >+ >+ return 0; >+ >+nla_put_failure: >+ genlmsg_cancel(msg, hdr); >+ return -EMSGSIZE; >+} >+ >+/* On success sets pdev to the relevant device and acquires a reference >+ * to it >+ */ >+static int fetch_dev(const struct genl_info *info, struct net_device **pdev) >+{ >+ struct net *ns = genl_info_net(info); >+ struct net_device *dev; >+ int ifindex; >+ >+ if (GENL_REQ_ATTR_CHECK(info, NET_SHAPER_A_IFINDEX)) >+ return -EINVAL; >+ >+ ifindex = nla_get_u32(info->attrs[NET_SHAPER_A_IFINDEX]); >+ dev = dev_get_by_index(ns, ifindex); >+ if (!dev) { >+ GENL_SET_ERR_MSG_FMT(info, "device %d not found", ifindex); >+ return -EINVAL; >+ } >+ >+ if (!dev->netdev_ops->net_shaper_ops) { >+ GENL_SET_ERR_MSG_FMT(info, "device %s does not support H/W shaper", >+ dev->name); >+ netdev_put(dev, NULL); >+ return -EOPNOTSUPP; >+ } >+ >+ *pdev = dev; >+ return 0; >+} >+ >+static struct xarray *__sc_container(struct net_device *dev) >+{ >+ return dev->net_shaper_data ? &dev->net_shaper_data->shapers : NULL; >+} >+ >+/* lookup the given shaper inside the cache */ >+static struct net_shaper_info *sc_lookup(struct net_device *dev, u32 handle) >+{ >+ struct xarray *xa = __sc_container(dev); >+ >+ return xa ? xa_load(xa, handle) : NULL; >+} >+ >+static int parse_handle(const struct nlattr *attr, const struct genl_info *info, >+ u32 *handle) >+{ >+ struct nlattr *tb[NET_SHAPER_A_ID + 1]; >+ struct nlattr *scope_attr, *id_attr; >+ enum net_shaper_scope scope; >+ u32 id = 0; >+ int ret; >+ >+ ret = nla_parse_nested(tb, NET_SHAPER_A_ID, attr, >+ net_shaper_handle_nl_policy, info->extack); >+ if (ret < 0) >+ return ret; >+ >+ scope_attr = tb[NET_SHAPER_A_SCOPE]; >+ if (!scope_attr) { >+ GENL_SET_ERR_MSG(info, "Missing 'scope' attribute for handle"); >+ return -EINVAL; >+ } >+ >+ scope = nla_get_u32(scope_attr); >+ >+ /* the default id for detached scope shapers is an invalid one >+ * to help the 'group' operation discriminate between new >+ * detached shaper creation (ID_UNSPEC) and reuse of existing >+ * shaper (any other value) >+ */ >+ id_attr = tb[NET_SHAPER_A_ID]; >+ if (id_attr) >+ id = nla_get_u32(id_attr); >+ else if (scope == NET_SHAPER_SCOPE_DETACHED) >+ id = NET_SHAPER_ID_UNSPEC; >+ >+ *handle = net_shaper_make_handle(scope, id); >+ return 0; >+} >+ > int net_shaper_nl_get_doit(struct sk_buff *skb, struct genl_info *info) > { >- return -EOPNOTSUPP; >+ struct net_shaper_info *shaper; >+ struct net_device *dev; >+ struct sk_buff *msg; >+ u32 handle; >+ int ret; >+ >+ ret = fetch_dev(info, &dev); This is quite net_device centric. Devlink rate shaper should be eventually visible throught this api as well, won't they? How do you imagine that? Could we have various types of binding? Something like: NET_SHAPER_A_BINDING nest NET_SHAPER_A_BINDING_IFINDEX u32 or: NET_SHAPER_A_BINDING nest NET_SHAPER_A_BINDING_DEVLINK_PORT nest DEVLINK_ATTR_BUS_NAME string DEVLINK_ATTR_DEV_NAME string DEVLINK_ATTR_PORT_INDEX u32 ? >+ if (ret) >+ return ret; >+ >+ if (GENL_REQ_ATTR_CHECK(info, NET_SHAPER_A_HANDLE)) >+ goto put; >+ >+ ret = parse_handle(info->attrs[NET_SHAPER_A_HANDLE], info, &handle); >+ if (ret < 0) >+ goto put; >+ >+ shaper = sc_lookup(dev, handle); >+ if (!shaper) { >+ GENL_SET_ERR_MSG_FMT(info, "Can't find shaper for handle %x", handle); >+ ret = -EINVAL; >+ goto put; >+ } >+ >+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); >+ if (!msg) { >+ ret = -ENOMEM; >+ goto put; >+ } >+ >+ ret = net_shaper_fill_one(msg, shaper, info); >+ if (ret) >+ goto free_msg; >+ >+ ret = genlmsg_reply(msg, info); >+ if (ret) >+ goto free_msg; >+ >+put: >+ netdev_put(dev, NULL); >+ return ret; >+ >+free_msg: >+ nlmsg_free(msg); >+ goto put; > } > > int net_shaper_nl_get_dumpit(struct sk_buff *skb, > struct netlink_callback *cb) > { >- return -EOPNOTSUPP; >+ struct net_shaper_nl_ctx *ctx = (struct net_shaper_nl_ctx *)cb->ctx; >+ const struct genl_info *info = genl_info_dump(cb); >+ struct net_shaper_info *shaper; >+ struct net_device *dev; >+ unsigned long handle; >+ int ret; >+ >+ ret = fetch_dev(info, &dev); >+ if (ret) >+ return ret; >+ >+ BUILD_BUG_ON(sizeof(struct net_shaper_nl_ctx) > sizeof(cb->ctx)); >+ >+ /* don't error out dumps performed before any set operation */ >+ if (!dev->net_shaper_data) { >+ ret = 0; >+ goto put; >+ } >+ >+ xa_for_each_range(&dev->net_shaper_data->shapers, handle, shaper, >+ ctx->start_handle, U32_MAX) { >+ ret = net_shaper_fill_one(skb, shaper, info); >+ if (ret) >+ goto put; >+ >+ ctx->start_handle = handle; >+ } >+ >+put: >+ netdev_put(dev, NULL); >+ return ret; > } > > int net_shaper_nl_set_doit(struct sk_buff *skb, struct genl_info *info) >@@ -26,6 +249,30 @@ int net_shaper_nl_delete_doit(struct sk_buff *skb, struct genl_info *info) > return -EOPNOTSUPP; > } > >+int net_shaper_nl_group_doit(struct sk_buff *skb, struct genl_info *info) >+{ >+ return -EOPNOTSUPP; >+} >+ >+void dev_shaper_flush(struct net_device *dev) >+{ >+ struct xarray *xa = __sc_container(dev); >+ struct net_shaper_info *cur; >+ unsigned long index; >+ >+ if (!xa) >+ return; >+ >+ xa_lock(xa); >+ xa_for_each(xa, index, cur) { >+ __xa_erase(xa, index); >+ kfree(cur); >+ } >+ idr_destroy(&dev->net_shaper_data->detached_ids); >+ xa_unlock(xa); >+ kfree(xa); >+} >+ > static int __init shaper_init(void) fetch_dev fill_handle parse_handle sc_lookup __sc_container dev_shaper_flush shaper_init Could you perhaps maintain net_shaper_ prefix for all of there? > { > return genl_register_family(&net_shaper_nl_family); >-- >2.45.2 >
On Tue, 30 Jul 2024 22:39:46 +0200 Paolo Abeni wrote: > +#include <linux/types.h> > +#include <linux/bits.h> > +#include <linux/bitfield.h> > +#include <linux/netdevice.h> > +#include <linux/netlink.h> most of these could be replaced by forward declaration of types > +#include <uapi/linux/net_shaper.h> > + > +/** > + * struct net_shaper_info - represents a shaping node on the NIC H/W > + * zeroed field are considered not set. > + * @handle: Unique identifier for the shaper, see @net_shaper_make_handle > + * @parent: Unique identifier for the shaper parent, usually implied. Only > + * NET_SHAPER_SCOPE_QUEUE, NET_SHAPER_SCOPE_NETDEV and NET_SHAPER_SCOPE_DETACHED > + * can have the parent handle explicitly set, placing such shaper under > + * the specified parent. > + * @metric: Specify if the bw limits refers to PPS or BPS > + * @bw_min: Minimum guaranteed rate for this shaper > + * @bw_max: Maximum peak bw allowed for this shaper > + * @burst: Maximum burst for the peek rate of this shaper > + * @priority: Scheduling priority for this shaper > + * @weight: Scheduling weight for this shaper > + * @children: Number of nested shapers, accounted only for DETACHED scope > + */ > +struct net_shaper_info { > + u32 handle; > + u32 parent; > + enum net_shaper_metric metric; > + u64 bw_min; > + u64 bw_max; > + u64 burst; > + u32 priority; > + u32 weight; > + u32 children; > +}; > + > +/** > + * define NET_SHAPER_SCOPE_VF - Shaper scope > + * > + * This shaper scope is not exposed to user-space; the shaper is attached to > + * the given virtual function. > + */ > +#define NET_SHAPER_SCOPE_VF __NET_SHAPER_SCOPE_MAX > + > +/** > + * struct net_shaper_ops - Operations on device H/W shapers > + * > + * The initial shaping configuration ad device initialization is empty/ > + * a no-op/does not constraint the b/w in any way. > + * The network core keeps track of the applied user-configuration in > + * per device storage. > + * > + * Each shaper is uniquely identified within the device with an 'handle', > + * dependent on the shaper scope and other data, see @shaper_make_handle() we need to document locking, seems like the locking is.. missing? Or at least I don't see what prevents two deletes from racing. > + */ > +struct net_shaper_ops { > + /** > + * @group: create the specified shapers group > + * > + * Nest the specified @inputs shapers under the given @output shaper > + * on the network device @dev. The @input shaper array size is specified > + * by @nr_input. > + * Create either the @inputs and the @output shaper as needed, > + * otherwise move them as needed. Can't create @inputs shapers with > + * NET_SHAPER_SCOPE_DETACHED scope, a separate @group call with such > + * shaper as @output is needed. > + * > + * Returns 0 on group successfully created, otherwise an negative > + * error value and set @extack to describe the failure's reason. > + */ > + int (*group)(struct net_device *dev, int nr_input, > + const struct net_shaper_info *inputs, > + const struct net_shaper_info *output, > + struct netlink_ext_ack *extack); > + > + /* the default id for detached scope shapers is an invalid one > + * to help the 'group' operation discriminate between new > + * detached shaper creation (ID_UNSPEC) and reuse of existing > + * shaper (any other value) > + */ > + id_attr = tb[NET_SHAPER_A_ID]; > + if (id_attr) > + id = nla_get_u32(id_attr); double space
Tue, Jul 30, 2024 at 10:39:46PM CEST, pabeni@redhat.com wrote: [...] >+ >+/** >+ * struct net_shaper_info - represents a shaping node on the NIC H/W >+ * zeroed field are considered not set. >+ * @handle: Unique identifier for the shaper, see @net_shaper_make_handle >+ * @parent: Unique identifier for the shaper parent, usually implied. Only >+ * NET_SHAPER_SCOPE_QUEUE, NET_SHAPER_SCOPE_NETDEV and NET_SHAPER_SCOPE_DETACHED >+ * can have the parent handle explicitly set, placing such shaper under >+ * the specified parent. >+ * @metric: Specify if the bw limits refers to PPS or BPS >+ * @bw_min: Minimum guaranteed rate for this shaper >+ * @bw_max: Maximum peak bw allowed for this shaper "rate" to be consitent with the text one line above? >+ * @burst: Maximum burst for the peek rate of this shaper >+ * @priority: Scheduling priority for this shaper >+ * @weight: Scheduling weight for this shaper >+ * @children: Number of nested shapers, accounted only for DETACHED scope "children" are "inputs"? Or "nested shapers". I'm lost in the terminology, again. >+ */ >+struct net_shaper_info { >+ u32 handle; I wonder if the handle should be part of this structure. The structure describes the shaper attributes. The handle is identification. In the ops below, you sometimes pass the handle as a part of net_shaper_info structure (group, set), yet for delete op you pass handle directly. Wouldn't it be nicer to pass the same handle attr every time so it is clear what it is? >+ u32 parent; >+ enum net_shaper_metric metric; >+ u64 bw_min; >+ u64 bw_max; >+ u64 burst; >+ u32 priority; >+ u32 weight; >+ u32 children; >+}; >+ >+/** >+ * define NET_SHAPER_SCOPE_VF - Shaper scope >+ * >+ * This shaper scope is not exposed to user-space; the shaper is attached to >+ * the given virtual function. >+ */ >+#define NET_SHAPER_SCOPE_VF __NET_SHAPER_SCOPE_MAX This is unused, why do you introduce it here? >+ >+/** >+ * struct net_shaper_ops - Operations on device H/W shapers >+ * >+ * The initial shaping configuration ad device initialization is empty/ s/as/and ? >+ * a no-op/does not constraint the b/w in any way. "bw","b/w","rate". Better to be consistent, I suppose. >+ * The network core keeps track of the applied user-configuration in >+ * per device storage. "per device storage" reads weird. "net_device structure"? IDK. >+ * >+ * Each shaper is uniquely identified within the device with an 'handle', >+ * dependent on the shaper scope and other data, see @shaper_make_handle() The name of the function is net_shaper_make_handle(). You refer to "other data". What is that? I see no such thing in the code. >+ */ >+struct net_shaper_ops { >+ /** >+ * @group: create the specified shapers group >+ * >+ * Nest the specified @inputs shapers under the given @output shaper >+ * on the network device @dev. The @input shaper array size is specified >+ * by @nr_input. >+ * Create either the @inputs and the @output shaper as needed, >+ * otherwise move them as needed. I don't understand the sentense. So this creates either passed inputs and output shaper, or if they already exist, it links them together in desired way? Or what do you mean by "move"? >+ Can't create @inputs shapers with >+ * NET_SHAPER_SCOPE_DETACHED scope, a separate @group call with such >+ * shaper as @output is needed. I don't understand meaning of the sentence. The caller should make sure that NET_SHAPER_SCOPE_DETACHED is not in the inputs, so drivers do not have to sanitize it one by one. Then this sentence is not needed here, is it? >+ * >+ * Returns 0 on group successfully created, otherwise an negative >+ * error value and set @extack to describe the failure's reason. >+ */ >+ int (*group)(struct net_device *dev, int nr_input, Perhaps better name would be "nr_inputs" or "inputs_count"? >+ const struct net_shaper_info *inputs, >+ const struct net_shaper_info *output, >+ struct netlink_ext_ack *extack); >+ >+ /** >+ * @set: Updates the specified shaper >+ * >+ * Updates or creates the specified @shaper on the given device >+ * @dev. Can't create NET_SHAPER_SCOPE_DETACHED shapers, use @group >+ * instead. Again, similar to the NET_SHAPER_SCOPE_DETACHED comment above, the caller should sanitize this. Why is this needed here then? >+ * >+ * Returns 0 on group successfully created, otherwise an negative Which group? C&P mistake I suppose? >+ * error value and set @extack to describe the failure's reason. >+ */ >+ int (*set)(struct net_device *dev, >+ const struct net_shaper_info *shaper, >+ struct netlink_ext_ack *extack); >+ >+ /** >+ * @delete: Removes the specified shaper from the NIC >+ * >+ * Removes the shaper configuration as identified by the given @handle >+ * on the specified device @dev, restoring the default behavior. >+ * >+ * Returns 0 on group successfully created, otherwise an negative Which group? C&P mistake I suppose? >+ * error value and set @extack to describe the failure's reason. >+ */ >+ int (*delete)(struct net_device *dev, u32 handle, >+ struct netlink_ext_ack *extack); >+}; >+ >+#define NET_SHAPER_SCOPE_SHIFT 26 >+#define NET_SHAPER_ID_MASK GENMASK(NET_SHAPER_SCOPE_SHIFT - 1, 0) >+#define NET_SHAPER_SCOPE_MASK GENMASK(31, NET_SHAPER_SCOPE_SHIFT) >+ >+#define NET_SHAPER_ID_UNSPEC NET_SHAPER_ID_MASK >+ >+/** >+ * net_shaper_make_handle - creates an unique shaper identifier >+ * @scope: the shaper scope >+ * @id: the shaper id number >+ * >+ * Return: an unique identifier for the shaper >+ * >+ * Combines the specified arguments to create an unique identifier for >+ * the shaper. The @id argument semantic depends on the >+ * specified scope. >+ * For @NET_SHAPER_SCOPE_QUEUE_GROUP, @id is the queue group id >+ * For @NET_SHAPER_SCOPE_QUEUE, @id is the queue number. >+ * For @NET_SHAPER_SCOPE_VF, @id is virtual function number. >+ */ >+static inline u32 net_shaper_make_handle(enum net_shaper_scope scope, Why is this in the header? It is used only in shaper.c. Should be private to it. >+ int id) >+{ >+ return FIELD_PREP(NET_SHAPER_SCOPE_MASK, scope) | >+ FIELD_PREP(NET_SHAPER_ID_MASK, id); >+} >+ >+/** >+ * net_shaper_handle_scope - extract the scope from the given handle >+ * @handle: the shaper handle >+ * >+ * Return: the corresponding scope >+ */ >+static inline enum net_shaper_scope net_shaper_handle_scope(u32 handle) >+{ >+ return FIELD_GET(NET_SHAPER_SCOPE_MASK, handle); >+} >+ >+/** >+ * net_shaper_handle_id - extract the id number from the given handle >+ * @handle: the shaper handle >+ * >+ * Return: the corresponding id number >+ */ >+static inline int net_shaper_handle_id(u32 handle) I undertand that you use the handle for uapi and xaarray index purposes as consolidated u32. But, for the driver ops, would it be better to pass scope and id trough the ops to drivers? Driver then don't need to call these helpers and they can all stay private to shaper.c >+{ >+ return FIELD_GET(NET_SHAPER_ID_MASK, handle); >+} >+ >+#endif >+ [...]
On 8/1/24 15:42, Jiri Pirko wrote: > Tue, Jul 30, 2024 at 10:39:46PM CEST, pabeni@redhat.com wrote: >> +/** >> + * net_shaper_make_handle - creates an unique shaper identifier >> + * @scope: the shaper scope >> + * @id: the shaper id number >> + * >> + * Return: an unique identifier for the shaper >> + * >> + * Combines the specified arguments to create an unique identifier for >> + * the shaper. The @id argument semantic depends on the >> + * specified scope. >> + * For @NET_SHAPER_SCOPE_QUEUE_GROUP, @id is the queue group id >> + * For @NET_SHAPER_SCOPE_QUEUE, @id is the queue number. >> + * For @NET_SHAPER_SCOPE_VF, @id is virtual function number. >> + */ >> +static inline u32 net_shaper_make_handle(enum net_shaper_scope scope, >> + int id) >> +{ >> + return FIELD_PREP(NET_SHAPER_SCOPE_MASK, scope) | >> + FIELD_PREP(NET_SHAPER_ID_MASK, id); > > Perhaps some scopes may find only part of u32 as limitting for id in > the future? I find it elegant to have it in single u32 though. u64 may > be nicer (I know, xarray) :) With this code the id limit is 2^26 for each scope. The most capable H/W I'm aware of supports at most 64K shapers, overall. Are you aware of any specific constraint we need to address? [...] >> int net_shaper_nl_get_doit(struct sk_buff *skb, struct genl_info *info) >> { >> - return -EOPNOTSUPP; >> + struct net_shaper_info *shaper; >> + struct net_device *dev; >> + struct sk_buff *msg; >> + u32 handle; >> + int ret; >> + >> + ret = fetch_dev(info, &dev); > > This is quite net_device centric. Devlink rate shaper should be > eventually visible throught this api as well, won't they? How do you > imagine that? I'm unsure we are on the same page. Do you foresee this to replace and obsoleted the existing devlink rate API? It was not our so far. > Could we have various types of binding? Something like: > > NET_SHAPER_A_BINDING nest > NET_SHAPER_A_BINDING_IFINDEX u32 > > or: > NET_SHAPER_A_BINDING nest > NET_SHAPER_A_BINDING_DEVLINK_PORT nest > DEVLINK_ATTR_BUS_NAME string > DEVLINK_ATTR_DEV_NAME string > DEVLINK_ATTR_PORT_INDEX u32 > > ? Somewhat related, the current get()/dump() operations currently don't return the shaper ifindex. I guess we can include 'scope' and 'id' under NET_SHAPER_A_BINDING and replace the existing handle attribute with it. It should cover eventual future devlink extensions and provide all the relevant info for get/dump sake. >> + >> static int __init shaper_init(void) > > > > fetch_dev > fill_handle > parse_handle > sc_lookup > __sc_container > dev_shaper_flush > shaper_init > > > Could you perhaps maintain net_shaper_ prefix for all of there? Most of the helpers are static and should never be visible outside this compilation unit, so I did not bother with a prefix, I'll add it in the next revision. Thanks, Paolo
Tue, Aug 13, 2024 at 05:17:12PM CEST, pabeni@redhat.com wrote: >On 8/1/24 15:42, Jiri Pirko wrote: >> Tue, Jul 30, 2024 at 10:39:46PM CEST, pabeni@redhat.com wrote: >> > +/** >> > + * net_shaper_make_handle - creates an unique shaper identifier >> > + * @scope: the shaper scope >> > + * @id: the shaper id number >> > + * >> > + * Return: an unique identifier for the shaper >> > + * >> > + * Combines the specified arguments to create an unique identifier for >> > + * the shaper. The @id argument semantic depends on the >> > + * specified scope. >> > + * For @NET_SHAPER_SCOPE_QUEUE_GROUP, @id is the queue group id >> > + * For @NET_SHAPER_SCOPE_QUEUE, @id is the queue number. >> > + * For @NET_SHAPER_SCOPE_VF, @id is virtual function number. >> > + */ >> > +static inline u32 net_shaper_make_handle(enum net_shaper_scope scope, >> > + int id) >> > +{ >> > + return FIELD_PREP(NET_SHAPER_SCOPE_MASK, scope) | >> > + FIELD_PREP(NET_SHAPER_ID_MASK, id); >> >> Perhaps some scopes may find only part of u32 as limitting for id in >> the future? I find it elegant to have it in single u32 though. u64 may >> be nicer (I know, xarray) :) > >With this code the id limit is 2^26 for each scope. The most capable H/W I'm >aware of supports at most 64K shapers, overall. Are you aware of any specific >constraint we need to address? Nope. Just thinking out loud. > >[...] >> > int net_shaper_nl_get_doit(struct sk_buff *skb, struct genl_info *info) >> > { >> > - return -EOPNOTSUPP; >> > + struct net_shaper_info *shaper; >> > + struct net_device *dev; >> > + struct sk_buff *msg; >> > + u32 handle; >> > + int ret; >> > + >> > + ret = fetch_dev(info, &dev); >> >> This is quite net_device centric. Devlink rate shaper should be >> eventually visible throught this api as well, won't they? How do you >> imagine that? > >I'm unsure we are on the same page. Do you foresee this to replace and >obsoleted the existing devlink rate API? It was not our so far. Driver-api-wise, yes. I believe that was the goal, to have drivers to implement one rate api. > >> Could we have various types of binding? Something like: >> >> NET_SHAPER_A_BINDING nest >> NET_SHAPER_A_BINDING_IFINDEX u32 >> >> or: >> NET_SHAPER_A_BINDING nest >> NET_SHAPER_A_BINDING_DEVLINK_PORT nest >> DEVLINK_ATTR_BUS_NAME string >> DEVLINK_ATTR_DEV_NAME string >> DEVLINK_ATTR_PORT_INDEX u32 >> >> ? > >Somewhat related, the current get()/dump() operations currently don't return >the shaper ifindex. I guess we can include 'scope' and 'id' under >NET_SHAPER_A_BINDING and replace the existing handle attribute with it. > >It should cover eventual future devlink extensions and provide all the >relevant info for get/dump sake. Sounds fine. > >> > + >> > static int __init shaper_init(void) >> >> >> >> fetch_dev >> fill_handle >> parse_handle >> sc_lookup >> __sc_container >> dev_shaper_flush >> shaper_init >> >> >> Could you perhaps maintain net_shaper_ prefix for all of there? > >Most of the helpers are static and should never be visible outside this >compilation unit, so I did not bother with a prefix, I'll add it in the next >revision. Thanks. > >Thanks, > >Paolo >
On 8/14/24 10:37, Jiri Pirko wrote: > Tue, Aug 13, 2024 at 05:17:12PM CEST, pabeni@redhat.com wrote: >> On 8/1/24 15:42, Jiri Pirko wrote: >> [...] >>>> int net_shaper_nl_get_doit(struct sk_buff *skb, struct genl_info *info) >>>> { >>>> - return -EOPNOTSUPP; >>>> + struct net_shaper_info *shaper; >>>> + struct net_device *dev; >>>> + struct sk_buff *msg; >>>> + u32 handle; >>>> + int ret; >>>> + >>>> + ret = fetch_dev(info, &dev); >>> >>> This is quite net_device centric. Devlink rate shaper should be >>> eventually visible throught this api as well, won't they? How do you >>> imagine that? >> >> I'm unsure we are on the same page. Do you foresee this to replace and >> obsoleted the existing devlink rate API? It was not our so far. > > Driver-api-wise, yes. I believe that was the goal, to have drivers to > implement one rate api. I initially underlooked at this point, I'm sorry. Re-reading this I think we are not on the same page. The net_shaper_ops are per network device operations: they are aimed (also) at consolidating network device shaping related callbacks, but they can't operate on non-network device objects (devlink port). Cheers, Paolo
Fri, Aug 16, 2024 at 10:52:58AM CEST, pabeni@redhat.com wrote: >On 8/14/24 10:37, Jiri Pirko wrote: >> Tue, Aug 13, 2024 at 05:17:12PM CEST, pabeni@redhat.com wrote: >> > On 8/1/24 15:42, Jiri Pirko wrote: >> > [...] >> > > > int net_shaper_nl_get_doit(struct sk_buff *skb, struct genl_info *info) >> > > > { >> > > > - return -EOPNOTSUPP; >> > > > + struct net_shaper_info *shaper; >> > > > + struct net_device *dev; >> > > > + struct sk_buff *msg; >> > > > + u32 handle; >> > > > + int ret; >> > > > + >> > > > + ret = fetch_dev(info, &dev); >> > > >> > > This is quite net_device centric. Devlink rate shaper should be >> > > eventually visible throught this api as well, won't they? How do you >> > > imagine that? >> > >> > I'm unsure we are on the same page. Do you foresee this to replace and >> > obsoleted the existing devlink rate API? It was not our so far. >> >> Driver-api-wise, yes. I believe that was the goal, to have drivers to >> implement one rate api. > >I initially underlooked at this point, I'm sorry. > >Re-reading this I think we are not on the same page. > >The net_shaper_ops are per network device operations: they are aimed (also) >at consolidating network device shaping related callbacks, but they can't >operate on non-network device objects (devlink port). Why not? > >Cheers, > >Paolo >
On 8/16/24 11:16, Jiri Pirko wrote: > Fri, Aug 16, 2024 at 10:52:58AM CEST, pabeni@redhat.com wrote: >> On 8/14/24 10:37, Jiri Pirko wrote: >>> Tue, Aug 13, 2024 at 05:17:12PM CEST, pabeni@redhat.com wrote: >>>> On 8/1/24 15:42, Jiri Pirko wrote: >>>> [...] >>>>>> int net_shaper_nl_get_doit(struct sk_buff *skb, struct genl_info *info) >>>>>> { >>>>>> - return -EOPNOTSUPP; >>>>>> + struct net_shaper_info *shaper; >>>>>> + struct net_device *dev; >>>>>> + struct sk_buff *msg; >>>>>> + u32 handle; >>>>>> + int ret; >>>>>> + >>>>>> + ret = fetch_dev(info, &dev); >>>>> >>>>> This is quite net_device centric. Devlink rate shaper should be >>>>> eventually visible throught this api as well, won't they? How do you >>>>> imagine that? >>>> >>>> I'm unsure we are on the same page. Do you foresee this to replace and >>>> obsoleted the existing devlink rate API? It was not our so far. >>> >>> Driver-api-wise, yes. I believe that was the goal, to have drivers to >>> implement one rate api. >> >> I initially underlooked at this point, I'm sorry. >> >> Re-reading this I think we are not on the same page. >> >> The net_shaper_ops are per network device operations: they are aimed (also) >> at consolidating network device shaping related callbacks, but they can't >> operate on non-network device objects (devlink port). > > Why not? Isn't the whole point of devlink to configure objects that are directly related to any network device? Would be somewhat awkward accessing devlink port going through some net_device? Side note: I experimented adding the 'binging' abstraction to this API and gives a quite significant uglification to the user syntax (due to the additional nesting required) and the code. Still, if there is a very strong need for controlling devlink rate via this API _and_ we can assume that each net_device "relates" (/references/is connected to) at most a single devlink object (out of sheer ignorance on my side I'm unsure about this point, but skimming over the existing implementations it looks so), the current API definition would be IMHO sufficient and clean enough to reach for both devlink port rate objects and devlink node rate objects. We could define additional scopes for each of such objects and use the id to discriminate the specific port or node within the relevant devlink. I think such scopes definition should come with related implementation, e.g. not with this series. Thanks, Paolo
Mon, Aug 19, 2024 at 11:33:28AM CEST, pabeni@redhat.com wrote: > > >On 8/16/24 11:16, Jiri Pirko wrote: >> Fri, Aug 16, 2024 at 10:52:58AM CEST, pabeni@redhat.com wrote: >> > On 8/14/24 10:37, Jiri Pirko wrote: >> > > Tue, Aug 13, 2024 at 05:17:12PM CEST, pabeni@redhat.com wrote: >> > > > On 8/1/24 15:42, Jiri Pirko wrote: >> > > > [...] >> > > > > > int net_shaper_nl_get_doit(struct sk_buff *skb, struct genl_info *info) >> > > > > > { >> > > > > > - return -EOPNOTSUPP; >> > > > > > + struct net_shaper_info *shaper; >> > > > > > + struct net_device *dev; >> > > > > > + struct sk_buff *msg; >> > > > > > + u32 handle; >> > > > > > + int ret; >> > > > > > + >> > > > > > + ret = fetch_dev(info, &dev); >> > > > > >> > > > > This is quite net_device centric. Devlink rate shaper should be >> > > > > eventually visible throught this api as well, won't they? How do you >> > > > > imagine that? >> > > > >> > > > I'm unsure we are on the same page. Do you foresee this to replace and >> > > > obsoleted the existing devlink rate API? It was not our so far. >> > > >> > > Driver-api-wise, yes. I believe that was the goal, to have drivers to >> > > implement one rate api. >> > >> > I initially underlooked at this point, I'm sorry. >> > >> > Re-reading this I think we are not on the same page. >> > >> > The net_shaper_ops are per network device operations: they are aimed (also) >> > at consolidating network device shaping related callbacks, but they can't >> > operate on non-network device objects (devlink port). >> >> Why not? > >Isn't the whole point of devlink to configure objects that are directly >related to any network device? Would be somewhat awkward accessing devlink Yeah, not even network. Just "a device". >port going through some net_device? I'm not sure why you are asking that. I didn't suggest anything like that. On contrary, as your API is netdev centric, I suggested to disconnect from netdev to the shapers could be used not only with them. This is what I understood was a plan from very beginning. I may be wrong though.... > >Side note: I experimented adding the 'binging' abstraction to this API and >gives a quite significant uglification to the user syntax (due to the >additional nesting required) and the code. > >Still, if there is a very strong need for controlling devlink rate via this >API _and_ we can assume that each net_device "relates" (/references/is >connected to) at most a single devlink object (out of sheer ignorance on my >side I'm unsure about this point, but skimming over the existing >implementations it looks so), the current API definition would be IMHO >sufficient and clean enough to reach for both devlink port rate objects and >devlink node rate objects. Don't assume this. Not always true. > >We could define additional scopes for each of such objects and use the id to >discriminate the specific port or node within the relevant devlink. But you still want to use some netdevice as a handle IIUC, is that right? > >I think such scopes definition should come with related implementation, e.g. >not with this series. > >Thanks, > >Paolo >
On 8/19/24 13:53, Jiri Pirko wrote: > Mon, Aug 19, 2024 at 11:33:28AM CEST, pabeni@redhat.com wrote: >> Isn't the whole point of devlink to configure objects that are directly >> related to any network device? Would be somewhat awkward accessing devlink >> port going through some net_device? > > I'm not sure why you are asking that. I didn't suggest anything like > that. On contrary, as your API is netdev centric, I suggested to > disconnect from netdev to the shapers could be used not only with them. ndo_shaper_ops are basically net_device ndo. Any implementation of them will operate 'trough some net_device'. I'm still not sure which one of the following you mean: 1) the shaper NL interface must be able to manage devlink (rate) objects. The core will lookup the devlink obj and use devlink_ops to do the change. 2) the shaper NL interface must be able to manage devlink (rate) objects, the core will use ndo_shaper_ops to do the actual change. 3) something else? In my previous reply, I assumed you wanted option 2). If so, which kind of object should implement the ndo_shaper_ops callbacks? net_device? devlink? other? > This is what I understood was a plan from very beginning. Originally the scope was much more limited than what defined here. Jakub asked to implement an interface capable to unify the network device shaping/rate related callbacks. In a previous revision, I stretched that to cover objects "above" the network device level (e.g. PF/VF/SFs groups), but then I left them out because: - it caused several inconsistencies (among other thing we can't use the 'shaper cache' there and Jakub wants the cache in place). - we already have devlink for that. >> We could define additional scopes for each of such objects and use the id to >> discriminate the specific port or node within the relevant devlink. > > But you still want to use some netdevice as a handle IIUC, is that > right? The revision of the series that I hope to share soon still used net_device ndos. Shapers are identified within the network device by an handle. Cheers, Paolo
Mon, Aug 19, 2024 at 06:52:22PM CEST, pabeni@redhat.com wrote: >On 8/19/24 13:53, Jiri Pirko wrote: >> Mon, Aug 19, 2024 at 11:33:28AM CEST, pabeni@redhat.com wrote: >> > Isn't the whole point of devlink to configure objects that are directly >> > related to any network device? Would be somewhat awkward accessing devlink >> > port going through some net_device? >> >> I'm not sure why you are asking that. I didn't suggest anything like >> that. On contrary, as your API is netdev centric, I suggested to >> disconnect from netdev to the shapers could be used not only with them. > >ndo_shaper_ops are basically net_device ndo. Any implementation of them will >operate 'trough some net_device'. I know, I see that in the code. But the question is, does it have to be like that? > >I'm still not sure which one of the following you mean: > >1) the shaper NL interface must be able to manage devlink (rate) objects. The >core will lookup the devlink obj and use devlink_ops to do the change. > >2) the shaper NL interface must be able to manage devlink (rate) objects, the >core will use ndo_shaper_ops to do the actual change. > >3) something else? I don't care about the shaper NL in case of devlink rate objects. I care more about in-kernel api. I see shaper NL as one of the UAPIs to consume the shaper infrastructure. The devlink rate is another one. If the devlink rate shapers are visible over shaper NL, IDK. They may be RO perhaps. > >In my previous reply, I assumed you wanted option 2). If so, which kind of >object should implement the ndo_shaper_ops callbacks? net_device? devlink? >other? Whoever implements the shaper in driver. If that is net_device tight shaper, driver should work with net_device. If that is devlink port related shaper, driver should work on top of devlink port based api. > >> This is what I understood was a plan from very beginning. > >Originally the scope was much more limited than what defined here. Jakub >asked to implement an interface capable to unify the network device >shaping/rate related callbacks. I'm not saying this is deal breaker for me. I just think that if the api is designed to be independent of the object shaper is bound to (netdev/devlink_port/etc), it would be much much easier to extend in the future. If you do everything netdev-centric from start, I'm sure no shaper consolidation will ever happen. And that I thought was one of the goals. Perhaps Jakub has opinion. > >In a previous revision, I stretched that to cover objects "above" the network >device level (e.g. PF/VF/SFs groups), but then I left them out because: > >- it caused several inconsistencies (among other thing we can't use the >'shaper cache' there and Jakub wants the cache in place). >- we already have devlink for that. > >> > We could define additional scopes for each of such objects and use the id to >> > discriminate the specific port or node within the relevant devlink. >> >> But you still want to use some netdevice as a handle IIUC, is that >> right? > >The revision of the series that I hope to share soon still used net_device >ndos. Shapers are identified within the network device by an handle. > >Cheers, > >Paolo >
On Thu, 22 Aug 2024 14:02:54 +0200 Jiri Pirko wrote: >>> This is what I understood was a plan from very beginning. >> >> Originally the scope was much more limited than what defined here. Jakub >> asked to implement an interface capable to unify the network device >> shaping/rate related callbacks. > > I'm not saying this is deal breaker for me. I just think that if the api > is designed to be independent of the object shaper is bound to > (netdev/devlink_port/etc), it would be much much easier to extend in the > future. If you do everything netdev-centric from start, I'm sure no > shaper consolidation will ever happen. And that I thought was one of the > goals. > > Perhaps Jakub has opinion. I think you and I are on the same page :) Other than the "reference object" (netdev / devlink port) the driver facing API should be identical. Making it possible for the same driver code to handle translating the parameters into HW config / FW requests, whether they shape at the device (devlink) or port (netdev) level. Shaper NL for netdevs is separate from internal representation and driver API in my mind. My initial ask was to create the internal representation first, make sure it can express devlink and handful of exiting netdev APIs, and only once that's merged worry about exposing it via a new NL. I'm not opposed to showing devlink shapers in netdev NL (RO as you say) but talking about it now strikes me as cart before the horse.
On 8/22/24 16:41, Jakub Kicinski wrote: > On Thu, 22 Aug 2024 14:02:54 +0200 Jiri Pirko wrote: >>>> This is what I understood was a plan from very beginning. >>> >>> Originally the scope was much more limited than what defined here. Jakub >>> asked to implement an interface capable to unify the network device >>> shaping/rate related callbacks. >> >> I'm not saying this is deal breaker for me. I just think that if the api >> is designed to be independent of the object shaper is bound to >> (netdev/devlink_port/etc), it would be much much easier to extend in the >> future. If you do everything netdev-centric from start, I'm sure no >> shaper consolidation will ever happen. And that I thought was one of the >> goals. >> >> Perhaps Jakub has opinion. > > I think you and I are on the same page :) Other than the "reference > object" (netdev / devlink port) the driver facing API should be > identical. Making it possible for the same driver code to handle > translating the parameters into HW config / FW requests, whether > they shape at the device (devlink) or port (netdev) level. > > Shaper NL for netdevs is separate from internal representation and > driver API in my mind. My initial ask was to create the internal > representation first, make sure it can express devlink and handful of > exiting netdev APIs, and only once that's merged worry about exposing > it via a new NL. > > I'm not opposed to showing devlink shapers in netdev NL (RO as you say) > but talking about it now strikes me as cart before the horse. FTR, I don't see both of you on the same page ?!? I read the above as Jiri's preference is a single ndo set to control both devlink and device shapers, while I read Jakub's preference as for different sets of operations that will use the same arguments to specify the shaper informations. Or to phrase the above differently, Jiri is focusing on the shaper "binding" (how to locate/access it) while Jakub is focusing on the shaper "info" (content/definition/attributes). Please correct me If I misread something. Still for the record, I interpret the current proposal as not clashing with Jakub's preference, and being tolerated from Jiri, again please correct me if I read too far. Thanks, Paolo
On Thu, 22 Aug 2024 22:30:35 +0200 Paolo Abeni wrote: > >> I'm not saying this is deal breaker for me. I just think that if the api > >> is designed to be independent of the object shaper is bound to > >> (netdev/devlink_port/etc), it would be much much easier to extend in the > >> future. If you do everything netdev-centric from start, I'm sure no > >> shaper consolidation will ever happen. And that I thought was one of the > >> goals. > >> > >> Perhaps Jakub has opinion. > > > > I think you and I are on the same page :) Other than the "reference > > object" (netdev / devlink port) the driver facing API should be > > identical. Making it possible for the same driver code to handle > > translating the parameters into HW config / FW requests, whether > > they shape at the device (devlink) or port (netdev) level. > > > > Shaper NL for netdevs is separate from internal representation and > > driver API in my mind. My initial ask was to create the internal > > representation first, make sure it can express devlink and handful of > > exiting netdev APIs, and only once that's merged worry about exposing > > it via a new NL. > > > > I'm not opposed to showing devlink shapers in netdev NL (RO as you say) > > but talking about it now strikes me as cart before the horse. > > FTR, I don't see both of you on the same page ?!? > > I read the above as Jiri's preference is a single ndo set to control > both devlink and device shapers, while I read Jakub's preference as for > different sets of operations that will use the same arguments to specify > the shaper informations. Jiri replied: > which kind of object should implement the ndo_shaper_ops callbacks? Whoever implements the shaper in driver. If that is net_device tight shaper, driver should work with net_device. If that is devlink port related shaper, driver should work on top of devlink port based api. I interpret this as having two almost identical versions of shaper ops, the only difference is that one takes netdev and the other devlink port. We could simplify it slightly, and call the ndo for getting devlink port from netdev, and always pass devlink port in? I _think_ (but I'm not 100% sure) that Jiri does _not_ mean that we would be able to render the internal shaper tree as ops for the existing devlink rate API. Because that may cause scope creep, inconsistencies and duplication. > Or to phrase the above differently, Jiri is focusing on the shaper > "binding" (how to locate/access it) while Jakub is focusing on the > shaper "info" (content/definition/attributes). Please correct me If I > misread something. > > Still for the record, I interpret the current proposal as not clashing > with Jakub's preference, and being tolerated from Jiri, again please > correct me if I read too far. One more thing, Jiri said: If you do everything netdev-centric from start, I'm sure no shaper consolidation will ever happen. And that I thought was one of the goals. Consolidation was indeed one of the goals, and I share Jiri's concern :(
Fri, Aug 23, 2024 at 12:56:08AM CEST, kuba@kernel.org wrote: >On Thu, 22 Aug 2024 22:30:35 +0200 Paolo Abeni wrote: >> >> I'm not saying this is deal breaker for me. I just think that if the api >> >> is designed to be independent of the object shaper is bound to >> >> (netdev/devlink_port/etc), it would be much much easier to extend in the >> >> future. If you do everything netdev-centric from start, I'm sure no >> >> shaper consolidation will ever happen. And that I thought was one of the >> >> goals. >> >> >> >> Perhaps Jakub has opinion. >> > >> > I think you and I are on the same page :) Other than the "reference >> > object" (netdev / devlink port) the driver facing API should be >> > identical. Making it possible for the same driver code to handle >> > translating the parameters into HW config / FW requests, whether >> > they shape at the device (devlink) or port (netdev) level. >> > >> > Shaper NL for netdevs is separate from internal representation and >> > driver API in my mind. My initial ask was to create the internal >> > representation first, make sure it can express devlink and handful of >> > exiting netdev APIs, and only once that's merged worry about exposing >> > it via a new NL. >> > >> > I'm not opposed to showing devlink shapers in netdev NL (RO as you say) >> > but talking about it now strikes me as cart before the horse. >> >> FTR, I don't see both of you on the same page ?!? >> >> I read the above as Jiri's preference is a single ndo set to control "Ndo" stands for netdev op and they are all tightly coupled with netdevices. So, "single ndo set to control both devlink and netdev shapers" sounds like nonsense to me. >> both devlink and device shapers, while I read Jakub's preference as for >> different sets of operations that will use the same arguments to specify >> the shaper informations. > >Jiri replied: > > > which kind of object should implement the ndo_shaper_ops callbacks? > > Whoever implements the shaper in driver. If that is net_device tight > shaper, driver should work with net_device. If that is devlink port > related shaper, driver should work on top of devlink port based api. > >I interpret this as having two almost identical versions of shaper ops, >the only difference is that one takes netdev and the other devlink port. Could be done like that. But see more below. >We could simplify it slightly, and call the ndo for getting devlink >port from netdev, and always pass devlink port in? No please. Keep that separate. You can't always rely on devlink port having netdev paired with it. Plus, it would be odd callpath from devlink port code to netdev op. Not to mention locking :) > >I _think_ (but I'm not 100% sure) that Jiri does _not_ mean that we >would be able to render the internal shaper tree as ops for the >existing devlink rate API. Because that may cause scope creep, >inconsistencies and duplication. Not sure what you mean by this. Devlink rate UAPI will stay the same. Only the backend will use new driver API instead of the existing one. > >> Or to phrase the above differently, Jiri is focusing on the shaper >> "binding" (how to locate/access it) while Jakub is focusing on the >> shaper "info" (content/definition/attributes). Please correct me If I >> misread something. Two(or more) similar ops structs looks odd to me. I think that the ops should should be shared and just the "binding point" should be somehow abstracted out. Code speaks, let me draft how it could be done: enum net_shaper_binding_type { NET_SHAPER_BINDING_TYPE_NETDEV, NET_SHAPER_BINDING_TYPE_DEVLINK_PORT, }; struct net_shaper_binding { enum net_shaper_binding_type type; union { struct net_device *netdev; struct devlink_port *devlink_port; }; }; struct net_shaper_ops { + /** + * @group: create the specified shapers scheduling group + * + * Nest the @leaves shapers identified by @leaves_handles under the + * @root shaper identified by @root_handle. All the shapers belong + * to the network device @dev. The @leaves and @leaves_handles shaper + * arrays size is specified by @leaves_count. + * Create either the @leaves and the @root shaper; or if they already + * exists, links them together in the desired way. + * @leaves scope must be NET_SHAPER_SCOPE_QUEUE. + * + * Returns 0 on group successfully created, otherwise an negative + * error value and set @extack to describe the failure's reason. + */ + int (*group)(const struct net_shaper_binding *binding, int leaves_count, + const struct net_shaper_handle *leaves_handles, + const struct net_shaper_info *leaves, + const struct net_shaper_handle *root_handle, + const struct net_shaper_info *root, + struct netlink_ext_ack *extack); + + /** + * @set: Updates the specified shaper + * + * Updates or creates the @shaper identified by the provided @handle + * on the given device @dev. + * + * Returns 0 on success, otherwise an negative + * error value and set @extack to describe the failure's reason. + */ + int (*set)(const struct net_shaper_binding *binding, + const struct net_shaper_handle *handle, + const struct net_shaper_info *shaper, + struct netlink_ext_ack *extack); + + /** + * @delete: Removes the specified shaper from the NIC + * + * Removes the shaper configuration as identified by the given @handle + * on the specified device @dev, restoring the default behavior. + * + * Returns 0 on success, otherwise an negative + * error value and set @extack to describe the failure's reason. + */ + int (*delete)(const struct net_shaper_binding *binding, + const struct net_shaper_handle *handle, + struct netlink_ext_ack *extack); +}; static inline struct net_device * net_shaper_binding_netdev(struct net_shaper_binding *binding) { WARN_ON(binding->type != NET_SHAPER_BINDING_TYPE_NETDEV) return binding->netdev; } static inline struct devlink_port * net_shaper_binding_devlink_port(struct net_shaper_binding *binding) { WARN_ON(binding->type != NET_SHAPER_BINDING_TYPE_DEVLINK_PORT) return binding->devlink_port; } Then whoever calls the op fills-up the binding structure accordingly. drivers can implement ops, for netdev-bound shaper like this: static int driverx_shaper_set(const struct net_shaper_binding *binding, const struct net_shaper_handle *handle, const struct net_shaper_info *shaper, struct netlink_ext_ack *extack); { struct net_device *netdev = net_shaper_binding_netdev(binding); ...... } struct net_shaper_ops driverx_shaper_ops { .set = driverx_shaper_set; ...... }; static const struct net_device_ops driverx_netdev_ops = { .net_shaper_ops = &driverx_shaper_ops, ...... }; drivers can implement ops, for devlink_port-bound shaper like this: static int drivery_shaper_set(const struct net_shaper_binding *binding, const struct net_shaper_handle *handle, const struct net_shaper_info *shaper, struct netlink_ext_ack *extack); { struct devlink_port *devlink_port = net_shaper_binding_devlink_port(binding); ...... } struct net_shaper_ops drivery_shaper_ops { .set = drivery_shaper_set; ...... }; static const struct devlink_port_ops drivery_devlink_port_ops = { .port_shaper_ops = &drivery_shaper_ops, }; Some driver can even have one ops implementation for both, and distinguish just by looking at binding->type. >> >> Still for the record, I interpret the current proposal as not clashing >> with Jakub's preference, and being tolerated from Jiri, again please >> correct me if I read too far. > >One more thing, Jiri said: > > If you do everything netdev-centric from start, I'm sure no shaper > consolidation will ever happen. And that I thought was one of the goals. > >Consolidation was indeed one of the goals, and I share Jiri's concern :( Good.
On 8/23/24 13:50, Jiri Pirko wrote: > Fri, Aug 23, 2024 at 12:56:08AM CEST, kuba@kernel.org wrote: >> On Thu, 22 Aug 2024 22:30:35 +0200 Paolo Abeni wrote: >>>>> I'm not saying this is deal breaker for me. I just think that if the api >>>>> is designed to be independent of the object shaper is bound to >>>>> (netdev/devlink_port/etc), it would be much much easier to extend in the >>>>> future. If you do everything netdev-centric from start, I'm sure no >>>>> shaper consolidation will ever happen. And that I thought was one of the >>>>> goals. >>>>> >>>>> Perhaps Jakub has opinion. >>>> >>>> I think you and I are on the same page :) Other than the "reference >>>> object" (netdev / devlink port) the driver facing API should be >>>> identical. Making it possible for the same driver code to handle >>>> translating the parameters into HW config / FW requests, whether >>>> they shape at the device (devlink) or port (netdev) level. >>>> >>>> Shaper NL for netdevs is separate from internal representation and >>>> driver API in my mind. My initial ask was to create the internal >>>> representation first, make sure it can express devlink and handful of >>>> exiting netdev APIs, and only once that's merged worry about exposing >>>> it via a new NL. >>>> >>>> I'm not opposed to showing devlink shapers in netdev NL (RO as you say) >>>> but talking about it now strikes me as cart before the horse. >>> >>> FTR, I don't see both of you on the same page ?!? >>> >>> I read the above as Jiri's preference is a single ndo set to control > > "Ndo" stands for netdev op and they are all tightly coupled with > netdevices. So, "single ndo set to control both devlink and netdev > shapers" sounds like nonsense to me. In this context, "NDOs" == set of function pointers operating on the same object. >>> Or to phrase the above differently, Jiri is focusing on the shaper >>> "binding" (how to locate/access it) while Jakub is focusing on the >>> shaper "info" (content/definition/attributes). Please correct me If I >>> misread something. > > Two(or more) similar ops structs looks odd to me. I think that the ops > should should be shared and just the "binding point" should be somehow > abstracted out. Code speaks, let me draft how it could be done: > > enum net_shaper_binding_type { > NET_SHAPER_BINDING_TYPE_NETDEV, > NET_SHAPER_BINDING_TYPE_DEVLINK_PORT, > }; > > struct net_shaper_binding { > enum net_shaper_binding_type type; > union { > struct net_device *netdev; > struct devlink_port *devlink_port; > }; > }; > > struct net_shaper_ops { > + /** > + * @group: create the specified shapers scheduling group > + * > + * Nest the @leaves shapers identified by @leaves_handles under the > + * @root shaper identified by @root_handle. All the shapers belong > + * to the network device @dev. The @leaves and @leaves_handles shaper > + * arrays size is specified by @leaves_count. > + * Create either the @leaves and the @root shaper; or if they already > + * exists, links them together in the desired way. > + * @leaves scope must be NET_SHAPER_SCOPE_QUEUE. > + * > + * Returns 0 on group successfully created, otherwise an negative > + * error value and set @extack to describe the failure's reason. > + */ > + int (*group)(const struct net_shaper_binding *binding, int leaves_count, > + const struct net_shaper_handle *leaves_handles, > + const struct net_shaper_info *leaves, > + const struct net_shaper_handle *root_handle, > + const struct net_shaper_info *root, > + struct netlink_ext_ack *extack); > + > + /** > + * @set: Updates the specified shaper > + * > + * Updates or creates the @shaper identified by the provided @handle > + * on the given device @dev. > + * > + * Returns 0 on success, otherwise an negative > + * error value and set @extack to describe the failure's reason. > + */ > + int (*set)(const struct net_shaper_binding *binding, > + const struct net_shaper_handle *handle, > + const struct net_shaper_info *shaper, > + struct netlink_ext_ack *extack); > + > + /** > + * @delete: Removes the specified shaper from the NIC > + * > + * Removes the shaper configuration as identified by the given @handle > + * on the specified device @dev, restoring the default behavior. > + * > + * Returns 0 on success, otherwise an negative > + * error value and set @extack to describe the failure's reason. > + */ > + int (*delete)(const struct net_shaper_binding *binding, > + const struct net_shaper_handle *handle, > + struct netlink_ext_ack *extack); > +}; > > > static inline struct net_device * > net_shaper_binding_netdev(struct net_shaper_binding *binding) > { > WARN_ON(binding->type != NET_SHAPER_BINDING_TYPE_NETDEV) > return binding->netdev; > } > > static inline struct devlink_port * > net_shaper_binding_devlink_port(struct net_shaper_binding *binding) > { > WARN_ON(binding->type != NET_SHAPER_BINDING_TYPE_DEVLINK_PORT) > return binding->devlink_port; > } > > Then whoever calls the op fills-up the binding structure accordingly. > > > drivers can implement ops, for netdev-bound shaper like this: > > static int driverx_shaper_set(const struct net_shaper_binding *binding, > const struct net_shaper_handle *handle, > const struct net_shaper_info *shaper, > struct netlink_ext_ack *extack); > { > struct net_device *netdev = net_shaper_binding_netdev(binding); > ...... > } > > struct net_shaper_ops driverx_shaper_ops { > .set = driverx_shaper_set; > ...... > }; > > static const struct net_device_ops driverx_netdev_ops = { > .net_shaper_ops = &driverx_shaper_ops, > ...... > }; If I read correctly, the net_shaper_ops caller will have to discriminate between net_device and devlink, do the object-type-specific lookup to get the relevant net_device (or devlink) object and then pass to such net_device (or devlink) a "generic" binding. Did I misread something? If so I must admit I really dislike such interface. I personally think it would be much cleaner to have 2 separate set of operations, with exactly the same semantic and argument list, except for the first argument (struct net_device or struct devlink). The driver implementation could still de-duplicate a lot of code, as far as the shaper-related arguments are the same. Side note, if the intention is to allow the user to touch/modify the queue-level and queue-group-level shapers via the devlink object? if that is the intention, we will need to drop the shaper cache and (re-)introduce a get() callback, as the same shaper could be reached via multiple binding/handle pairs and the core will not know all of such pairs for a given shaper. Thanks, Paolo
Fri, Aug 23, 2024 at 02:58:27PM CEST, pabeni@redhat.com wrote: >On 8/23/24 13:50, Jiri Pirko wrote: >> Fri, Aug 23, 2024 at 12:56:08AM CEST, kuba@kernel.org wrote: >> > On Thu, 22 Aug 2024 22:30:35 +0200 Paolo Abeni wrote: >> > > > > I'm not saying this is deal breaker for me. I just think that if the api >> > > > > is designed to be independent of the object shaper is bound to >> > > > > (netdev/devlink_port/etc), it would be much much easier to extend in the >> > > > > future. If you do everything netdev-centric from start, I'm sure no >> > > > > shaper consolidation will ever happen. And that I thought was one of the >> > > > > goals. >> > > > > >> > > > > Perhaps Jakub has opinion. >> > > > >> > > > I think you and I are on the same page :) Other than the "reference >> > > > object" (netdev / devlink port) the driver facing API should be >> > > > identical. Making it possible for the same driver code to handle >> > > > translating the parameters into HW config / FW requests, whether >> > > > they shape at the device (devlink) or port (netdev) level. >> > > > >> > > > Shaper NL for netdevs is separate from internal representation and >> > > > driver API in my mind. My initial ask was to create the internal >> > > > representation first, make sure it can express devlink and handful of >> > > > exiting netdev APIs, and only once that's merged worry about exposing >> > > > it via a new NL. >> > > > >> > > > I'm not opposed to showing devlink shapers in netdev NL (RO as you say) >> > > > but talking about it now strikes me as cart before the horse. >> > > >> > > FTR, I don't see both of you on the same page ?!? >> > > >> > > I read the above as Jiri's preference is a single ndo set to control >> >> "Ndo" stands for netdev op and they are all tightly coupled with >> netdevices. So, "single ndo set to control both devlink and netdev >> shapers" sounds like nonsense to me. > >In this context, "NDOs" == set of function pointers operating on the same >object. > >> > > Or to phrase the above differently, Jiri is focusing on the shaper >> > > "binding" (how to locate/access it) while Jakub is focusing on the >> > > shaper "info" (content/definition/attributes). Please correct me If I >> > > misread something. >> >> Two(or more) similar ops structs looks odd to me. I think that the ops >> should should be shared and just the "binding point" should be somehow >> abstracted out. Code speaks, let me draft how it could be done: >> >> enum net_shaper_binding_type { >> NET_SHAPER_BINDING_TYPE_NETDEV, >> NET_SHAPER_BINDING_TYPE_DEVLINK_PORT, >> }; >> >> struct net_shaper_binding { >> enum net_shaper_binding_type type; >> union { >> struct net_device *netdev; >> struct devlink_port *devlink_port; >> }; >> }; >> >> struct net_shaper_ops { >> + /** >> + * @group: create the specified shapers scheduling group >> + * >> + * Nest the @leaves shapers identified by @leaves_handles under the >> + * @root shaper identified by @root_handle. All the shapers belong >> + * to the network device @dev. The @leaves and @leaves_handles shaper >> + * arrays size is specified by @leaves_count. >> + * Create either the @leaves and the @root shaper; or if they already >> + * exists, links them together in the desired way. >> + * @leaves scope must be NET_SHAPER_SCOPE_QUEUE. >> + * >> + * Returns 0 on group successfully created, otherwise an negative >> + * error value and set @extack to describe the failure's reason. >> + */ >> + int (*group)(const struct net_shaper_binding *binding, int leaves_count, >> + const struct net_shaper_handle *leaves_handles, >> + const struct net_shaper_info *leaves, >> + const struct net_shaper_handle *root_handle, >> + const struct net_shaper_info *root, >> + struct netlink_ext_ack *extack); >> + >> + /** >> + * @set: Updates the specified shaper >> + * >> + * Updates or creates the @shaper identified by the provided @handle >> + * on the given device @dev. >> + * >> + * Returns 0 on success, otherwise an negative >> + * error value and set @extack to describe the failure's reason. >> + */ >> + int (*set)(const struct net_shaper_binding *binding, >> + const struct net_shaper_handle *handle, >> + const struct net_shaper_info *shaper, >> + struct netlink_ext_ack *extack); >> + >> + /** >> + * @delete: Removes the specified shaper from the NIC >> + * >> + * Removes the shaper configuration as identified by the given @handle >> + * on the specified device @dev, restoring the default behavior. >> + * >> + * Returns 0 on success, otherwise an negative >> + * error value and set @extack to describe the failure's reason. >> + */ >> + int (*delete)(const struct net_shaper_binding *binding, >> + const struct net_shaper_handle *handle, >> + struct netlink_ext_ack *extack); >> +}; >> >> >> static inline struct net_device * >> net_shaper_binding_netdev(struct net_shaper_binding *binding) >> { >> WARN_ON(binding->type != NET_SHAPER_BINDING_TYPE_NETDEV) >> return binding->netdev; >> } >> >> static inline struct devlink_port * >> net_shaper_binding_devlink_port(struct net_shaper_binding *binding) >> { >> WARN_ON(binding->type != NET_SHAPER_BINDING_TYPE_DEVLINK_PORT) >> return binding->devlink_port; >> } >> >> Then whoever calls the op fills-up the binding structure accordingly. >> >> >> drivers can implement ops, for netdev-bound shaper like this: >> >> static int driverx_shaper_set(const struct net_shaper_binding *binding, >> const struct net_shaper_handle *handle, >> const struct net_shaper_info *shaper, >> struct netlink_ext_ack *extack); >> { >> struct net_device *netdev = net_shaper_binding_netdev(binding); >> ...... >> } >> >> struct net_shaper_ops driverx_shaper_ops { >> .set = driverx_shaper_set; >> ...... >> }; >> >> static const struct net_device_ops driverx_netdev_ops = { >> .net_shaper_ops = &driverx_shaper_ops, >> ...... >> }; > >If I read correctly, the net_shaper_ops caller will have to discriminate >between net_device and devlink, do the object-type-specific lookup to get the >relevant net_device (or devlink) object and then pass to such net_device (or >devlink) a "generic" binding. Did I misread something? If so I must admit I >really dislike such interface. You are right. For example devlink rate set command handler will call the op with devlink_port binding set. > >I personally think it would be much cleaner to have 2 separate set of >operations, with exactly the same semantic and argument list, except for the >first argument (struct net_device or struct devlink). I think it is totally subjective. You like something, I like something else. Both works. The amount of duplicity and need to change same things on multiple places in case of bugfixes and extensions is what I dislike on the 2 separate sets. Plus, there might be another binding in the future, will you copy the ops struct again then? > >The driver implementation could still de-duplicate a lot of code, as far as >the shaper-related arguments are the same. > >Side note, if the intention is to allow the user to touch/modify the >queue-level and queue-group-level shapers via the devlink object? if that is >the intention, we will need to drop the shaper cache and (re-)introduce a >get() callback, as the same shaper could be reached via multiple >binding/handle pairs and the core will not know all of such pairs for a given >shaper. That is a good question, I don't know. But gut feeling is "no". > >Thanks, > >Paolo >
On 8/23/24 15:36, Jiri Pirko wrote: > Fri, Aug 23, 2024 at 02:58:27PM CEST, pabeni@redhat.com wrote: >> I personally think it would be much cleaner to have 2 separate set of >> operations, with exactly the same semantic and argument list, except for the >> first argument (struct net_device or struct devlink). > > I think it is totally subjective. You like something, I like something > else. Both works. The amount of duplicity and need to change same > things on multiple places in case of bugfixes and extensions is what I > dislike on the 2 separate sets. My guestimate is that the amount of deltas caused by bugfixes and extensions will be much different in practice with the two approaches. I guess that even with the net_shaper_ops between devlink and net_device, there will be different callbacks implementation for devlink and net_device, right? If so, the differentiated operation list between devlink and net_device will trade a: { struct {net_device, netlink} = net_shaper_binding_{netdevice_netlink}(binding); preamble in every callback of every driver for a single additional operations set definition. It will at least scale better with the number of driver implementing the interface. > Plus, there might be another binding in > the future, will you copy the ops struct again then? Yes. Same reasons of the above. >> The driver implementation could still de-duplicate a lot of code, as far as >> the shaper-related arguments are the same. >> >> Side note, if the intention is to allow the user to touch/modify the >> queue-level and queue-group-level shapers via the devlink object? if that is >> the intention, we will need to drop the shaper cache and (re-)introduce a >> get() callback, as the same shaper could be reached via multiple >> binding/handle pairs and the core will not know all of such pairs for a given >> shaper. > > That is a good question, I don't know. But gut feeling is "no". Well, at least that is not in the direction of unlimited amount of additional time and pain ;) Thanks, Paolo
Fri, Aug 23, 2024 at 04:23:30PM CEST, pabeni@redhat.com wrote: >On 8/23/24 15:36, Jiri Pirko wrote: >> Fri, Aug 23, 2024 at 02:58:27PM CEST, pabeni@redhat.com wrote: >> > I personally think it would be much cleaner to have 2 separate set of >> > operations, with exactly the same semantic and argument list, except for the >> > first argument (struct net_device or struct devlink). >> >> I think it is totally subjective. You like something, I like something >> else. Both works. The amount of duplicity and need to change same >> things on multiple places in case of bugfixes and extensions is what I >> dislike on the 2 separate sets. > >My guestimate is that the amount of deltas caused by bugfixes and extensions >will be much different in practice with the two approaches. > >I guess that even with the net_shaper_ops between devlink and net_device, >there will be different callbacks implementation for devlink and net_device, >right? > >If so, the differentiated operation list between devlink and net_device will >trade a: > >{ > struct {net_device, netlink} = >net_shaper_binding_{netdevice_netlink}(binding); > >preamble in every callback of every driver for a single additional operations >set definition. So? > >It will at least scale better with the number of driver implementing the >interface. > >> Plus, there might be another binding in >> the future, will you copy the ops struct again then? > >Yes. Same reasons of the above. What's stopping anyone from diverging these 2-n sets? I mean, the whole purpose it unification and finding common ground. Once you have ops duplicated, sooner then later someone does change in A but ignore B. Having the "preamble" in every callback seems like very good tradeoff to prevent this scenario. > >> > The driver implementation could still de-duplicate a lot of code, as far as >> > the shaper-related arguments are the same. >> > >> > Side note, if the intention is to allow the user to touch/modify the >> > queue-level and queue-group-level shapers via the devlink object? if that is >> > the intention, we will need to drop the shaper cache and (re-)introduce a >> > get() callback, as the same shaper could be reached via multiple >> > binding/handle pairs and the core will not know all of such pairs for a given >> > shaper. >> >> That is a good question, I don't know. But gut feeling is "no". > >Well, at least that is not in the direction of unlimited amount of additional >time and pain ;) > >Thanks, > >Paolo >
On 8/26/24 11:31, Jiri Pirko wrote: > Fri, Aug 23, 2024 at 04:23:30PM CEST, pabeni@redhat.com wrote: >> On 8/23/24 15:36, Jiri Pirko wrote: >>> Fri, Aug 23, 2024 at 02:58:27PM CEST, pabeni@redhat.com wrote: >>>> I personally think it would be much cleaner to have 2 separate set of >>>> operations, with exactly the same semantic and argument list, except for the >>>> first argument (struct net_device or struct devlink). >>> >>> I think it is totally subjective. You like something, I like something >>> else. Both works. The amount of duplicity and need to change same >>> things on multiple places in case of bugfixes and extensions is what I >>> dislike on the 2 separate sets. >> >> My guestimate is that the amount of deltas caused by bugfixes and extensions >> will be much different in practice with the two approaches. >> >> I guess that even with the net_shaper_ops between devlink and net_device, >> there will be different callbacks implementation for devlink and net_device, >> right? >> >> If so, the differentiated operation list between devlink and net_device will >> trade a: >> >> { >> struct {net_device, netlink} = >> net_shaper_binding_{netdevice_netlink}(binding); >> >> preamble in every callback of every driver for a single additional operations >> set definition. > > So? The amount of code we would need to change in case of core changes would probably be similar with either the differentiated operations list or not. >> It will at least scale better with the number of driver implementing the >> interface. >> >>> Plus, there might be another binding in >>> the future, will you copy the ops struct again then? >> >> Yes. Same reasons of the above. > > What's stopping anyone from diverging these 2-n sets? I mean, the whole > purpose it unification and finding common ground. Once you have ops > duplicated, sooner then later someone does change in A but ignore B. > Having the "preamble" in every callback seems like very good tradeoff > to prevent this scenario. The main fact is that we do not agree on the above point - unify the shaper_ops between struct net_device and struct devlink. I think a 3rd party opinion could help moving forward. @Jakub could you please share your view here? Thanks, Paolo
On Tue, 27 Aug 2024 16:37:38 +0200 Paolo Abeni wrote: > > What's stopping anyone from diverging these 2-n sets? I mean, the whole > > purpose it unification and finding common ground. Once you have ops > > duplicated, sooner then later someone does change in A but ignore B. > > Having the "preamble" in every callback seems like very good tradeoff > > to prevent this scenario. > > The main fact is that we do not agree on the above point - unify the > shaper_ops between struct net_device and struct devlink. > > I think a 3rd party opinion could help moving forward. > @Jakub could you please share your view here? I don't mind Jiri's suggestion. Driver can declare its own helper: static struct drv_port * drv_shaper_binding_to_prot(const struct net_shaper_binding *binding) { if (binding->type == NET_SHAPER_BINDING_TYPE_NETDEV) return /* netdev_priv() ? */; if (binding->type == NET_SHAPER_BINDING_TYPE_DEVLINK_PORT) return /* container_of() ? */; WARN_ONCE(); return NULL; } And call that instead of netdev_priv()?
On 8/27/24 16:54, Jakub Kicinski wrote: > On Tue, 27 Aug 2024 16:37:38 +0200 Paolo Abeni wrote: >>> What's stopping anyone from diverging these 2-n sets? I mean, the whole >>> purpose it unification and finding common ground. Once you have ops >>> duplicated, sooner then later someone does change in A but ignore B. >>> Having the "preamble" in every callback seems like very good tradeoff >>> to prevent this scenario. >> >> The main fact is that we do not agree on the above point - unify the >> shaper_ops between struct net_device and struct devlink. >> >> I think a 3rd party opinion could help moving forward. >> @Jakub could you please share your view here? > > I don't mind Jiri's suggestion. Driver can declare its own helper: > > static struct drv_port * > drv_shaper_binding_to_prot(const struct net_shaper_binding *binding) > { > if (binding->type == NET_SHAPER_BINDING_TYPE_NETDEV) > return /* netdev_priv() ? */; > if (binding->type == NET_SHAPER_BINDING_TYPE_DEVLINK_PORT) > return /* container_of() ? */; > WARN_ONCE(); > return NULL; > } > > And call that instead of netdev_priv()? As I wrote this does not look like something that would help de-deuplicate any code, but since you both seem to agree... Double checking before I rewrote a significant amount of the core code: In the NL API, I will replace ifindex with binding, the latter will include nested attributes ifindex, bus_name and dev_name. Note that 'struct net_shaper_binding' must include a ‘struct devlink *’ as opposed to a ‘struct devlink_port *’, as mentioned in the code snippet so far, or we will have to drop the shaper cache and re-introduce a get() operation. The ops will try to fetch the net_device or devlink according to the provided attributes. At the moment the ops will error out with ENOTSUP for netlink object. Most core helpers will be re-factored to accept a 'binding *' argument in place of a 'struct net_device *'. Full netlink support (for the brave that will implement it later) will likely require at least an additional scope (devlink_port), include the net_shaper_ops and net_shaper_data inside struct devlink, and likely code adaptation inside the core. /P
On Tue, 27 Aug 2024 22:43:09 +0200 Paolo Abeni wrote: > >> The main fact is that we do not agree on the above point - unify the > >> shaper_ops between struct net_device and struct devlink. > >> > >> I think a 3rd party opinion could help moving forward. > >> @Jakub could you please share your view here? > > > > I don't mind Jiri's suggestion. Driver can declare its own helper: > > > > static struct drv_port * > > drv_shaper_binding_to_port(const struct net_shaper_binding *binding) > > { > > if (binding->type == NET_SHAPER_BINDING_TYPE_NETDEV) > > return /* netdev_priv() ? */; > > if (binding->type == NET_SHAPER_BINDING_TYPE_DEVLINK_PORT) > > return /* container_of() ? */; > > WARN_ONCE(); > > return NULL; > > } > > > > And call that instead of netdev_priv()? > > As I wrote this does not look like something that would help > de-deuplicate any code, but since you both seem to agree... To be clear. Given the helper above you can replace: struct iavf_adapter *adapter = netdev_priv(dev); with: struct iavf_adapter *adapter = iavf_from_shaper_handle(handle); In all the callbacks, and callbacks can now take devlink or other "handles". > Double checking before I rewrote a significant amount of the core code: > > In the NL API, I will replace ifindex with binding, the latter will > include nested attributes ifindex, bus_name > and dev_name. Any mention of the netlink API makes me worried that the "internal representation should be separate from the uAPI" point is not agreed on or at least not understood :( The NL API does not need other object types. And there's currently no uAPI gap for devlink, AFAIK. So the conversation about "handles" if quite forward-looking. > Note that 'struct net_shaper_binding' must include a ‘struct devlink > *’ as opposed to a > ‘struct devlink_port *’, as mentioned in the code snippet so far, or > we will have to drop the > shaper cache and re-introduce a get() operation. > > The ops will try to fetch the net_device or devlink according to the > provided attributes. > At the moment the ops will error out with ENOTSUP for netlink object. No need to error out. Driver must not receive calls with object types they don't support (we can add capabilities for this, later on, and core should check, or do what Jiri suggests and just hook the ops into different structs). > Most core helpers > will be re-factored to accept a 'binding *' argument in place of a > 'struct net_device *'. > > Full netlink support (for the brave that will implement it later) will > likely require at least an > additional scope (devlink_port), include the net_shaper_ops and > net_shaper_data inside > struct devlink, and likely code adaptation inside the core. Back to uAPI worries. FWIW I think that how we pass the handle to drivers is of relatively little importance. It's small amount of mechanical work to change it later. But you asked me about the proposal so I answered. To me representing the other shaper APIs in terms of the new driver facing API *within netdev* is more important. Literally the only change I would have expected based on this branch of the conversation is slight adjustment to the parameters to ops. Nothing else. No netlink changes. Only core change would be to wrap the ops.
On Tue, Aug 27, 2024 at 11:10 PM Jakub Kicinski <kuba@kernel.org> wrote: > Literally the only change I would have expected based on this branch of > the conversation is slight adjustment to the parameters to ops. Nothing > else. No netlink changes. Only core change would be to wrap the ops. FTR, the above is quite the opposite of my understanding of the whole conversation so far. If the whole delta under discussion is just that, we could make such a change at any given time (and I would strongly support the idea to make it only when the devlink bits will be ready) and its presence (or absence) will not block the net_device/devlink shaper callback consolidation in any way. I thought Jiri intended the whole core bits to be adapted to handle 'binding' instead of net_device, to allow a more seamless plug of devlink. @Jiri: did I misread your words? Thanks, Paolo
Tue, Aug 27, 2024 at 11:54:48PM CEST, pabeni@redhat.com wrote: >On Tue, Aug 27, 2024 at 11:10 PM Jakub Kicinski <kuba@kernel.org> wrote: >> Literally the only change I would have expected based on this branch of >> the conversation is slight adjustment to the parameters to ops. Nothing >> else. No netlink changes. Only core change would be to wrap the ops. > >FTR, the above is quite the opposite of my understanding of the whole >conversation so far. If the whole delta under discussion is just that, >we could make such a change at any given time (and I would strongly >support the idea to make it only when the devlink bits will be ready) >and its presence (or absence) will not block the net_device/devlink >shaper callback consolidation in any way. > >I thought Jiri intended the whole core bits to be adapted to handle >'binding' instead of net_device, to allow a more seamless plug of >devlink. >@Jiri: did I misread your words? Yep that is correct. But you don't need the UAPI extension for that, as UAPI for devlink rate already exists. Regarding your shaper UAPI. I think that you just need to make sure it is easily extendable by another binding in the future. The current ifindex binding is fine for your shaper UAPI. Just have it in nest and allow to extend by another attr later on. Code speaks: enum net_shaper_a_binding { NET_SHAPER_A_BINDING_IFINDEX = 1; }; enum { NET_SHAPER_A_BINDING = 1, /*nested enum net_shaper_a_binding*/ NET_SHAPER_A_..... .................. .................. }; Later on we can easily extend the UAPI by: enum net_shaper_a_binding { NET_SHAPER_A_BINDING_IFINDEX = 1; NET_SHAPER_A_BINDING_DEVLINK_PORT; } Makes sense? > >Thanks, > >Paolo >
On 8/28/24 08:40, Jiri Pirko wrote:
> Makes sense?
Almost! Tacking aside the (very significant) differences between your
proposition and Jakub’s, we can't use devlink port here, just devlink,
or we will have to drop the cache too[1]. Specific devlink port shapers
will be reached via different handles (scope/id).
Additionally, I think we don't need strictly the ‘binding’ nested
attribute to extend the NL API with different binding objects (devlink),
we could append the new attributes needed to support (identify) devlink
at the end of the net shaper attributes list. I agree that would be
likely less ‘nice’.
What about:
- Refactor the core and the driver api to support the ‘binding’ thing
- Update the NL definition to nest the ‘ifindex’ attribute under the
‘binding’ one. No mention/reference to devlink yet, so most of the
documentation will be unchanged.
- devlink support will not be included, but there should be enough
ground paved for it.
?
Thanks,
Paolo
[1] the cache container belongs to the ‘entry point’ inside the shaper
hierarchy - i.e. currently, the struct net_device. If we add a
devlink_port ‘entry point’, the cache there will have to manage even the
shaper for devlink ports group. When accessing a group containing
multiple ports, we will get multiple inconsistent cache values.
Wed, Aug 28, 2024 at 12:55:31PM CEST, pabeni@redhat.com wrote: >On 8/28/24 08:40, Jiri Pirko wrote: >> Makes sense? > >Almost! Tacking aside the (very significant) differences between your >proposition and Jakub’s, we can't use devlink port here, just devlink, or we >will have to drop the cache too[1]. Specific devlink port shapers will be >reached via different handles (scope/id). Ok, I guess. Need to see the code. > >Additionally, I think we don't need strictly the ‘binding’ nested attribute >to extend the NL API with different binding objects (devlink), we could >append the new attributes needed to support (identify) devlink at the end of >the net shaper attributes list. I agree that would be likely less ‘nice’. True and true. > >What about: >- Refactor the core and the driver api to support the ‘binding’ thing Ack. >- Update the NL definition to nest the ‘ifindex’ attribute under the >‘binding’ one. No mention/reference to devlink yet, so most of the >documentation will be unchanged. Ack. >- devlink support will not be included, but there should be enough ground >paved for it. Ack. > >? Thanks! > >Thanks, > >Paolo > >[1] the cache container belongs to the ‘entry point’ inside the shaper >hierarchy - i.e. currently, the struct net_device. If we add a devlink_port >‘entry point’, the cache there will have to manage even the shaper for >devlink ports group. When accessing a group containing multiple ports, we >will get multiple inconsistent cache values. >
On Wed, 28 Aug 2024 12:55:31 +0200 Paolo Abeni wrote: > - Update the NL definition to nest the ‘ifindex’ attribute under the > ‘binding’ one. No mention/reference to devlink yet, so most of the > documentation will be unchanged. Sorry but I think that's a bad idea. Nesting attributes in netlink with no semantic implications, just to "organize" attributes which are somehow related just complicates the code and wastes space. Netlink is not JSON. Especially in this case, where we would do it for future uAPI extension which I really hope we won't need, since (1) devlink already has one, (2) the point of this API is to reduce the uAPI surface, not extend it, (3) user requirements for devlink and netdev config are different.
On 8/28/24 22:30, Jakub Kicinski wrote: > On Wed, 28 Aug 2024 12:55:31 +0200 Paolo Abeni wrote: >> - Update the NL definition to nest the ‘ifindex’ attribute under the >> ‘binding’ one. No mention/reference to devlink yet, so most of the >> documentation will be unchanged. > > Sorry but I think that's a bad idea. Nesting attributes in netlink > with no semantic implications, just to "organize" attributes which > are somehow related just complicates the code and wastes space. > Netlink is not JSON. > > Especially in this case, where we would do it for future uAPI extension > which I really hope we won't need, since (1) devlink already has one, > (2) the point of this API is to reduce the uAPI surface, not extend it, > (3) user requirements for devlink and netdev config are different. FTR I was no more than 60" from posting the new revision including that when I read that. I hope Jiri would agree... Well, I guess I have to roll-back a lot of changes... Not sure if I'll be able to post tomorrow evening my time... /P
Wed, Aug 28, 2024 at 11:13:04PM CEST, pabeni@redhat.com wrote: > > >On 8/28/24 22:30, Jakub Kicinski wrote: >> On Wed, 28 Aug 2024 12:55:31 +0200 Paolo Abeni wrote: >> > - Update the NL definition to nest the ‘ifindex’ attribute under the >> > ‘binding’ one. No mention/reference to devlink yet, so most of the >> > documentation will be unchanged. >> >> Sorry but I think that's a bad idea. Nesting attributes in netlink >> with no semantic implications, just to "organize" attributes which >> are somehow related just complicates the code and wastes space. >> Netlink is not JSON. >> >> Especially in this case, where we would do it for future uAPI extension >> which I really hope we won't need, since (1) devlink already has one, >> (2) the point of this API is to reduce the uAPI surface, not extend it, >> (3) user requirements for devlink and netdev config are different. > >FTR I was no more than 60" from posting the new revision including that when >I read that. I hope Jiri would agree... Sure. > >Well, I guess I have to roll-back a lot of changes... Not sure if I'll be >able to post tomorrow evening my time... > >/P > >
diff --git a/Documentation/networking/kapi.rst b/Documentation/networking/kapi.rst index ea55f462cefa..98682b9a13ee 100644 --- a/Documentation/networking/kapi.rst +++ b/Documentation/networking/kapi.rst @@ -104,6 +104,9 @@ Driver Support .. kernel-doc:: include/linux/netdevice.h :internal: +.. kernel-doc:: include/net/net_shaper.h + :internal: + PHY Support ----------- diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 607009150b5f..d3d952be711c 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -81,6 +81,8 @@ struct xdp_frame; struct xdp_metadata_ops; struct xdp_md; struct ethtool_netdev_state; +struct net_shaper_ops; +struct net_shaper_data; typedef u32 xdp_features_t; @@ -1598,6 +1600,14 @@ struct net_device_ops { int (*ndo_hwtstamp_set)(struct net_device *dev, struct kernel_hwtstamp_config *kernel_config, struct netlink_ext_ack *extack); + +#if IS_ENABLED(CONFIG_NET_SHAPER) + /** + * @net_shaper_ops: Device shaping offload operations + * see include/net/net_shapers.h + */ + const struct net_shaper_ops *net_shaper_ops; +#endif }; /** @@ -2408,6 +2418,13 @@ struct net_device { /** @irq_moder: dim parameters used if IS_ENABLED(CONFIG_DIMLIB). */ struct dim_irq_moder *irq_moder; +#if IS_ENABLED(CONFIG_NET_SHAPER) + /** + * @net_shaper_data: data tracking the current shaper status + * see include/net/net_shapers.h + */ + struct net_shaper_data *net_shaper_data; +#endif u8 priv[] ____cacheline_aligned __counted_by(priv_len); } ____cacheline_aligned; diff --git a/include/net/net_shaper.h b/include/net/net_shaper.h new file mode 100644 index 000000000000..8cd65d727e52 --- /dev/null +++ b/include/net/net_shaper.h @@ -0,0 +1,158 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#ifndef _NET_SHAPER_H_ +#define _NET_SHAPER_H_ + +#include <linux/types.h> +#include <linux/bits.h> +#include <linux/bitfield.h> +#include <linux/netdevice.h> +#include <linux/netlink.h> + +#include <uapi/linux/net_shaper.h> + +/** + * struct net_shaper_info - represents a shaping node on the NIC H/W + * zeroed field are considered not set. + * @handle: Unique identifier for the shaper, see @net_shaper_make_handle + * @parent: Unique identifier for the shaper parent, usually implied. Only + * NET_SHAPER_SCOPE_QUEUE, NET_SHAPER_SCOPE_NETDEV and NET_SHAPER_SCOPE_DETACHED + * can have the parent handle explicitly set, placing such shaper under + * the specified parent. + * @metric: Specify if the bw limits refers to PPS or BPS + * @bw_min: Minimum guaranteed rate for this shaper + * @bw_max: Maximum peak bw allowed for this shaper + * @burst: Maximum burst for the peek rate of this shaper + * @priority: Scheduling priority for this shaper + * @weight: Scheduling weight for this shaper + * @children: Number of nested shapers, accounted only for DETACHED scope + */ +struct net_shaper_info { + u32 handle; + u32 parent; + enum net_shaper_metric metric; + u64 bw_min; + u64 bw_max; + u64 burst; + u32 priority; + u32 weight; + u32 children; +}; + +/** + * define NET_SHAPER_SCOPE_VF - Shaper scope + * + * This shaper scope is not exposed to user-space; the shaper is attached to + * the given virtual function. + */ +#define NET_SHAPER_SCOPE_VF __NET_SHAPER_SCOPE_MAX + +/** + * struct net_shaper_ops - Operations on device H/W shapers + * + * The initial shaping configuration ad device initialization is empty/ + * a no-op/does not constraint the b/w in any way. + * The network core keeps track of the applied user-configuration in + * per device storage. + * + * Each shaper is uniquely identified within the device with an 'handle', + * dependent on the shaper scope and other data, see @shaper_make_handle() + */ +struct net_shaper_ops { + /** + * @group: create the specified shapers group + * + * Nest the specified @inputs shapers under the given @output shaper + * on the network device @dev. The @input shaper array size is specified + * by @nr_input. + * Create either the @inputs and the @output shaper as needed, + * otherwise move them as needed. Can't create @inputs shapers with + * NET_SHAPER_SCOPE_DETACHED scope, a separate @group call with such + * shaper as @output is needed. + * + * Returns 0 on group successfully created, otherwise an negative + * error value and set @extack to describe the failure's reason. + */ + int (*group)(struct net_device *dev, int nr_input, + const struct net_shaper_info *inputs, + const struct net_shaper_info *output, + struct netlink_ext_ack *extack); + + /** + * @set: Updates the specified shaper + * + * Updates or creates the specified @shaper on the given device + * @dev. Can't create NET_SHAPER_SCOPE_DETACHED shapers, use @group + * instead. + * + * Returns 0 on group successfully created, otherwise an negative + * error value and set @extack to describe the failure's reason. + */ + int (*set)(struct net_device *dev, + const struct net_shaper_info *shaper, + struct netlink_ext_ack *extack); + + /** + * @delete: Removes the specified shaper from the NIC + * + * Removes the shaper configuration as identified by the given @handle + * on the specified device @dev, restoring the default behavior. + * + * Returns 0 on group successfully created, otherwise an negative + * error value and set @extack to describe the failure's reason. + */ + int (*delete)(struct net_device *dev, u32 handle, + struct netlink_ext_ack *extack); +}; + +#define NET_SHAPER_SCOPE_SHIFT 26 +#define NET_SHAPER_ID_MASK GENMASK(NET_SHAPER_SCOPE_SHIFT - 1, 0) +#define NET_SHAPER_SCOPE_MASK GENMASK(31, NET_SHAPER_SCOPE_SHIFT) + +#define NET_SHAPER_ID_UNSPEC NET_SHAPER_ID_MASK + +/** + * net_shaper_make_handle - creates an unique shaper identifier + * @scope: the shaper scope + * @id: the shaper id number + * + * Return: an unique identifier for the shaper + * + * Combines the specified arguments to create an unique identifier for + * the shaper. The @id argument semantic depends on the + * specified scope. + * For @NET_SHAPER_SCOPE_QUEUE_GROUP, @id is the queue group id + * For @NET_SHAPER_SCOPE_QUEUE, @id is the queue number. + * For @NET_SHAPER_SCOPE_VF, @id is virtual function number. + */ +static inline u32 net_shaper_make_handle(enum net_shaper_scope scope, + int id) +{ + return FIELD_PREP(NET_SHAPER_SCOPE_MASK, scope) | + FIELD_PREP(NET_SHAPER_ID_MASK, id); +} + +/** + * net_shaper_handle_scope - extract the scope from the given handle + * @handle: the shaper handle + * + * Return: the corresponding scope + */ +static inline enum net_shaper_scope net_shaper_handle_scope(u32 handle) +{ + return FIELD_GET(NET_SHAPER_SCOPE_MASK, handle); +} + +/** + * net_shaper_handle_id - extract the id number from the given handle + * @handle: the shaper handle + * + * Return: the corresponding id number + */ +static inline int net_shaper_handle_id(u32 handle) +{ + return FIELD_GET(NET_SHAPER_ID_MASK, handle); +} + +#endif + diff --git a/net/core/dev.c b/net/core/dev.c index 6ea1d20676fb..3dc1dd428eda 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -11169,6 +11169,8 @@ void free_netdev(struct net_device *dev) /* Flush device addresses */ dev_addr_flush(dev); + dev_shaper_flush(dev); + list_for_each_entry_safe(p, n, &dev->napi_list, dev_list) netif_napi_del(p); diff --git a/net/core/dev.h b/net/core/dev.h index 5654325c5b71..e376fc1c867b 100644 --- a/net/core/dev.h +++ b/net/core/dev.h @@ -35,6 +35,12 @@ void dev_addr_flush(struct net_device *dev); int dev_addr_init(struct net_device *dev); void dev_addr_check(struct net_device *dev); +#if IS_ENABLED(CONFIG_NET_SHAPER) +void dev_shaper_flush(struct net_device *dev); +#else +static inline void dev_shaper_flush(struct net_device *dev) {} +#endif + /* sysctls not referred to from outside net/core/ */ extern int netdev_unregister_timeout_secs; extern int weight_p; diff --git a/net/shaper/shaper.c b/net/shaper/shaper.c index 49de88c68e2f..5d1d6e600a6a 100644 --- a/net/shaper/shaper.c +++ b/net/shaper/shaper.c @@ -1,19 +1,242 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include <linux/kernel.h> +#include <linux/idr.h> #include <linux/skbuff.h> +#include <linux/xarray.h> +#include <net/net_shaper.h> #include "shaper_nl_gen.h" +#include "../core/dev.h" + +struct net_shaper_data { + struct xarray shapers; + struct idr detached_ids; +}; + +struct net_shaper_nl_ctx { + u32 start_handle; +}; + +static int fill_handle(struct sk_buff *msg, u32 handle, u32 type, + const struct genl_info *info) +{ + struct nlattr *handle_attr; + + if (!handle) + return 0; + + handle_attr = nla_nest_start_noflag(msg, type); + if (!handle_attr) + return -EMSGSIZE; + + if (nla_put_u32(msg, NET_SHAPER_A_SCOPE, + net_shaper_handle_scope(handle)) || + nla_put_u32(msg, NET_SHAPER_A_ID, + net_shaper_handle_id(handle))) + goto handle_nest_cancel; + + nla_nest_end(msg, handle_attr); + return 0; + +handle_nest_cancel: + nla_nest_cancel(msg, handle_attr); + return -EMSGSIZE; +} + +static int +net_shaper_fill_one(struct sk_buff *msg, struct net_shaper_info *shaper, + const struct genl_info *info) +{ + void *hdr; + + hdr = genlmsg_iput(msg, info); + if (!hdr) + return -EMSGSIZE; + + if (fill_handle(msg, shaper->parent, NET_SHAPER_A_PARENT, info) || + fill_handle(msg, shaper->handle, NET_SHAPER_A_HANDLE, info) || + nla_put_u32(msg, NET_SHAPER_A_METRIC, shaper->metric) || + nla_put_uint(msg, NET_SHAPER_A_BW_MIN, shaper->bw_min) || + nla_put_uint(msg, NET_SHAPER_A_BW_MAX, shaper->bw_max) || + nla_put_uint(msg, NET_SHAPER_A_BURST, shaper->burst) || + nla_put_u32(msg, NET_SHAPER_A_PRIORITY, shaper->priority) || + nla_put_u32(msg, NET_SHAPER_A_WEIGHT, shaper->weight)) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + + return 0; + +nla_put_failure: + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; +} + +/* On success sets pdev to the relevant device and acquires a reference + * to it + */ +static int fetch_dev(const struct genl_info *info, struct net_device **pdev) +{ + struct net *ns = genl_info_net(info); + struct net_device *dev; + int ifindex; + + if (GENL_REQ_ATTR_CHECK(info, NET_SHAPER_A_IFINDEX)) + return -EINVAL; + + ifindex = nla_get_u32(info->attrs[NET_SHAPER_A_IFINDEX]); + dev = dev_get_by_index(ns, ifindex); + if (!dev) { + GENL_SET_ERR_MSG_FMT(info, "device %d not found", ifindex); + return -EINVAL; + } + + if (!dev->netdev_ops->net_shaper_ops) { + GENL_SET_ERR_MSG_FMT(info, "device %s does not support H/W shaper", + dev->name); + netdev_put(dev, NULL); + return -EOPNOTSUPP; + } + + *pdev = dev; + return 0; +} + +static struct xarray *__sc_container(struct net_device *dev) +{ + return dev->net_shaper_data ? &dev->net_shaper_data->shapers : NULL; +} + +/* lookup the given shaper inside the cache */ +static struct net_shaper_info *sc_lookup(struct net_device *dev, u32 handle) +{ + struct xarray *xa = __sc_container(dev); + + return xa ? xa_load(xa, handle) : NULL; +} + +static int parse_handle(const struct nlattr *attr, const struct genl_info *info, + u32 *handle) +{ + struct nlattr *tb[NET_SHAPER_A_ID + 1]; + struct nlattr *scope_attr, *id_attr; + enum net_shaper_scope scope; + u32 id = 0; + int ret; + + ret = nla_parse_nested(tb, NET_SHAPER_A_ID, attr, + net_shaper_handle_nl_policy, info->extack); + if (ret < 0) + return ret; + + scope_attr = tb[NET_SHAPER_A_SCOPE]; + if (!scope_attr) { + GENL_SET_ERR_MSG(info, "Missing 'scope' attribute for handle"); + return -EINVAL; + } + + scope = nla_get_u32(scope_attr); + + /* the default id for detached scope shapers is an invalid one + * to help the 'group' operation discriminate between new + * detached shaper creation (ID_UNSPEC) and reuse of existing + * shaper (any other value) + */ + id_attr = tb[NET_SHAPER_A_ID]; + if (id_attr) + id = nla_get_u32(id_attr); + else if (scope == NET_SHAPER_SCOPE_DETACHED) + id = NET_SHAPER_ID_UNSPEC; + + *handle = net_shaper_make_handle(scope, id); + return 0; +} + int net_shaper_nl_get_doit(struct sk_buff *skb, struct genl_info *info) { - return -EOPNOTSUPP; + struct net_shaper_info *shaper; + struct net_device *dev; + struct sk_buff *msg; + u32 handle; + int ret; + + ret = fetch_dev(info, &dev); + if (ret) + return ret; + + if (GENL_REQ_ATTR_CHECK(info, NET_SHAPER_A_HANDLE)) + goto put; + + ret = parse_handle(info->attrs[NET_SHAPER_A_HANDLE], info, &handle); + if (ret < 0) + goto put; + + shaper = sc_lookup(dev, handle); + if (!shaper) { + GENL_SET_ERR_MSG_FMT(info, "Can't find shaper for handle %x", handle); + ret = -EINVAL; + goto put; + } + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) { + ret = -ENOMEM; + goto put; + } + + ret = net_shaper_fill_one(msg, shaper, info); + if (ret) + goto free_msg; + + ret = genlmsg_reply(msg, info); + if (ret) + goto free_msg; + +put: + netdev_put(dev, NULL); + return ret; + +free_msg: + nlmsg_free(msg); + goto put; } int net_shaper_nl_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb) { - return -EOPNOTSUPP; + struct net_shaper_nl_ctx *ctx = (struct net_shaper_nl_ctx *)cb->ctx; + const struct genl_info *info = genl_info_dump(cb); + struct net_shaper_info *shaper; + struct net_device *dev; + unsigned long handle; + int ret; + + ret = fetch_dev(info, &dev); + if (ret) + return ret; + + BUILD_BUG_ON(sizeof(struct net_shaper_nl_ctx) > sizeof(cb->ctx)); + + /* don't error out dumps performed before any set operation */ + if (!dev->net_shaper_data) { + ret = 0; + goto put; + } + + xa_for_each_range(&dev->net_shaper_data->shapers, handle, shaper, + ctx->start_handle, U32_MAX) { + ret = net_shaper_fill_one(skb, shaper, info); + if (ret) + goto put; + + ctx->start_handle = handle; + } + +put: + netdev_put(dev, NULL); + return ret; } int net_shaper_nl_set_doit(struct sk_buff *skb, struct genl_info *info) @@ -26,6 +249,30 @@ int net_shaper_nl_delete_doit(struct sk_buff *skb, struct genl_info *info) return -EOPNOTSUPP; } +int net_shaper_nl_group_doit(struct sk_buff *skb, struct genl_info *info) +{ + return -EOPNOTSUPP; +} + +void dev_shaper_flush(struct net_device *dev) +{ + struct xarray *xa = __sc_container(dev); + struct net_shaper_info *cur; + unsigned long index; + + if (!xa) + return; + + xa_lock(xa); + xa_for_each(xa, index, cur) { + __xa_erase(xa, index); + kfree(cur); + } + idr_destroy(&dev->net_shaper_data->detached_ids); + xa_unlock(xa); + kfree(xa); +} + static int __init shaper_init(void) { return genl_register_family(&net_shaper_nl_family);
Introduce the basic infrastructure to implement the net-shaper core functionality. Each network devices carries a net-shaper cache, the NL get() operation fetches the data from such cache. The cache is initially empty, will be fill by the set()/group() operation implemented later and is destroyed at device cleanup time. Signed-off-by: Paolo Abeni <pabeni@redhat.com> --- RFC v2 -> RFC v3: - dev_put() -> netdev_put() --- Documentation/networking/kapi.rst | 3 + include/linux/netdevice.h | 17 ++ include/net/net_shaper.h | 158 +++++++++++++++++++ net/core/dev.c | 2 + net/core/dev.h | 6 + net/shaper/shaper.c | 251 +++++++++++++++++++++++++++++- 6 files changed, 435 insertions(+), 2 deletions(-) create mode 100644 include/net/net_shaper.h