diff mbox series

[rdma-next,v1,07/11] RDMA/nldev: Allow optional-counter status configuration through RDMA netlink

Message ID ed88592c676c5926195a6f89926146acaa466641.1631660727.git.leonro@nvidia.com (mailing list archive)
State Not Applicable
Delegated to: Netdev Maintainers
Headers show
Series Optional counter statistics support | expand

Checks

Context Check Description
netdev/cover_letter success Link
netdev/fixes_present success Link
netdev/patch_count success Link
netdev/tree_selection success Guessed tree name to be net-next
netdev/subject_prefix success Link
netdev/cc_maintainers warning 2 maintainers not CCed: jgg@ziepe.ca laniel_francis@privacyrequired.com
netdev/source_inline success Was 0 now: 0
netdev/verify_signedoff success Link
netdev/module_param success Was 0 now: 0
netdev/build_32bit success Errors and warnings before: 2 this patch: 0
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/verify_fixes success Link
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 152 lines checked
netdev/build_allmodconfig_warn success Errors and warnings before: 0 this patch: 0
netdev/header_inline success Link

Commit Message

Leon Romanovsky Sept. 14, 2021, 11:07 p.m. UTC
From: Aharon Landau <aharonl@nvidia.com>

Provide an option to allow users to enable/disable optional counters
through RDMA netlink. This is limited to users with ADMIN capability only.

Examples:
1. Enable optional counters cc_rx_ce_pkts and cc_rx_cnp_pkts (and
   disable all others):
$ sudo rdma statistic set link rocep8s0f0/1 optional-counters \
    cc_rx_ce_pkts,cc_rx_cnp_pkts

2. Remove all optional counters:
$ sudo rdma statistic unset link rocep8s0f0/1 optional-counters

Signed-off-by: Aharon Landau <aharonl@nvidia.com>
Reviewed-by: Mark Zhang <markzhang@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 drivers/infiniband/core/nldev.c | 118 ++++++++++++++++++++++++--------
 1 file changed, 88 insertions(+), 30 deletions(-)

Comments

Jason Gunthorpe Sept. 27, 2021, 5:20 p.m. UTC | #1
On Wed, Sep 15, 2021 at 02:07:26AM +0300, Leon Romanovsky wrote:
> -		return -EINVAL;
> +		need_enable = false;
> +		disabled = test_bit(i, stats->is_disabled);
> +		nla_for_each_nested(entry_attr,
> +				    tb[RDMA_NLDEV_ATTR_STAT_HWCOUNTERS], rem) {
> +			index = nla_get_u32(entry_attr);
> +			if (index >= stats->num_counters)
> +				return -EINVAL;
> +			if (i == index) {
> +				need_enable = true;
> +				break;
> +			}
> +		}
>  
> -	port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
> -	if (!rdma_is_port_valid(device, port)) {
> -		ret = -EINVAL;
> -		goto err;
> +		if (disabled && need_enable)
> +			ret = rdma_counter_modify(device, port, i, true);
> +		else if (!disabled && !need_enable)
> +			ret = rdma_counter_modify(device, port, i, false);

This disabled check looks racy; I would do the no-change optimization inside
rdma_counter_modify().

Also, this is an O(N^2) algorithm; why not do it in one pass with a
small memory allocation for the target state bitmap?

Jason
Leon Romanovsky Sept. 29, 2021, 12:27 p.m. UTC | #2
On Mon, Sep 27, 2021 at 02:20:06PM -0300, Jason Gunthorpe wrote:
> On Wed, Sep 15, 2021 at 02:07:26AM +0300, Leon Romanovsky wrote:
> > -		return -EINVAL;
> > +		need_enable = false;
> > +		disabled = test_bit(i, stats->is_disabled);
> > +		nla_for_each_nested(entry_attr,
> > +				    tb[RDMA_NLDEV_ATTR_STAT_HWCOUNTERS], rem) {
> > +			index = nla_get_u32(entry_attr);
> > +			if (index >= stats->num_counters)
> > +				return -EINVAL;
> > +			if (i == index) {
> > +				need_enable = true;
> > +				break;
> > +			}
> > +		}
> >  
> > -	port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
> > -	if (!rdma_is_port_valid(device, port)) {
> > -		ret = -EINVAL;
> > -		goto err;
> > +		if (disabled && need_enable)
> > +			ret = rdma_counter_modify(device, port, i, true);
> > +		else if (!disabled && !need_enable)
> > +			ret = rdma_counter_modify(device, port, i, false);
> 
> This disabled check looks racy; I would do the no-change optimization inside
> rdma_counter_modify().
> 
> Also, this is an O(N^2) algorithm; why not do it in one pass with a
> small memory allocation for the target state bitmap?

We don't have many counters. Is this optimization really worth it?

Thanks

> 
> Jason
diff mbox series

Patch

diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c
index d9443983efdc..b00e4257823d 100644
--- a/drivers/infiniband/core/nldev.c
+++ b/drivers/infiniband/core/nldev.c
@@ -1897,42 +1897,65 @@  static int nldev_set_sys_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
 	return err;
 }
 
-static int nldev_stat_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
-			       struct netlink_ext_ack *extack)
+static int nldev_stat_set_counter_dynamic_doit(struct nlattr *tb[],
+					       struct ib_device *device,
+					       u32 port)
 {
-	u32 index, port, mode, mask = 0, qpn, cntn = 0;
-	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
-	struct ib_device *device;
-	struct sk_buff *msg;
-	int ret;
+	struct rdma_hw_stats *stats;
+	int rem, i, index, ret = 0;
+	bool need_enable, disabled;
+	struct nlattr *entry_attr;
 
-	ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
-			  nldev_policy, extack);
-	/* Currently only counter for QP is supported */
-	if (ret || !tb[RDMA_NLDEV_ATTR_STAT_RES] ||
-	    !tb[RDMA_NLDEV_ATTR_DEV_INDEX] ||
-	    !tb[RDMA_NLDEV_ATTR_PORT_INDEX] || !tb[RDMA_NLDEV_ATTR_STAT_MODE])
+	stats = ib_get_hw_stats_port(device, port);
+	if (!stats)
 		return -EINVAL;
 
-	if (nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES]) != RDMA_NLDEV_ATTR_RES_QP)
-		return -EINVAL;
+	for (i = 0; i < stats->num_counters; i++) {
+		if (!(stats->descs[i].flags & IB_STAT_FLAG_OPTIONAL))
+			continue;
 
-	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
-	device = ib_device_get_by_index(sock_net(skb->sk), index);
-	if (!device)
-		return -EINVAL;
+		need_enable = false;
+		disabled = test_bit(i, stats->is_disabled);
+		nla_for_each_nested(entry_attr,
+				    tb[RDMA_NLDEV_ATTR_STAT_HWCOUNTERS], rem) {
+			index = nla_get_u32(entry_attr);
+			if (index >= stats->num_counters)
+				return -EINVAL;
+			if (i == index) {
+				need_enable = true;
+				break;
+			}
+		}
 
-	port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
-	if (!rdma_is_port_valid(device, port)) {
-		ret = -EINVAL;
-		goto err;
+		if (disabled && need_enable)
+			ret = rdma_counter_modify(device, port, i, true);
+		else if (!disabled && !need_enable)
+			ret = rdma_counter_modify(device, port, i, false);
+
+		if (ret)
+			break;
 	}
 
+	return ret;
+}
+
+static int nldev_stat_set_mode_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
+				    struct netlink_ext_ack *extack,
+				    struct nlattr *tb[],
+				    struct ib_device *device, u32 port)
+{
+	u32 mode, mask = 0, qpn, cntn = 0;
+	struct sk_buff *msg;
+	int ret;
+
+	/* Currently only counter for QP is supported */
+	if (nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES]) != RDMA_NLDEV_ATTR_RES_QP)
+		return -EINVAL;
+
 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
-	if (!msg) {
-		ret = -ENOMEM;
-		goto err;
-	}
+	if (!msg)
+		return -ENOMEM;
+
 	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
 					 RDMA_NLDEV_CMD_STAT_SET),
@@ -1947,8 +1970,10 @@  static int nldev_stat_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
 		if (ret)
 			goto err_msg;
 	} else {
-		if (!tb[RDMA_NLDEV_ATTR_RES_LQPN])
+		if (!tb[RDMA_NLDEV_ATTR_RES_LQPN]) {
+			ret = -EINVAL;
 			goto err_msg;
+		}
 		qpn = nla_get_u32(tb[RDMA_NLDEV_ATTR_RES_LQPN]);
 		if (tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]) {
 			cntn = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]);
@@ -1970,14 +1995,47 @@  static int nldev_stat_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
 	}
 
 	nlmsg_end(msg, nlh);
-	ib_device_put(device);
 	return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
 
 err_fill:
 	rdma_counter_unbind_qpn(device, port, qpn, cntn);
 err_msg:
 	nlmsg_free(msg);
-err:
+	return ret;
+}
+
+static int nldev_stat_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
+			       struct netlink_ext_ack *extack)
+{
+	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
+	struct ib_device *device;
+	u32 index, port;
+	int ret;
+
+	ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, nldev_policy,
+			  extack);
+	if (ret)
+		return -EINVAL;
+
+	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
+	device = ib_device_get_by_index(sock_net(skb->sk), index);
+	if (!device)
+		return -EINVAL;
+
+	port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
+	if (!rdma_is_port_valid(device, port)) {
+		ret = -EINVAL;
+		goto end;
+	}
+
+	if (tb[RDMA_NLDEV_ATTR_STAT_MODE])
+		ret = nldev_stat_set_mode_doit(skb, nlh, extack, tb, device,
+					       port);
+	else if (tb[RDMA_NLDEV_ATTR_STAT_HWCOUNTER_DYNAMIC])
+		ret = nldev_stat_set_counter_dynamic_doit(tb, device, port);
+	else
+		ret = -EINVAL;
+end:
 	ib_device_put(device);
 	return ret;
 }