Message ID | 04bd7354c6a375e684712d79915f7eb816efee92.1631660727.git.leonro@nvidia.com (mailing list archive) |
---|---|
State | Superseded |
Delegated to: | Jason Gunthorpe |
Headers | show |
Series | Optional counter statistics support | expand |
On Wed, Sep 15, 2021 at 02:07:24AM +0300, Leon Romanovsky wrote: > > +int rdma_counter_modify(struct ib_device *dev, u32 port, int index, bool enable) > +{ > + struct rdma_hw_stats *stats; > + int ret; > + > + if (!dev->ops.modify_hw_stat) > + return -EOPNOTSUPP; > + > + stats = ib_get_hw_stats_port(dev, port); > + if (!stats) > + return -EINVAL; > + > + mutex_lock(&stats->lock); > + ret = dev->ops.modify_hw_stat(dev, port, index, enable); > + if (!ret) > + enable ? clear_bit(index, stats->is_disabled) : > + set_bit(index, stats->is_disabled); This is not a kernel coding style write out the if, use success oriented flow Also, shouldn't this logic protect the driver from being called on non-optional counters? > for (i = 0; i < data->stats->num_counters; i++) { > - attr = &data->attrs[i]; > + if (data->stats->descs[i].flags & IB_STAT_FLAG_OPTIONAL) > + continue; > + attr = &data->attrs[pos]; > sysfs_attr_init(&attr->attr.attr); > attr->attr.attr.name = data->stats->descs[i].name; > attr->attr.attr.mode = 0444; > attr->attr.show = hw_stat_device_show; > attr->show = show_hw_stats; > - data->group.attrs[i] = &attr->attr.attr; > + data->group.attrs[pos] = &attr->attr.attr; > + pos++; > } This isn't OK, the hw_stat_device_show() computes the stat index like this: return stat_attr->show(ibdev, ibdev->hw_stats_data->stats, stat_attr - ibdev->hw_stats_data->attrs, 0, buf); Which assumes the stats are packed contiguously. This only works because mlx5 is always putting the optional stats at the end. > /** > * struct rdma_stat_desc > * @name - The name of the counter > - * > + * @flags - Flags of the counter; For example, IB_STAT_FLAG_OPTIONAL > */ The previous patch shouldn't have had the extra blank line then? > +int rdma_counter_modify(struct ib_device *dev, u32 port, int index, > + bool is_add); index should be unsigned int The bool is called 'is_add' here but the implementation is 'enable' ? Jason
On 9/28/2021 1:03 AM, Jason Gunthorpe wrote: > On Wed, Sep 15, 2021 at 02:07:24AM +0300, Leon Romanovsky wrote: >> >> +int rdma_counter_modify(struct ib_device *dev, u32 port, int index, bool enable) >> +{ >> + struct rdma_hw_stats *stats; >> + int ret; >> + >> + if (!dev->ops.modify_hw_stat) >> + return -EOPNOTSUPP; >> + >> + stats = ib_get_hw_stats_port(dev, port); >> + if (!stats) >> + return -EINVAL; >> + >> + mutex_lock(&stats->lock); >> + ret = dev->ops.modify_hw_stat(dev, port, index, enable); >> + if (!ret) >> + enable ? clear_bit(index, stats->is_disabled) : >> + set_bit(index, stats->is_disabled); > > This is not a kernel coding style write out the if, use success > oriented flow > > Also, shouldn't this logic protect the driver from being called on > non-optional counters? We leave it to driver, driver would return failure if modify is not supported. Is it good? >> for (i = 0; i < data->stats->num_counters; i++) { >> - attr = &data->attrs[i]; >> + if (data->stats->descs[i].flags & IB_STAT_FLAG_OPTIONAL) >> + continue; >> + attr = &data->attrs[pos]; >> sysfs_attr_init(&attr->attr.attr); >> attr->attr.attr.name = data->stats->descs[i].name; >> attr->attr.attr.mode = 0444; >> attr->attr.show = hw_stat_device_show; >> attr->show = show_hw_stats; >> - data->group.attrs[i] = &attr->attr.attr; >> + data->group.attrs[pos] = &attr->attr.attr; >> + pos++; >> } > > This isn't OK, the hw_stat_device_show() computes the stat index like > this: > > return stat_attr->show(ibdev, ibdev->hw_stats_data->stats, > stat_attr - ibdev->hw_stats_data->attrs, 0, buf); > > Which assumes the stats are packed contiguously. This only works > because mlx5 is always putting the optional stats at the end. Yes you are right, thanks. Maybe we can add an "index" field in struct hw_stats_device/port_attribute, then set it in setup and use it in show.
On Tue, Sep 28, 2021 at 05:03:24PM +0800, Mark Zhang wrote: > On 9/28/2021 1:03 AM, Jason Gunthorpe wrote: > > On Wed, Sep 15, 2021 at 02:07:24AM +0300, Leon Romanovsky wrote: > > > +int rdma_counter_modify(struct ib_device *dev, u32 port, int index, bool enable) > > > +{ > > > + struct rdma_hw_stats *stats; > > > + int ret; > > > + > > > + if (!dev->ops.modify_hw_stat) > > > + return -EOPNOTSUPP; > > > + > > > + stats = ib_get_hw_stats_port(dev, port); > > > + if (!stats) > > > + return -EINVAL; > > > + > > > + mutex_lock(&stats->lock); > > > + ret = dev->ops.modify_hw_stat(dev, port, index, enable); > > > + if (!ret) > > > + enable ? clear_bit(index, stats->is_disabled) : > > > + set_bit(index, stats->is_disabled); > > > > This is not a kernel coding style write out the if, use success > > oriented flow > > > > Also, shouldn't this logic protect the driver from being called on > > non-optional counters? > > We leave it to driver, driver would return failure if modify is not > supported. Is it good? I think the core code should do it > > > for (i = 0; i < data->stats->num_counters; i++) { > > > - attr = &data->attrs[i]; > > > + if (data->stats->descs[i].flags & IB_STAT_FLAG_OPTIONAL) > > > + continue; > > > + attr = &data->attrs[pos]; > > > sysfs_attr_init(&attr->attr.attr); > > > attr->attr.attr.name = data->stats->descs[i].name; > > > attr->attr.attr.mode = 0444; > > > attr->attr.show = hw_stat_device_show; > > > attr->show = show_hw_stats; > > > - data->group.attrs[i] = &attr->attr.attr; > > > + data->group.attrs[pos] = &attr->attr.attr; > > > + pos++; > > > } > > > > This isn't OK, the hw_stat_device_show() computes the stat index like > > this: > > > > return stat_attr->show(ibdev, ibdev->hw_stats_data->stats, > > stat_attr - ibdev->hw_stats_data->attrs, 0, buf); > > > > Which assumes the stats are packed contiguously. This only works > > because mlx5 is always putting the optional stats at the end. > > Yes you are right, thanks. Maybe we can add an "index" field in struct > hw_stats_device/port_attribute, then set it in setup and use it in show. You could just add a WARN_ON check that optional stats are at the end I suppose Jason
On Tue, Sep 28, 2021 at 08:51:35AM -0300, Jason Gunthorpe wrote: > On Tue, Sep 28, 2021 at 05:03:24PM +0800, Mark Zhang wrote: > > On 9/28/2021 1:03 AM, Jason Gunthorpe wrote: > > > On Wed, Sep 15, 2021 at 02:07:24AM +0300, Leon Romanovsky wrote: > > > > +int rdma_counter_modify(struct ib_device *dev, u32 port, int index, bool enable) > > > > +{ > > > > + struct rdma_hw_stats *stats; > > > > + int ret; > > > > + > > > > + if (!dev->ops.modify_hw_stat) > > > > + return -EOPNOTSUPP; > > > > + > > > > + stats = ib_get_hw_stats_port(dev, port); > > > > + if (!stats) > > > > + return -EINVAL; > > > > + > > > > + mutex_lock(&stats->lock); > > > > + ret = dev->ops.modify_hw_stat(dev, port, index, enable); > > > > + if (!ret) > > > > + enable ? clear_bit(index, stats->is_disabled) : > > > > + set_bit(index, stats->is_disabled); > > > > > > This is not a kernel coding style write out the if, use success > > > oriented flow > > > > > > Also, shouldn't this logic protect the driver from being called on > > > non-optional counters? > > > > We leave it to driver, driver would return failure if modify is not > > supported. Is it good? > > I think the core code should do it > > > > > for (i = 0; i < data->stats->num_counters; i++) { > > > > - attr = &data->attrs[i]; > > > > + if (data->stats->descs[i].flags & IB_STAT_FLAG_OPTIONAL) > > > > + continue; > > > > + attr = &data->attrs[pos]; > > > > sysfs_attr_init(&attr->attr.attr); > > > > attr->attr.attr.name = data->stats->descs[i].name; > > > > attr->attr.attr.mode = 0444; > > > > attr->attr.show = hw_stat_device_show; > > > > attr->show = show_hw_stats; > > > > - data->group.attrs[i] = &attr->attr.attr; > > > > + data->group.attrs[pos] = &attr->attr.attr; > > > > + pos++; > > > > } > > > > > > This isn't OK, the hw_stat_device_show() computes the stat index like > > > this: > > > > > > return stat_attr->show(ibdev, ibdev->hw_stats_data->stats, > > > stat_attr - ibdev->hw_stats_data->attrs, 0, buf); > > > > > > Which assumes the stats are packed contiguously. This only works > > > because mlx5 is always putting the optional stats at the end. > > > > Yes you are right, thanks. Maybe we can add an "index" field in struct > > hw_stats_device/port_attribute, then set it in setup and use it in show. > > You could just add a WARN_ON check that optional stats are at the end > I suppose Everyone adds their counters to the end, last example is bnxt_re 9a381f7e5aa2 ("RDMA/bnxt_re: Add extended statistics counters") Thanks > > Jason
diff --git a/drivers/infiniband/core/counters.c b/drivers/infiniband/core/counters.c index a9559e33a113..974abc73a033 100644 --- a/drivers/infiniband/core/counters.c +++ b/drivers/infiniband/core/counters.c @@ -106,6 +106,28 @@ static int __rdma_counter_bind_qp(struct rdma_counter *counter, return ret; } +int rdma_counter_modify(struct ib_device *dev, u32 port, int index, bool enable) +{ + struct rdma_hw_stats *stats; + int ret; + + if (!dev->ops.modify_hw_stat) + return -EOPNOTSUPP; + + stats = ib_get_hw_stats_port(dev, port); + if (!stats) + return -EINVAL; + + mutex_lock(&stats->lock); + ret = dev->ops.modify_hw_stat(dev, port, index, enable); + if (!ret) + enable ? clear_bit(index, stats->is_disabled) : + set_bit(index, stats->is_disabled); + mutex_unlock(&stats->lock); + + return ret; +} + static struct rdma_counter *alloc_and_bind(struct ib_device *dev, u32 port, struct ib_qp *qp, enum rdma_nl_counter_mode mode) diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index f4814bb7f082..22a4adda7981 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -2676,6 +2676,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops) SET_DEVICE_OP(dev_ops, modify_cq); SET_DEVICE_OP(dev_ops, modify_device); SET_DEVICE_OP(dev_ops, modify_flow_action_esp); + SET_DEVICE_OP(dev_ops, modify_hw_stat); SET_DEVICE_OP(dev_ops, modify_port); SET_DEVICE_OP(dev_ops, modify_qp); SET_DEVICE_OP(dev_ops, modify_srq); diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index a26bf960f7ef..c5fc86d8ed3e 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -938,7 +938,7 @@ int ib_setup_device_attrs(struct ib_device *ibdev) { struct hw_stats_device_attribute *attr; struct hw_stats_device_data *data; - int i, ret; + int i, ret, pos = 0; data = alloc_hw_stats_device(ibdev); if (IS_ERR(data)) { @@ -959,16 +959,19 @@ int ib_setup_device_attrs(struct ib_device *ibdev) data->stats->timestamp = jiffies; for (i = 0; i < data->stats->num_counters; i++) { - attr = &data->attrs[i]; + if (data->stats->descs[i].flags & IB_STAT_FLAG_OPTIONAL) + continue; + attr = &data->attrs[pos]; sysfs_attr_init(&attr->attr.attr); attr->attr.attr.name = data->stats->descs[i].name; attr->attr.attr.mode = 0444; attr->attr.show = hw_stat_device_show; attr->show = show_hw_stats; - data->group.attrs[i] = &attr->attr.attr; + data->group.attrs[pos] = &attr->attr.attr; + pos++; } - attr = &data->attrs[i]; + attr = &data->attrs[pos]; sysfs_attr_init(&attr->attr.attr); attr->attr.attr.name = "lifespan"; attr->attr.attr.mode = 0644; @@ -976,7 +979,7 @@ int ib_setup_device_attrs(struct ib_device *ibdev) attr->show = show_stats_lifespan; attr->attr.store = hw_stat_device_store; attr->store = set_stats_lifespan; - data->group.attrs[i] = &attr->attr.attr; + data->group.attrs[pos] = &attr->attr.attr; for (i = 0; i != ARRAY_SIZE(ibdev->groups); i++) if (!ibdev->groups[i]) { ibdev->groups[i] = &data->group; @@ -1032,7 +1035,7 @@ static int setup_hw_port_stats(struct ib_port *port, { struct hw_stats_port_attribute *attr; struct hw_stats_port_data *data; - int i, ret; + int i, ret, pos = 0; data = alloc_hw_stats_port(port, group); if (IS_ERR(data)) @@ -1050,16 +1053,19 @@ static int setup_hw_port_stats(struct ib_port *port, data->stats->timestamp = jiffies; for (i = 0; i < data->stats->num_counters; i++) { - attr = &data->attrs[i]; + if (data->stats->descs[i].flags & IB_STAT_FLAG_OPTIONAL) + continue; + attr = &data->attrs[pos]; sysfs_attr_init(&attr->attr.attr); attr->attr.attr.name = data->stats->descs[i].name; attr->attr.attr.mode = 0444; attr->attr.show = hw_stat_port_show; attr->show = show_hw_stats; - group->attrs[i] = &attr->attr.attr; + group->attrs[pos] = &attr->attr.attr; + pos++; } - attr = &data->attrs[i]; + attr = &data->attrs[pos]; sysfs_attr_init(&attr->attr.attr); attr->attr.attr.name = "lifespan"; attr->attr.attr.mode = 0644; @@ -1067,7 +1073,7 @@ static int setup_hw_port_stats(struct ib_port *port, attr->show = show_stats_lifespan; attr->attr.store = hw_stat_port_store; attr->store = set_stats_lifespan; - group->attrs[i] = &attr->attr.attr; + group->attrs[pos] = &attr->attr.attr; port->hw_stats_data = data; return 0; diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index f016bc0cd9de..e825e8e7accf 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -545,13 +545,18 @@ enum ib_port_speed { IB_SPEED_NDR = 128, }; +enum ib_stat_flag { + IB_STAT_FLAG_OPTIONAL = 1 << 0, +}; + /** * struct rdma_stat_desc * @name - The name of the counter - * + * @flags - Flags of the counter; For example, IB_STAT_FLAG_OPTIONAL */ struct rdma_stat_desc { const char *name; + unsigned int flags; }; /** @@ -2591,6 +2596,13 @@ struct ib_device_ops { int (*get_hw_stats)(struct ib_device *device, struct rdma_hw_stats *stats, u32 port, int index); + /** + * modify_hw_stat - Modify the counter configuration + * @enable: true/false when enable/disable a counter + * Return codes - 0 on success or error code otherwise. + */ + int (*modify_hw_stat)(struct ib_device *device, u32 port, + int counter_index, bool enable); /** * Allows rdma drivers to add their own restrack attributes. */ diff --git a/include/rdma/rdma_counter.h b/include/rdma/rdma_counter.h index 0295b22cd1cd..b21ea39efc6c 100644 --- a/include/rdma/rdma_counter.h +++ b/include/rdma/rdma_counter.h @@ -63,4 +63,6 @@ int rdma_counter_get_mode(struct ib_device *dev, u32 port, enum rdma_nl_counter_mode *mode, enum rdma_nl_counter_mask *mask); +int rdma_counter_modify(struct ib_device *dev, u32 port, int index, + bool is_add); #endif /* _RDMA_COUNTER_H_ */