Message ID | 20240227080303.63894-4-xuanzhuo@linux.alibaba.com (mailing list archive) |
---|---|
State | Changes Requested |
Delegated to: | Netdev Maintainers |
Headers | show |
Series | virtio-net: support device stats | expand |
Tue, Feb 27, 2024 at 09:03:00AM CET, xuanzhuo@linux.alibaba.com wrote: >As the spec https://github.com/oasis-tcs/virtio-spec/commit/42f389989823039724f95bbbd243291ab0064f82 > >make virtio-net support getting the stats from the device by ethtool -S ><eth0>. Would be nice to have example output of this command included here as well as in the cover letter. > >Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com> >--- > drivers/net/virtio_net.c | 362 ++++++++++++++++++++++++++++++++++++++- > 1 file changed, 358 insertions(+), 4 deletions(-) > >diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c >index af512d85cd5b..5549fc8508bd 100644 >--- a/drivers/net/virtio_net.c >+++ b/drivers/net/virtio_net.c >@@ -128,6 +128,121 @@ static const struct virtnet_stat_desc virtnet_rq_stats_desc[] = { > #define VIRTNET_SQ_STATS_LEN ARRAY_SIZE(virtnet_sq_stats_desc) > #define VIRTNET_RQ_STATS_LEN ARRAY_SIZE(virtnet_rq_stats_desc) > >+#define VIRTNET_STATS_DESC(qtype, class, name) \ >+ {#name, offsetof(struct virtio_net_stats_ ## qtype ## _ ## class, qtype ## _ ## name)} >+ >+static const struct virtnet_stat_desc virtnet_stats_cvq_desc[] = { >+ {"command_num", offsetof(struct virtio_net_stats_cvq, command_num)}, >+ {"ok_num", offsetof(struct virtio_net_stats_cvq, ok_num)} >+}; >+ >+static const struct virtnet_stat_desc virtnet_stats_rx_basic_desc[] = { >+ VIRTNET_STATS_DESC(rx, basic, packets), >+ VIRTNET_STATS_DESC(rx, basic, bytes), >+ >+ VIRTNET_STATS_DESC(rx, basic, notifications), >+ VIRTNET_STATS_DESC(rx, basic, interrupts), >+ >+ VIRTNET_STATS_DESC(rx, basic, drops), >+ VIRTNET_STATS_DESC(rx, basic, drop_overruns), >+}; >+ >+static const struct virtnet_stat_desc virtnet_stats_tx_basic_desc[] = { >+ VIRTNET_STATS_DESC(tx, basic, packets), >+ VIRTNET_STATS_DESC(tx, basic, bytes), >+ >+ VIRTNET_STATS_DESC(tx, basic, notifications), >+ VIRTNET_STATS_DESC(tx, basic, interrupts), >+ >+ VIRTNET_STATS_DESC(tx, basic, drops), >+ VIRTNET_STATS_DESC(tx, basic, drop_malformed), 
>+}; >+ >+static const struct virtnet_stat_desc virtnet_stats_rx_csum_desc[] = { >+ VIRTNET_STATS_DESC(rx, csum, csum_valid), >+ VIRTNET_STATS_DESC(rx, csum, needs_csum), >+ >+ VIRTNET_STATS_DESC(rx, csum, csum_none), >+ VIRTNET_STATS_DESC(rx, csum, csum_bad), >+}; >+ >+static const struct virtnet_stat_desc virtnet_stats_tx_csum_desc[] = { >+ VIRTNET_STATS_DESC(tx, csum, needs_csum), >+ VIRTNET_STATS_DESC(tx, csum, csum_none), >+}; >+ >+static const struct virtnet_stat_desc virtnet_stats_rx_gso_desc[] = { >+ VIRTNET_STATS_DESC(rx, gso, gso_packets), >+ VIRTNET_STATS_DESC(rx, gso, gso_bytes), >+ VIRTNET_STATS_DESC(rx, gso, gso_packets_coalesced), >+ VIRTNET_STATS_DESC(rx, gso, gso_bytes_coalesced), >+}; >+ >+static const struct virtnet_stat_desc virtnet_stats_tx_gso_desc[] = { >+ VIRTNET_STATS_DESC(tx, gso, gso_packets), >+ VIRTNET_STATS_DESC(tx, gso, gso_bytes), >+ VIRTNET_STATS_DESC(tx, gso, gso_segments), >+ VIRTNET_STATS_DESC(tx, gso, gso_segments_bytes), >+ VIRTNET_STATS_DESC(tx, gso, gso_packets_noseg), >+ VIRTNET_STATS_DESC(tx, gso, gso_bytes_noseg), >+}; >+ >+static const struct virtnet_stat_desc virtnet_stats_rx_speed_desc[] = { >+ VIRTNET_STATS_DESC(rx, speed, packets_allowance_exceeded), >+ VIRTNET_STATS_DESC(rx, speed, bytes_allowance_exceeded), >+}; >+ >+static const struct virtnet_stat_desc virtnet_stats_tx_speed_desc[] = { >+ VIRTNET_STATS_DESC(tx, speed, packets_allowance_exceeded), >+ VIRTNET_STATS_DESC(tx, speed, packets_allowance_exceeded), >+}; >+ >+struct virtnet_stats_map { >+ /* the stat type in bitmap */ >+ u64 stat_type; >+ >+ /* the bytes of the response for the stat */ >+ u32 len; >+ >+ /* the num of the response fields for the stat */ >+ u32 num; >+ >+#define VIRTNET_STATS_Q_TYPE_RX 0 >+#define VIRTNET_STATS_Q_TYPE_TX 1 >+#define VIRTNET_STATS_Q_TYPE_CQ 2 Enum? Then you don't need to have it here in struct but above it. 
>+ u32 queue_type; >+ >+ /* the reply type of the stat */ >+ u8 reply_type; >+ >+ /* describe the name and the offset in the response */ >+ const struct virtnet_stat_desc *desc; >+}; >+ >+#define VIRTNET_DEVICE_STATS_MAP_ITEM(TYPE, type, queue_type) \ >+ { \ >+ VIRTIO_NET_STATS_TYPE_##TYPE, \ >+ sizeof(struct virtio_net_stats_ ## type), \ >+ ARRAY_SIZE(virtnet_stats_ ## type ##_desc), \ >+ VIRTNET_STATS_Q_TYPE_##queue_type, \ >+ VIRTIO_NET_STATS_TYPE_REPLY_##TYPE, \ >+ &virtnet_stats_##type##_desc[0] \ >+ } >+ >+static struct virtnet_stats_map virtio_net_stats_map[] = { >+ VIRTNET_DEVICE_STATS_MAP_ITEM(CVQ, cvq, CQ), >+ >+ VIRTNET_DEVICE_STATS_MAP_ITEM(RX_BASIC, rx_basic, RX), >+ VIRTNET_DEVICE_STATS_MAP_ITEM(RX_CSUM, rx_csum, RX), >+ VIRTNET_DEVICE_STATS_MAP_ITEM(RX_GSO, rx_gso, RX), >+ VIRTNET_DEVICE_STATS_MAP_ITEM(RX_SPEED, rx_speed, RX), >+ >+ VIRTNET_DEVICE_STATS_MAP_ITEM(TX_BASIC, tx_basic, TX), >+ VIRTNET_DEVICE_STATS_MAP_ITEM(TX_CSUM, tx_csum, TX), >+ VIRTNET_DEVICE_STATS_MAP_ITEM(TX_GSO, tx_gso, TX), >+ VIRTNET_DEVICE_STATS_MAP_ITEM(TX_SPEED, tx_speed, TX), >+}; >+ > struct virtnet_interrupt_coalesce { > u32 max_packets; > u32 max_usecs; >@@ -244,6 +359,7 @@ struct control_buf { > struct virtio_net_ctrl_coal_tx coal_tx; > struct virtio_net_ctrl_coal_rx coal_rx; > struct virtio_net_ctrl_coal_vq coal_vq; >+ struct virtio_net_stats_capabilities stats_cap; > }; > > struct virtnet_info { >@@ -329,6 +445,8 @@ struct virtnet_info { > > /* failover when STANDBY feature enabled */ > struct failover *failover; >+ >+ u64 device_stats_cap; > }; > > struct padded_vnet_hdr { >@@ -3263,6 +3381,204 @@ static int virtnet_set_channels(struct net_device *dev, > return err; > } > >+static void virtnet_get_hw_stats_string(struct virtnet_info *vi, int type, int qid, u8 **data) >+{ >+ struct virtnet_stats_map *m; >+ int i, j; >+ u8 *p = *data; Reverse christmas tree: https://www.kernel.org/doc/html/v6.6/process/maintainer-netdev.html#tl-dr >+ >+ if (!virtio_has_feature(vi->vdev, 
VIRTIO_NET_F_DEVICE_STATS)) >+ return; >+ >+ for (i = 0; i < ARRAY_SIZE(virtio_net_stats_map); ++i) { >+ m = &virtio_net_stats_map[i]; >+ >+ if (m->queue_type != type) >+ continue; >+ >+ if (!(vi->device_stats_cap & m->stat_type)) >+ continue; >+ >+ for (j = 0; j < m->num; ++j) { >+ if (type == VIRTNET_STATS_Q_TYPE_RX) >+ ethtool_sprintf(&p, "rx_queue_hw_%u_%s", qid, m->desc[j].desc); >+ >+ else if (type == VIRTNET_STATS_Q_TYPE_TX) >+ ethtool_sprintf(&p, "tx_queue_hw_%u_%s", qid, m->desc[j].desc); >+ >+ else if (type == VIRTNET_STATS_Q_TYPE_CQ) >+ ethtool_sprintf(&p, "cq_hw_%s", m->desc[j].desc); Switch-case? >+ } >+ } >+ >+ *data = p; >+} >+ >+struct virtnet_stats_ctx { >+ u32 num_cq; >+ u32 num_rx; >+ u32 num_tx; >+ >+ u64 bitmap_cq; >+ u64 bitmap_rx; >+ u64 bitmap_tx; >+ >+ u32 size_cq; >+ u32 size_rx; >+ u32 size_tx; >+ >+ u64 *data; >+}; >+ >+static void virtnet_stats_ctx_init(struct virtnet_info *vi, >+ struct virtnet_stats_ctx *ctx, >+ u64 *data) >+{ >+ struct virtnet_stats_map *m; >+ int i; >+ >+ ctx->data = data; >+ >+ for (i = 0; i < ARRAY_SIZE(virtio_net_stats_map); ++i) { >+ m = &virtio_net_stats_map[i]; >+ >+ if (vi->device_stats_cap & m->stat_type) { if (!(vi->device_stats_cap & m->stat_type)) continue; would let you save one level of indent below. >+ if (m->queue_type == VIRTNET_STATS_Q_TYPE_CQ) { >+ ctx->bitmap_cq |= m->stat_type; >+ ctx->num_cq += m->num; >+ ctx->size_cq += m->len; >+ } >+ >+ if (m->queue_type == VIRTNET_STATS_Q_TYPE_RX) { >+ ctx->bitmap_rx |= m->stat_type; >+ ctx->num_rx += m->num; >+ ctx->size_rx += m->len; >+ } >+ >+ if (m->queue_type == VIRTNET_STATS_Q_TYPE_TX) { >+ ctx->bitmap_tx |= m->stat_type; >+ ctx->num_tx += m->num; >+ ctx->size_tx += m->len; >+ } Switch-case?
>+ } >+ } >+} >+ >+static int virtnet_get_hw_stats(struct virtnet_info *vi, >+ struct virtnet_stats_ctx *ctx) >+{ >+ struct virtio_net_ctrl_queue_stats *req; >+ struct virtio_net_stats_reply_hdr *hdr; >+ struct scatterlist sgs_in, sgs_out; >+ u32 num_rx, num_tx, num_cq, offset; >+ int qnum, i, j, qid, res_size; >+ struct virtnet_stats_map *m; >+ void *reply, *p; >+ u64 bitmap; >+ int ok; >+ u64 *v; Single letter variables are always frowned-upon: m, v, p. The non-iterator variables could have meaningful name. The code is then much easier to follow. Could you name them please? This applies to the rest of the code as well of course. >+ >+ if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_DEVICE_STATS)) >+ return 0; >+ >+ qnum = 0; >+ if (ctx->bitmap_cq) >+ qnum += 1; qnum++ ? >+ >+ if (ctx->bitmap_rx) >+ qnum += vi->curr_queue_pairs; >+ >+ if (ctx->bitmap_tx) >+ qnum += vi->curr_queue_pairs; >+ >+ req = kcalloc(qnum, sizeof(*req), GFP_KERNEL); >+ if (!req) >+ return -ENOMEM; >+ >+ res_size = (ctx->size_rx + ctx->size_tx) * vi->curr_queue_pairs + ctx->size_cq; >+ reply = kmalloc(res_size, GFP_KERNEL); >+ if (!reply) { >+ kfree(req); >+ return -ENOMEM; >+ } >+ >+ j = 0; >+ for (i = 0; i < vi->curr_queue_pairs; ++i) { >+ if (ctx->bitmap_rx) { >+ req->stats[j].vq_index = cpu_to_le16(i * 2); >+ req->stats[j].types_bitmap[0] = cpu_to_le64(ctx->bitmap_rx); >+ ++j; >+ } >+ >+ if (ctx->bitmap_tx) { >+ req->stats[j].vq_index = cpu_to_le16(i * 2 + 1); >+ req->stats[j].types_bitmap[0] = cpu_to_le64(ctx->bitmap_tx); >+ ++j; >+ } >+ } >+ >+ if (ctx->size_cq) { >+ req->stats[j].vq_index = cpu_to_le16(vi->max_queue_pairs * 2); >+ req->stats[j].types_bitmap[0] = cpu_to_le64(ctx->bitmap_cq); >+ ++j; >+ } >+ >+ sg_init_one(&sgs_out, req, sizeof(*req) * j); >+ sg_init_one(&sgs_in, reply, res_size); >+ >+ ok = virtnet_send_command(vi, VIRTIO_NET_CTRL_STATS, >+ VIRTIO_NET_CTRL_STATS_GET, >+ &sgs_out, &sgs_in); >+ kfree(req); >+ >+ if (!ok) { >+ kfree(reply); >+ return ok; 
virtnet_send_command() returns bool. This function returns 0/-EXX. Please fix the return value here. Or is it supposed to be 0? In that case just return 0 here. But I think this should return error. >+ } >+ >+ num_rx = VIRTNET_RQ_STATS_LEN + ctx->num_rx; >+ num_tx = VIRTNET_SQ_STATS_LEN + ctx->num_tx; >+ num_cq = ctx->num_tx; >+ >+ for (p = reply; p - reply < res_size; p += le16_to_cpu(hdr->size)) { >+ hdr = p; >+ >+ qid = le16_to_cpu(hdr->vq_index); >+ >+ if (qid == vi->max_queue_pairs * 2) { >+ offset = 0; >+ bitmap = ctx->bitmap_cq; >+ } else if (qid % 2) { >+ offset = num_cq + num_rx * vi->curr_queue_pairs + num_tx * (qid / 2); >+ offset += VIRTNET_SQ_STATS_LEN; >+ bitmap = ctx->bitmap_tx; >+ } else { >+ offset = num_cq + num_rx * (qid / 2) + VIRTNET_RQ_STATS_LEN; >+ bitmap = ctx->bitmap_rx; >+ } >+ >+ for (i = 0; i < ARRAY_SIZE(virtio_net_stats_map); ++i) { >+ m = &virtio_net_stats_map[i]; >+ >+ if (m->stat_type & bitmap) >+ offset += m->num; >+ >+ if (hdr->type != m->reply_type) >+ continue; >+ >+ for (j = 0; j < m->num; ++j) { >+ v = p + m->desc[j].offset; >+ ctx->data[offset + j] = le64_to_cpu(*v); >+ } >+ >+ break; >+ } >+ } >+ >+ kfree(reply); >+ return 0; >+} >+ > static void virtnet_get_strings(struct net_device *dev, u32 stringset, u8 *data) > { > struct virtnet_info *vi = netdev_priv(dev); >@@ -3271,16 +3587,22 @@ static void virtnet_get_strings(struct net_device *dev, u32 stringset, u8 *data) > > switch (stringset) { > case ETH_SS_STATS: >+ virtnet_get_hw_stats_string(vi, VIRTNET_STATS_Q_TYPE_CQ, 0, &p); >+ > for (i = 0; i < vi->curr_queue_pairs; i++) { > for (j = 0; j < VIRTNET_RQ_STATS_LEN; j++) > ethtool_sprintf(&p, "rx_queue_%u_%s", i, > virtnet_rq_stats_desc[j].desc); >+ >+ virtnet_get_hw_stats_string(vi, VIRTNET_STATS_Q_TYPE_RX, i, &p); > } > > for (i = 0; i < vi->curr_queue_pairs; i++) { > for (j = 0; j < VIRTNET_SQ_STATS_LEN; j++) > ethtool_sprintf(&p, "tx_queue_%u_%s", i, > virtnet_sq_stats_desc[j].desc); >+ >+ 
virtnet_get_hw_stats_string(vi, VIRTNET_STATS_Q_TYPE_TX, i, &p); > } > break; > } >@@ -3289,11 +3611,35 @@ static void virtnet_get_strings(struct net_device *dev, u32 stringset, u8 *data) > static int virtnet_get_sset_count(struct net_device *dev, int sset) > { > struct virtnet_info *vi = netdev_priv(dev); >+ struct virtnet_stats_ctx ctx = {0}; >+ u32 pair_count; > > switch (sset) { > case ETH_SS_STATS: >- return vi->curr_queue_pairs * (VIRTNET_RQ_STATS_LEN + >- VIRTNET_SQ_STATS_LEN); >+ if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_DEVICE_STATS) && >+ !vi->device_stats_cap) { >+ struct scatterlist sg; >+ >+ sg_init_one(&sg, &vi->ctrl->stats_cap, sizeof(vi->ctrl->stats_cap)); >+ >+ if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_STATS, >+ VIRTIO_NET_CTRL_STATS_QUERY, >+ NULL, &sg)) { >+ dev_warn(&dev->dev, "Fail to get stats capability\n"); >+ } else { >+ __le64 v; >+ >+ v = vi->ctrl->stats_cap.supported_stats_types[0]; >+ vi->device_stats_cap = le64_to_cpu(v); >+ } >+ } >+ >+ virtnet_stats_ctx_init(vi, &ctx, NULL); >+ >+ pair_count = VIRTNET_RQ_STATS_LEN + VIRTNET_SQ_STATS_LEN; >+ pair_count += ctx.num_rx + ctx.num_tx; >+ >+ return ctx.num_cq + vi->curr_queue_pairs * pair_count; > default: > return -EOPNOTSUPP; > } >@@ -3303,11 +3649,17 @@ static void virtnet_get_ethtool_stats(struct net_device *dev, > struct ethtool_stats *stats, u64 *data) > { > struct virtnet_info *vi = netdev_priv(dev); >- unsigned int idx = 0, start, i, j; >+ struct virtnet_stats_ctx ctx = {0}; >+ unsigned int idx, start, i, j; > const u8 *stats_base; > const u64_stats_t *p; > size_t offset; > >+ virtnet_stats_ctx_init(vi, &ctx, data); >+ virtnet_get_hw_stats(vi, &ctx); Check the function return value. Print out an error in case there is one at least. Btw, did you consider obtaining these stats asynchronously? 
>+ >+ idx = ctx.num_cq; >+ > for (i = 0; i < vi->curr_queue_pairs; i++) { > struct receive_queue *rq = &vi->rq[i]; > >@@ -3321,6 +3673,7 @@ static void virtnet_get_ethtool_stats(struct net_device *dev, > } > } while (u64_stats_fetch_retry(&rq->stats.syncp, start)); > idx += VIRTNET_RQ_STATS_LEN; >+ idx += ctx.num_rx; > } > > for (i = 0; i < vi->curr_queue_pairs; i++) { >@@ -3336,6 +3689,7 @@ static void virtnet_get_ethtool_stats(struct net_device *dev, > } > } while (u64_stats_fetch_retry(&sq->stats.syncp, start)); > idx += VIRTNET_SQ_STATS_LEN; >+ idx += ctx.num_tx; > } > } > >@@ -4963,7 +5317,7 @@ static struct virtio_device_id id_table[] = { > VIRTIO_NET_F_SPEED_DUPLEX, VIRTIO_NET_F_STANDBY, \ > VIRTIO_NET_F_RSS, VIRTIO_NET_F_HASH_REPORT, VIRTIO_NET_F_NOTF_COAL, \ > VIRTIO_NET_F_VQ_NOTF_COAL, \ >- VIRTIO_NET_F_GUEST_HDRLEN >+ VIRTIO_NET_F_GUEST_HDRLEN, VIRTIO_NET_F_DEVICE_STATS > > static unsigned int features[] = { > VIRTNET_FEATURES, >-- >2.32.0.3.g01195cf9f > >
On Tue, Feb 27, 2024 at 04:03:00PM +0800, Xuan Zhuo wrote: > As the spec https://github.com/oasis-tcs/virtio-spec/commit/42f389989823039724f95bbbd243291ab0064f82 > > make virtio-net support getting the stats from the device by ethtool -S > <eth0>. > > Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com> ... > +static int virtnet_get_hw_stats(struct virtnet_info *vi, > + struct virtnet_stats_ctx *ctx) > +{ > + struct virtio_net_ctrl_queue_stats *req; > + struct virtio_net_stats_reply_hdr *hdr; > + struct scatterlist sgs_in, sgs_out; > + u32 num_rx, num_tx, num_cq, offset; > + int qnum, i, j, qid, res_size; > + struct virtnet_stats_map *m; > + void *reply, *p; > + u64 bitmap; > + int ok; > + u64 *v; > + > + if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_DEVICE_STATS)) > + return 0; > + > + qnum = 0; > + if (ctx->bitmap_cq) > + qnum += 1; > + > + if (ctx->bitmap_rx) > + qnum += vi->curr_queue_pairs; > + > + if (ctx->bitmap_tx) > + qnum += vi->curr_queue_pairs; > + > + req = kcalloc(qnum, sizeof(*req), GFP_KERNEL); > + if (!req) > + return -ENOMEM; > + > + res_size = (ctx->size_rx + ctx->size_tx) * vi->curr_queue_pairs + ctx->size_cq; > + reply = kmalloc(res_size, GFP_KERNEL); > + if (!reply) { > + kfree(req); > + return -ENOMEM; > + } > + > + j = 0; > + for (i = 0; i < vi->curr_queue_pairs; ++i) { > + if (ctx->bitmap_rx) { > + req->stats[j].vq_index = cpu_to_le16(i * 2); > + req->stats[j].types_bitmap[0] = cpu_to_le64(ctx->bitmap_rx); > + ++j; > + } > + > + if (ctx->bitmap_tx) { > + req->stats[j].vq_index = cpu_to_le16(i * 2 + 1); > + req->stats[j].types_bitmap[0] = cpu_to_le64(ctx->bitmap_tx); > + ++j; > + } > + } > + > + if (ctx->size_cq) { > + req->stats[j].vq_index = cpu_to_le16(vi->max_queue_pairs * 2); > + req->stats[j].types_bitmap[0] = cpu_to_le64(ctx->bitmap_cq); > + ++j; > + } > + > + sg_init_one(&sgs_out, req, sizeof(*req) * j); > + sg_init_one(&sgs_in, reply, res_size); > + > + ok = virtnet_send_command(vi, VIRTIO_NET_CTRL_STATS, > + 
VIRTIO_NET_CTRL_STATS_GET, > + &sgs_out, &sgs_in); > + kfree(req); > + > + if (!ok) { > + kfree(reply); > + return ok; > + } > + > + num_rx = VIRTNET_RQ_STATS_LEN + ctx->num_rx; > + num_tx = VIRTNET_SQ_STATS_LEN + ctx->num_tx; > + num_cq = ctx->num_tx; > + > + for (p = reply; p - reply < res_size; p += le16_to_cpu(hdr->size)) { > + hdr = p; > + > + qid = le16_to_cpu(hdr->vq_index); > + > + if (qid == vi->max_queue_pairs * 2) { > + offset = 0; > + bitmap = ctx->bitmap_cq; > + } else if (qid % 2) { > + offset = num_cq + num_rx * vi->curr_queue_pairs + num_tx * (qid / 2); > + offset += VIRTNET_SQ_STATS_LEN; > + bitmap = ctx->bitmap_tx; > + } else { > + offset = num_cq + num_rx * (qid / 2) + VIRTNET_RQ_STATS_LEN; > + bitmap = ctx->bitmap_rx; > + } > + > + for (i = 0; i < ARRAY_SIZE(virtio_net_stats_map); ++i) { > + m = &virtio_net_stats_map[i]; > + > + if (m->stat_type & bitmap) > + offset += m->num; > + > + if (hdr->type != m->reply_type) > + continue; > + > + for (j = 0; j < m->num; ++j) { > + v = p + m->desc[j].offset; > + ctx->data[offset + j] = le64_to_cpu(*v); Hi Xuan Zhuo, Sparse complains about the line above because the type of *v is u64, but le64_to_cpu() expects __le64. > + } > + > + break; > + } > + } > + > + kfree(reply); > + return 0; > +} > + > static void virtnet_get_strings(struct net_device *dev, u32 stringset, u8 *data) > { > struct virtnet_info *vi = netdev_priv(dev); ...
Hi Xuan, kernel test robot noticed the following build warnings: [auto build test WARNING on net-next/main] url: https://github.com/intel-lab-lkp/linux/commits/Xuan-Zhuo/virtio_net-introduce-device-stats-feature-and-structures/20240227-161123 base: net-next/main patch link: https://lore.kernel.org/r/20240227080303.63894-4-xuanzhuo%40linux.alibaba.com patch subject: [PATCH net-next v3 3/6] virtio_net: support device stats config: x86_64-randconfig-121-20240229 (https://download.01.org/0day-ci/archive/20240229/202402291808.cmzZAiYX-lkp@intel.com/config) compiler: gcc-12 (Debian 12.2.0-14) 12.2.0 reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240229/202402291808.cmzZAiYX-lkp@intel.com/reproduce) If you fix the issue in a separate patch/commit (i.e. not just a new version of the same patch/commit), kindly add following tags | Reported-by: kernel test robot <lkp@intel.com> | Closes: https://lore.kernel.org/oe-kbuild-all/202402291808.cmzZAiYX-lkp@intel.com/ sparse warnings: (new ones prefixed by >>) >> drivers/net/virtio_net.c:3571:57: sparse: sparse: cast to restricted __le64 vim +3571 drivers/net/virtio_net.c 3466 3467 static int virtnet_get_hw_stats(struct virtnet_info *vi, 3468 struct virtnet_stats_ctx *ctx) 3469 { 3470 struct virtio_net_ctrl_queue_stats *req; 3471 struct virtio_net_stats_reply_hdr *hdr; 3472 struct scatterlist sgs_in, sgs_out; 3473 u32 num_rx, num_tx, num_cq, offset; 3474 int qnum, i, j, qid, res_size; 3475 struct virtnet_stats_map *m; 3476 void *reply, *p; 3477 u64 bitmap; 3478 int ok; 3479 u64 *v; 3480 3481 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_DEVICE_STATS)) 3482 return 0; 3483 3484 qnum = 0; 3485 if (ctx->bitmap_cq) 3486 qnum += 1; 3487 3488 if (ctx->bitmap_rx) 3489 qnum += vi->curr_queue_pairs; 3490 3491 if (ctx->bitmap_tx) 3492 qnum += vi->curr_queue_pairs; 3493 3494 req = kcalloc(qnum, sizeof(*req), GFP_KERNEL); 3495 if (!req) 3496 return -ENOMEM; 3497 3498 res_size = (ctx->size_rx + ctx->size_tx) * 
vi->curr_queue_pairs + ctx->size_cq; 3499 reply = kmalloc(res_size, GFP_KERNEL); 3500 if (!reply) { 3501 kfree(req); 3502 return -ENOMEM; 3503 } 3504 3505 j = 0; 3506 for (i = 0; i < vi->curr_queue_pairs; ++i) { 3507 if (ctx->bitmap_rx) { 3508 req->stats[j].vq_index = cpu_to_le16(i * 2); 3509 req->stats[j].types_bitmap[0] = cpu_to_le64(ctx->bitmap_rx); 3510 ++j; 3511 } 3512 3513 if (ctx->bitmap_tx) { 3514 req->stats[j].vq_index = cpu_to_le16(i * 2 + 1); 3515 req->stats[j].types_bitmap[0] = cpu_to_le64(ctx->bitmap_tx); 3516 ++j; 3517 } 3518 } 3519 3520 if (ctx->size_cq) { 3521 req->stats[j].vq_index = cpu_to_le16(vi->max_queue_pairs * 2); 3522 req->stats[j].types_bitmap[0] = cpu_to_le64(ctx->bitmap_cq); 3523 ++j; 3524 } 3525 3526 sg_init_one(&sgs_out, req, sizeof(*req) * j); 3527 sg_init_one(&sgs_in, reply, res_size); 3528 3529 ok = virtnet_send_command(vi, VIRTIO_NET_CTRL_STATS, 3530 VIRTIO_NET_CTRL_STATS_GET, 3531 &sgs_out, &sgs_in); 3532 kfree(req); 3533 3534 if (!ok) { 3535 kfree(reply); 3536 return ok; 3537 } 3538 3539 num_rx = VIRTNET_RQ_STATS_LEN + ctx->num_rx; 3540 num_tx = VIRTNET_SQ_STATS_LEN + ctx->num_tx; 3541 num_cq = ctx->num_tx; 3542 3543 for (p = reply; p - reply < res_size; p += le16_to_cpu(hdr->size)) { 3544 hdr = p; 3545 3546 qid = le16_to_cpu(hdr->vq_index); 3547 3548 if (qid == vi->max_queue_pairs * 2) { 3549 offset = 0; 3550 bitmap = ctx->bitmap_cq; 3551 } else if (qid % 2) { 3552 offset = num_cq + num_rx * vi->curr_queue_pairs + num_tx * (qid / 2); 3553 offset += VIRTNET_SQ_STATS_LEN; 3554 bitmap = ctx->bitmap_tx; 3555 } else { 3556 offset = num_cq + num_rx * (qid / 2) + VIRTNET_RQ_STATS_LEN; 3557 bitmap = ctx->bitmap_rx; 3558 } 3559 3560 for (i = 0; i < ARRAY_SIZE(virtio_net_stats_map); ++i) { 3561 m = &virtio_net_stats_map[i]; 3562 3563 if (m->stat_type & bitmap) 3564 offset += m->num; 3565 3566 if (hdr->type != m->reply_type) 3567 continue; 3568 3569 for (j = 0; j < m->num; ++j) { 3570 v = p + m->desc[j].offset; > 3571 ctx->data[offset + j] 
= le64_to_cpu(*v); 3572 } 3573 3574 break; 3575 } 3576 } 3577 3578 kfree(reply); 3579 return 0; 3580 } 3581
CC: Willem and some driver folks for more input, context: extending https://lore.kernel.org/all/20240306195509.1502746-1-kuba@kernel.org/ to cover virtio stats. On Tue, 27 Feb 2024 16:03:00 +0800 Xuan Zhuo wrote: > +static const struct virtnet_stat_desc virtnet_stats_rx_basic_desc[] = { > + VIRTNET_STATS_DESC(rx, basic, packets), > + VIRTNET_STATS_DESC(rx, basic, bytes), Covered. > + VIRTNET_STATS_DESC(rx, basic, notifications), > + VIRTNET_STATS_DESC(rx, basic, interrupts), I haven't seen HW devices count interrupts coming from a specific queue (there's usually a lot more queues than IRQs these days), let's keep these in ethtool -S for now, unless someone has a HW use case. > + VIRTNET_STATS_DESC(rx, basic, drops), > + VIRTNET_STATS_DESC(rx, basic, drop_overruns), These are important, but we need to make sure we have a good definition for vendors to follow... drops I'd define as "sum of all packets which came into the device, but never left it, including but not limited to: packets dropped due to lack of buffer space, processing errors, explicitly set policies and packet filters." Call it hw-rx-drops ? overruns is a bit harder to precisely define. I was thinking of something more broad, like: "packets dropped due to transient lack of resources, such as buffer space, host descriptors etc." For context why not just go with virtio spec definition of "no descriptors" - for HW devices, what exact point in the pipeline drops depends on how back pressure is configured/implemented, and fetching descriptors is high latency, so differentiating between "PCIe is slow" and "host didn't post descriptors" is hard in practice. Call it hw-rx-drop-overruns ? > +static const struct virtnet_stat_desc virtnet_stats_tx_basic_desc[] = { > + VIRTNET_STATS_DESC(tx, basic, packets), > + VIRTNET_STATS_DESC(tx, basic, bytes), > + > + VIRTNET_STATS_DESC(tx, basic, notifications), > + VIRTNET_STATS_DESC(tx, basic, interrupts), > + > + VIRTNET_STATS_DESC(tx, basic, drops), These 5 same as rx. 
> + VIRTNET_STATS_DESC(tx, basic, drop_malformed), These I'd call hw-tx-drop-errors, "packets dropped because they were invalid or malformed"? > +static const struct virtnet_stat_desc virtnet_stats_rx_csum_desc[] = { > + VIRTNET_STATS_DESC(rx, csum, csum_valid), I think in kernel parlance that would translate to CHECKSUM_UNNECESSARY? So let's call it rx-csum-unnecessary ? I'd skip the hw- prefix for this one, it doesn't matter to the user if the HW or SW counted it. > + VIRTNET_STATS_DESC(rx, csum, needs_csum), Hm, I think this is a bit software/virt device specific, presumably rx-csum-partial for the kernel, up to you whether to make it ethtool -S or netlink. > + VIRTNET_STATS_DESC(rx, csum, csum_none), > + VIRTNET_STATS_DESC(rx, csum, csum_bad), These two make sense as is in netlink, should be fairly commonly reported by devices. Maybe add a note in "bad" that packets with bad csum are not discarded, but still delivered to the stack. > +static const struct virtnet_stat_desc virtnet_stats_tx_csum_desc[] = { > + VIRTNET_STATS_DESC(tx, csum, needs_csum), > + VIRTNET_STATS_DESC(tx, csum, csum_none), tx- version of what names we pick for rx-, netlink seems appropriate. > +static const struct virtnet_stat_desc virtnet_stats_rx_gso_desc[] = { > + VIRTNET_STATS_DESC(rx, gso, gso_packets), > + VIRTNET_STATS_DESC(rx, gso, gso_bytes), I used the term "GSO" in conversations about Rx and it often confuses people. Let's use "GRO", so hw-gro-packets, and hw-gro-bytes ? Or maybe coalesce? "hw-rx-coalesce" ? That's quite a bit longer.. Ah, and please mention in the doc that these counters "do not cover LRO i.e. any coalescing implementation which doesn't follow GRO rules". > + VIRTNET_STATS_DESC(rx, gso, gso_packets_coalesced), hw-gro-wire-packets ? No strong preference on the naming, but I find that saying -wire makes it 100% clear to everyone what the meaning is. 
> + VIRTNET_STATS_DESC(rx, gso, gso_bytes_coalesced), The documentation in the virtio spec seems to be identical to the one for gso_packets, which gotta be unintentional? I'm guessing this is hw-gro-wire-bytes? I.e. headers counted multiple times? > +static const struct virtnet_stat_desc virtnet_stats_tx_gso_desc[] = { > + VIRTNET_STATS_DESC(tx, gso, gso_packets), > + VIRTNET_STATS_DESC(tx, gso, gso_bytes), > + VIRTNET_STATS_DESC(tx, gso, gso_segments), > + VIRTNET_STATS_DESC(tx, gso, gso_segments_bytes), these 4 make sense as mirror of the Rx > + VIRTNET_STATS_DESC(tx, gso, gso_packets_noseg), > + VIRTNET_STATS_DESC(tx, gso, gso_bytes_noseg), Not sure what these are :) unless someone knows what it is and that HW devices report it, let's keep them in ethtool -S ? > +static const struct virtnet_stat_desc virtnet_stats_rx_speed_desc[] = { > + VIRTNET_STATS_DESC(rx, speed, packets_allowance_exceeded), hw-rx-drop-ratelimits ? "Allowance exceeded" is a bit of a mouthful to me, perhaps others disagree. The description from the virtio spec is quite good. > + VIRTNET_STATS_DESC(rx, speed, bytes_allowance_exceeded), No strong preference whether to expose this as a standard stat or ethtool -S, we don't generally keep byte counters for drops, so this would be special. > +static const struct virtnet_stat_desc virtnet_stats_tx_speed_desc[] = { > + VIRTNET_STATS_DESC(tx, speed, packets_allowance_exceeded), > + VIRTNET_STATS_DESC(tx, speed, packets_allowance_exceeded), same as rx
On Thu, 7 Mar 2024 08:50:21 -0800, Jakub Kicinski <kuba@kernel.org> wrote: > CC: Willem and some driver folks for more input, context: extending > https://lore.kernel.org/all/20240306195509.1502746-1-kuba@kernel.org/ > to cover virtio stats. > > On Tue, 27 Feb 2024 16:03:00 +0800 Xuan Zhuo wrote: > > +static const struct virtnet_stat_desc virtnet_stats_rx_basic_desc[] = { > > + VIRTNET_STATS_DESC(rx, basic, packets), > > + VIRTNET_STATS_DESC(rx, basic, bytes), > > Covered. About "packets" and "bytes", here is coming from the hw device. Actually the driver also count "packets" and "bytes" in SW. So there are HW and SW versions. Do we need to distinguish them? > > > + VIRTNET_STATS_DESC(rx, basic, notifications), > > + VIRTNET_STATS_DESC(rx, basic, interrupts), > > I haven't seen HW devices count interrupts coming from a specific > queue (there's usually a lot more queues than IRQs these days), > let's keep these in ethtool -S for now, unless someone has a HW use > case. OK. > > > + VIRTNET_STATS_DESC(rx, basic, drops), > > + VIRTNET_STATS_DESC(rx, basic, drop_overruns), > > These are important, but we need to make sure we have a good definition > for vendors to follow... > > drops I'd define as "sum of all packets which came into the device, but > never left it, including but not limited to: packets dropped due to > lack of buffer space, processing errors, explicitly set policies and > packet filters." > Call it hw-rx-drops ? I agree. > > overruns is a bit harder to precisely define. I was thinking of > something more broad, like: "packets dropped due to transient lack of > resources, such as buffer space, host descriptors etc." > > For context why not just go with virtio spec definition of "no > descriptors" - for HW devices, what exact point in the pipeline drops > depends on how back pressure is configured/implemented, and fetching > descriptors is high latency, so differentiating between "PCIe is slow" > and "host didn't post descriptors" is hard in practice. 
> Call it hw-rx-drop-overruns ? OK. > > > +static const struct virtnet_stat_desc virtnet_stats_tx_basic_desc[] = { > > + VIRTNET_STATS_DESC(tx, basic, packets), > > + VIRTNET_STATS_DESC(tx, basic, bytes), > > + > > + VIRTNET_STATS_DESC(tx, basic, notifications), > > + VIRTNET_STATS_DESC(tx, basic, interrupts), > > + > > + VIRTNET_STATS_DESC(tx, basic, drops), > > These 5 same as rx. > > > + VIRTNET_STATS_DESC(tx, basic, drop_malformed), > > These I'd call hw-tx-drop-errors, "packets dropped because they were > invalid or malformed"? OK. > > > +static const struct virtnet_stat_desc virtnet_stats_rx_csum_desc[] = { > > + VIRTNET_STATS_DESC(rx, csum, csum_valid), > > I think in kernel parlance that would translate to CHECKSUM_UNNECESSARY? > So let's call it rx-csum-unnecessary ? > I'd skip the hw- prefix for this one, it doesn't matter to the user if > the HW or SW counted it. OK. > > > + VIRTNET_STATS_DESC(rx, csum, needs_csum), > > Hm, I think this is a bit software/virt device specific, presumably > rx-csum-partial for the kernel, up to you whether to make it ethtool -S > or netlink. Yes, this is specific to virt devices. I will make it ethtool -S, unless somebody has other advice. > > > + VIRTNET_STATS_DESC(rx, csum, csum_none), > > + VIRTNET_STATS_DESC(rx, csum, csum_bad), > > These two make sense as is in netlink, should be fairly commonly > reported by devices. Maybe add a note in "bad" that packets with > bad csum are not discarded, but still delivered to the stack. OK. > > > +static const struct virtnet_stat_desc virtnet_stats_tx_csum_desc[] = { > > + VIRTNET_STATS_DESC(tx, csum, needs_csum), > > + VIRTNET_STATS_DESC(tx, csum, csum_none), > > tx- version of what names we pick for rx-, netlink seems appropriate. > > > +static const struct virtnet_stat_desc virtnet_stats_rx_gso_desc[] = { > > + VIRTNET_STATS_DESC(rx, gso, gso_packets), > > + VIRTNET_STATS_DESC(rx, gso, gso_bytes), > > I used the term "GSO" in conversations about Rx and it often confuses > people.
Let's use "GRO", so hw-gro-packets, and hw-gro-bytes ? > Or maybe coalesce? "hw-rx-coalesce" ? That's quite a bit longer.. GRO may also confuse people. I like hw-rx-coalesce-packets, hw-rx-coalesce-bytes. > > Ah, and please mention in the doc that these counters "do not cover LRO > i.e. any coalescing implementation which doesn't follow GRO rules". OK. > > > + VIRTNET_STATS_DESC(rx, gso, gso_packets_coalesced), > > hw-gro-wire-packets ? > No strong preference on the naming, but I find that saying -wire > makes it 100% clear to everyone what the meaning is. ok. > > > + VIRTNET_STATS_DESC(rx, gso, gso_bytes_coalesced), > > The documentation in the virtio spec seems to be identical > to the one for gso_packets, which gotta be unintentional? One for num, one for bytes. > I'm guessing this is hw-gro-wire-bytes? I.e. headers counted > multiple times? This is used to count the bytes of the small packets before coalescing. > > +static const struct virtnet_stat_desc virtnet_stats_tx_gso_desc[] = { > > + VIRTNET_STATS_DESC(tx, gso, gso_packets), > > + VIRTNET_STATS_DESC(tx, gso, gso_bytes), > > + VIRTNET_STATS_DESC(tx, gso, gso_segments), > > + VIRTNET_STATS_DESC(tx, gso, gso_segments_bytes), > > these 4 make sense as mirror of the Rx > > > + VIRTNET_STATS_DESC(tx, gso, gso_packets_noseg), > > + VIRTNET_STATS_DESC(tx, gso, gso_bytes_noseg), > > Not sure what these are :) unless someone knows what it is and that > HW devices report it, let's keep them in ethtool -S ? Just for the virtio. Let's keep them in ethtool -S. > > > +static const struct virtnet_stat_desc virtnet_stats_rx_speed_desc[] = { > > + VIRTNET_STATS_DESC(rx, speed, packets_allowance_exceeded), > > hw-rx-drop-ratelimits ? > "Allowance exceeded" is a bit of a mouthful to me, perhaps others > disagree. The description from the virtio spec is quite good. OK. 
> > > + VIRTNET_STATS_DESC(rx, speed, bytes_allowance_exceeded), > > No strong preference whether to expose this as a standard stat or > ethtool -S, we don't generally keep byte counters for drops, so > this would be special. OK. > > > +static const struct virtnet_stat_desc virtnet_stats_tx_speed_desc[] = { > > + VIRTNET_STATS_DESC(tx, speed, packets_allowance_exceeded), > > + VIRTNET_STATS_DESC(tx, speed, packets_allowance_exceeded), > > same as rx Thanks.
On Mon, 11 Mar 2024 18:48:45 +0800 Xuan Zhuo wrote: > On Thu, 7 Mar 2024 08:50:21 -0800, Jakub Kicinski <kuba@kernel.org> wrote: > > CC: Willem and some driver folks for more input, context: extending > > https://lore.kernel.org/all/20240306195509.1502746-1-kuba@kernel.org/ > > to cover virtio stats. > > > > On Tue, 27 Feb 2024 16:03:00 +0800 Xuan Zhuo wrote: > > > +static const struct virtnet_stat_desc virtnet_stats_rx_basic_desc[] = { > > > + VIRTNET_STATS_DESC(rx, basic, packets), > > > + VIRTNET_STATS_DESC(rx, basic, bytes), > > > > Covered. > > About "packets" and "bytes", these come from the HW device. > Actually the driver also counts "packets" and "bytes" in SW. > So there are HW and SW versions. Do we need to distinguish them? Yup, there are already separate counters defined for SW and HW packets / bytes. For the feature-specific counters I don't think we need to have both SW and HW flavors defined. But for pure rx / tx packets / bytes users may want to see both. > > > +static const struct virtnet_stat_desc virtnet_stats_rx_gso_desc[] = { > > > + VIRTNET_STATS_DESC(rx, gso, gso_packets), > > > + VIRTNET_STATS_DESC(rx, gso, gso_bytes), > > > > I used the term "GSO" in conversations about Rx and it often confuses > > people. Let's use "GRO", so hw-gro-packets, and hw-gro-bytes ? > > Or maybe coalesce? "hw-rx-coalesce" ? That's quite a bit longer.. > > GRO may also confuse people. > > I like hw-rx-coalesce-packets, hw-rx-coalesce-bytes. FWIW the HW offload feature in ethtool -k is called 'rx-gro-hw', but we can use "hw-rx-coalesce-*" and mention the feature in the documentation.
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index af512d85cd5b..5549fc8508bd 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -128,6 +128,121 @@ static const struct virtnet_stat_desc virtnet_rq_stats_desc[] = { #define VIRTNET_SQ_STATS_LEN ARRAY_SIZE(virtnet_sq_stats_desc) #define VIRTNET_RQ_STATS_LEN ARRAY_SIZE(virtnet_rq_stats_desc) +#define VIRTNET_STATS_DESC(qtype, class, name) \ + {#name, offsetof(struct virtio_net_stats_ ## qtype ## _ ## class, qtype ## _ ## name)} + +static const struct virtnet_stat_desc virtnet_stats_cvq_desc[] = { + {"command_num", offsetof(struct virtio_net_stats_cvq, command_num)}, + {"ok_num", offsetof(struct virtio_net_stats_cvq, ok_num)} +}; + +static const struct virtnet_stat_desc virtnet_stats_rx_basic_desc[] = { + VIRTNET_STATS_DESC(rx, basic, packets), + VIRTNET_STATS_DESC(rx, basic, bytes), + + VIRTNET_STATS_DESC(rx, basic, notifications), + VIRTNET_STATS_DESC(rx, basic, interrupts), + + VIRTNET_STATS_DESC(rx, basic, drops), + VIRTNET_STATS_DESC(rx, basic, drop_overruns), +}; + +static const struct virtnet_stat_desc virtnet_stats_tx_basic_desc[] = { + VIRTNET_STATS_DESC(tx, basic, packets), + VIRTNET_STATS_DESC(tx, basic, bytes), + + VIRTNET_STATS_DESC(tx, basic, notifications), + VIRTNET_STATS_DESC(tx, basic, interrupts), + + VIRTNET_STATS_DESC(tx, basic, drops), + VIRTNET_STATS_DESC(tx, basic, drop_malformed), +}; + +static const struct virtnet_stat_desc virtnet_stats_rx_csum_desc[] = { + VIRTNET_STATS_DESC(rx, csum, csum_valid), + VIRTNET_STATS_DESC(rx, csum, needs_csum), + + VIRTNET_STATS_DESC(rx, csum, csum_none), + VIRTNET_STATS_DESC(rx, csum, csum_bad), +}; + +static const struct virtnet_stat_desc virtnet_stats_tx_csum_desc[] = { + VIRTNET_STATS_DESC(tx, csum, needs_csum), + VIRTNET_STATS_DESC(tx, csum, csum_none), +}; + +static const struct virtnet_stat_desc virtnet_stats_rx_gso_desc[] = { + VIRTNET_STATS_DESC(rx, gso, gso_packets), + VIRTNET_STATS_DESC(rx, gso, gso_bytes), + 
VIRTNET_STATS_DESC(rx, gso, gso_packets_coalesced), + VIRTNET_STATS_DESC(rx, gso, gso_bytes_coalesced), +}; + +static const struct virtnet_stat_desc virtnet_stats_tx_gso_desc[] = { + VIRTNET_STATS_DESC(tx, gso, gso_packets), + VIRTNET_STATS_DESC(tx, gso, gso_bytes), + VIRTNET_STATS_DESC(tx, gso, gso_segments), + VIRTNET_STATS_DESC(tx, gso, gso_segments_bytes), + VIRTNET_STATS_DESC(tx, gso, gso_packets_noseg), + VIRTNET_STATS_DESC(tx, gso, gso_bytes_noseg), +}; + +static const struct virtnet_stat_desc virtnet_stats_rx_speed_desc[] = { + VIRTNET_STATS_DESC(rx, speed, packets_allowance_exceeded), + VIRTNET_STATS_DESC(rx, speed, bytes_allowance_exceeded), +}; + +static const struct virtnet_stat_desc virtnet_stats_tx_speed_desc[] = { + VIRTNET_STATS_DESC(tx, speed, packets_allowance_exceeded), + VIRTNET_STATS_DESC(tx, speed, packets_allowance_exceeded), +}; + +struct virtnet_stats_map { + /* the stat type in bitmap */ + u64 stat_type; + + /* the bytes of the response for the stat */ + u32 len; + + /* the num of the response fields for the stat */ + u32 num; + +#define VIRTNET_STATS_Q_TYPE_RX 0 +#define VIRTNET_STATS_Q_TYPE_TX 1 +#define VIRTNET_STATS_Q_TYPE_CQ 2 + u32 queue_type; + + /* the reply type of the stat */ + u8 reply_type; + + /* describe the name and the offset in the response */ + const struct virtnet_stat_desc *desc; +}; + +#define VIRTNET_DEVICE_STATS_MAP_ITEM(TYPE, type, queue_type) \ + { \ + VIRTIO_NET_STATS_TYPE_##TYPE, \ + sizeof(struct virtio_net_stats_ ## type), \ + ARRAY_SIZE(virtnet_stats_ ## type ##_desc), \ + VIRTNET_STATS_Q_TYPE_##queue_type, \ + VIRTIO_NET_STATS_TYPE_REPLY_##TYPE, \ + &virtnet_stats_##type##_desc[0] \ + } + +static struct virtnet_stats_map virtio_net_stats_map[] = { + VIRTNET_DEVICE_STATS_MAP_ITEM(CVQ, cvq, CQ), + + VIRTNET_DEVICE_STATS_MAP_ITEM(RX_BASIC, rx_basic, RX), + VIRTNET_DEVICE_STATS_MAP_ITEM(RX_CSUM, rx_csum, RX), + VIRTNET_DEVICE_STATS_MAP_ITEM(RX_GSO, rx_gso, RX), + VIRTNET_DEVICE_STATS_MAP_ITEM(RX_SPEED, rx_speed, 
RX), + + VIRTNET_DEVICE_STATS_MAP_ITEM(TX_BASIC, tx_basic, TX), + VIRTNET_DEVICE_STATS_MAP_ITEM(TX_CSUM, tx_csum, TX), + VIRTNET_DEVICE_STATS_MAP_ITEM(TX_GSO, tx_gso, TX), + VIRTNET_DEVICE_STATS_MAP_ITEM(TX_SPEED, tx_speed, TX), +}; + struct virtnet_interrupt_coalesce { u32 max_packets; u32 max_usecs; @@ -244,6 +359,7 @@ struct control_buf { struct virtio_net_ctrl_coal_tx coal_tx; struct virtio_net_ctrl_coal_rx coal_rx; struct virtio_net_ctrl_coal_vq coal_vq; + struct virtio_net_stats_capabilities stats_cap; }; struct virtnet_info { @@ -329,6 +445,8 @@ struct virtnet_info { /* failover when STANDBY feature enabled */ struct failover *failover; + + u64 device_stats_cap; }; struct padded_vnet_hdr { @@ -3263,6 +3381,204 @@ static int virtnet_set_channels(struct net_device *dev, return err; } +static void virtnet_get_hw_stats_string(struct virtnet_info *vi, int type, int qid, u8 **data) +{ + struct virtnet_stats_map *m; + int i, j; + u8 *p = *data; + + if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_DEVICE_STATS)) + return; + + for (i = 0; i < ARRAY_SIZE(virtio_net_stats_map); ++i) { + m = &virtio_net_stats_map[i]; + + if (m->queue_type != type) + continue; + + if (!(vi->device_stats_cap & m->stat_type)) + continue; + + for (j = 0; j < m->num; ++j) { + if (type == VIRTNET_STATS_Q_TYPE_RX) + ethtool_sprintf(&p, "rx_queue_hw_%u_%s", qid, m->desc[j].desc); + + else if (type == VIRTNET_STATS_Q_TYPE_TX) + ethtool_sprintf(&p, "tx_queue_hw_%u_%s", qid, m->desc[j].desc); + + else if (type == VIRTNET_STATS_Q_TYPE_CQ) + ethtool_sprintf(&p, "cq_hw_%s", m->desc[j].desc); + } + } + + *data = p; +} + +struct virtnet_stats_ctx { + u32 num_cq; + u32 num_rx; + u32 num_tx; + + u64 bitmap_cq; + u64 bitmap_rx; + u64 bitmap_tx; + + u32 size_cq; + u32 size_rx; + u32 size_tx; + + u64 *data; +}; + +static void virtnet_stats_ctx_init(struct virtnet_info *vi, + struct virtnet_stats_ctx *ctx, + u64 *data) +{ + struct virtnet_stats_map *m; + int i; + + ctx->data = data; + + for (i = 0; i < 
ARRAY_SIZE(virtio_net_stats_map); ++i) { + m = &virtio_net_stats_map[i]; + + if (vi->device_stats_cap & m->stat_type) { + if (m->queue_type == VIRTNET_STATS_Q_TYPE_CQ) { + ctx->bitmap_cq |= m->stat_type; + ctx->num_cq += m->num; + ctx->size_cq += m->len; + } + + if (m->queue_type == VIRTNET_STATS_Q_TYPE_RX) { + ctx->bitmap_rx |= m->stat_type; + ctx->num_rx += m->num; + ctx->size_rx += m->len; + } + + if (m->queue_type == VIRTNET_STATS_Q_TYPE_TX) { + ctx->bitmap_tx |= m->stat_type; + ctx->num_tx += m->num; + ctx->size_tx += m->len; + } + } + } +} + +static int virtnet_get_hw_stats(struct virtnet_info *vi, + struct virtnet_stats_ctx *ctx) +{ + struct virtio_net_ctrl_queue_stats *req; + struct virtio_net_stats_reply_hdr *hdr; + struct scatterlist sgs_in, sgs_out; + u32 num_rx, num_tx, num_cq, offset; + int qnum, i, j, qid, res_size; + struct virtnet_stats_map *m; + void *reply, *p; + u64 bitmap; + int ok; + u64 *v; + + if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_DEVICE_STATS)) + return 0; + + qnum = 0; + if (ctx->bitmap_cq) + qnum += 1; + + if (ctx->bitmap_rx) + qnum += vi->curr_queue_pairs; + + if (ctx->bitmap_tx) + qnum += vi->curr_queue_pairs; + + req = kcalloc(qnum, sizeof(*req), GFP_KERNEL); + if (!req) + return -ENOMEM; + + res_size = (ctx->size_rx + ctx->size_tx) * vi->curr_queue_pairs + ctx->size_cq; + reply = kmalloc(res_size, GFP_KERNEL); + if (!reply) { + kfree(req); + return -ENOMEM; + } + + j = 0; + for (i = 0; i < vi->curr_queue_pairs; ++i) { + if (ctx->bitmap_rx) { + req->stats[j].vq_index = cpu_to_le16(i * 2); + req->stats[j].types_bitmap[0] = cpu_to_le64(ctx->bitmap_rx); + ++j; + } + + if (ctx->bitmap_tx) { + req->stats[j].vq_index = cpu_to_le16(i * 2 + 1); + req->stats[j].types_bitmap[0] = cpu_to_le64(ctx->bitmap_tx); + ++j; + } + } + + if (ctx->size_cq) { + req->stats[j].vq_index = cpu_to_le16(vi->max_queue_pairs * 2); + req->stats[j].types_bitmap[0] = cpu_to_le64(ctx->bitmap_cq); + ++j; + } + + sg_init_one(&sgs_out, req, sizeof(*req) * j); + 
sg_init_one(&sgs_in, reply, res_size); + + ok = virtnet_send_command(vi, VIRTIO_NET_CTRL_STATS, + VIRTIO_NET_CTRL_STATS_GET, + &sgs_out, &sgs_in); + kfree(req); + + if (!ok) { + kfree(reply); + return ok; + } + + num_rx = VIRTNET_RQ_STATS_LEN + ctx->num_rx; + num_tx = VIRTNET_SQ_STATS_LEN + ctx->num_tx; + num_cq = ctx->num_tx; + + for (p = reply; p - reply < res_size; p += le16_to_cpu(hdr->size)) { + hdr = p; + + qid = le16_to_cpu(hdr->vq_index); + + if (qid == vi->max_queue_pairs * 2) { + offset = 0; + bitmap = ctx->bitmap_cq; + } else if (qid % 2) { + offset = num_cq + num_rx * vi->curr_queue_pairs + num_tx * (qid / 2); + offset += VIRTNET_SQ_STATS_LEN; + bitmap = ctx->bitmap_tx; + } else { + offset = num_cq + num_rx * (qid / 2) + VIRTNET_RQ_STATS_LEN; + bitmap = ctx->bitmap_rx; + } + + for (i = 0; i < ARRAY_SIZE(virtio_net_stats_map); ++i) { + m = &virtio_net_stats_map[i]; + + if (m->stat_type & bitmap) + offset += m->num; + + if (hdr->type != m->reply_type) + continue; + + for (j = 0; j < m->num; ++j) { + v = p + m->desc[j].offset; + ctx->data[offset + j] = le64_to_cpu(*v); + } + + break; + } + } + + kfree(reply); + return 0; +} + static void virtnet_get_strings(struct net_device *dev, u32 stringset, u8 *data) { struct virtnet_info *vi = netdev_priv(dev); @@ -3271,16 +3587,22 @@ static void virtnet_get_strings(struct net_device *dev, u32 stringset, u8 *data) switch (stringset) { case ETH_SS_STATS: + virtnet_get_hw_stats_string(vi, VIRTNET_STATS_Q_TYPE_CQ, 0, &p); + for (i = 0; i < vi->curr_queue_pairs; i++) { for (j = 0; j < VIRTNET_RQ_STATS_LEN; j++) ethtool_sprintf(&p, "rx_queue_%u_%s", i, virtnet_rq_stats_desc[j].desc); + + virtnet_get_hw_stats_string(vi, VIRTNET_STATS_Q_TYPE_RX, i, &p); } for (i = 0; i < vi->curr_queue_pairs; i++) { for (j = 0; j < VIRTNET_SQ_STATS_LEN; j++) ethtool_sprintf(&p, "tx_queue_%u_%s", i, virtnet_sq_stats_desc[j].desc); + + virtnet_get_hw_stats_string(vi, VIRTNET_STATS_Q_TYPE_TX, i, &p); } break; } @@ -3289,11 +3611,35 @@ static 
void virtnet_get_strings(struct net_device *dev, u32 stringset, u8 *data) static int virtnet_get_sset_count(struct net_device *dev, int sset) { struct virtnet_info *vi = netdev_priv(dev); + struct virtnet_stats_ctx ctx = {0}; + u32 pair_count; switch (sset) { case ETH_SS_STATS: - return vi->curr_queue_pairs * (VIRTNET_RQ_STATS_LEN + - VIRTNET_SQ_STATS_LEN); + if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_DEVICE_STATS) && + !vi->device_stats_cap) { + struct scatterlist sg; + + sg_init_one(&sg, &vi->ctrl->stats_cap, sizeof(vi->ctrl->stats_cap)); + + if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_STATS, + VIRTIO_NET_CTRL_STATS_QUERY, + NULL, &sg)) { + dev_warn(&dev->dev, "Fail to get stats capability\n"); + } else { + __le64 v; + + v = vi->ctrl->stats_cap.supported_stats_types[0]; + vi->device_stats_cap = le64_to_cpu(v); + } + } + + virtnet_stats_ctx_init(vi, &ctx, NULL); + + pair_count = VIRTNET_RQ_STATS_LEN + VIRTNET_SQ_STATS_LEN; + pair_count += ctx.num_rx + ctx.num_tx; + + return ctx.num_cq + vi->curr_queue_pairs * pair_count; default: return -EOPNOTSUPP; } @@ -3303,11 +3649,17 @@ static void virtnet_get_ethtool_stats(struct net_device *dev, struct ethtool_stats *stats, u64 *data) { struct virtnet_info *vi = netdev_priv(dev); - unsigned int idx = 0, start, i, j; + struct virtnet_stats_ctx ctx = {0}; + unsigned int idx, start, i, j; const u8 *stats_base; const u64_stats_t *p; size_t offset; + virtnet_stats_ctx_init(vi, &ctx, data); + virtnet_get_hw_stats(vi, &ctx); + + idx = ctx.num_cq; + for (i = 0; i < vi->curr_queue_pairs; i++) { struct receive_queue *rq = &vi->rq[i]; @@ -3321,6 +3673,7 @@ static void virtnet_get_ethtool_stats(struct net_device *dev, } } while (u64_stats_fetch_retry(&rq->stats.syncp, start)); idx += VIRTNET_RQ_STATS_LEN; + idx += ctx.num_rx; } for (i = 0; i < vi->curr_queue_pairs; i++) { @@ -3336,6 +3689,7 @@ static void virtnet_get_ethtool_stats(struct net_device *dev, } } while (u64_stats_fetch_retry(&sq->stats.syncp, start)); idx += 
VIRTNET_SQ_STATS_LEN; + idx += ctx.num_tx; } } @@ -4963,7 +5317,7 @@ static struct virtio_device_id id_table[] = { VIRTIO_NET_F_SPEED_DUPLEX, VIRTIO_NET_F_STANDBY, \ VIRTIO_NET_F_RSS, VIRTIO_NET_F_HASH_REPORT, VIRTIO_NET_F_NOTF_COAL, \ VIRTIO_NET_F_VQ_NOTF_COAL, \ - VIRTIO_NET_F_GUEST_HDRLEN + VIRTIO_NET_F_GUEST_HDRLEN, VIRTIO_NET_F_DEVICE_STATS static unsigned int features[] = { VIRTNET_FEATURES,
Following the spec change https://github.com/oasis-tcs/virtio-spec/commit/42f389989823039724f95bbbd243291ab0064f82 make virtio-net support getting the stats from the device via ethtool -S <eth0>. Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com> --- drivers/net/virtio_net.c | 362 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 358 insertions(+), 4 deletions(-)