diff mbox series

[net-next,v3,3/6] virtio_net: support device stats

Message ID 20240227080303.63894-4-xuanzhuo@linux.alibaba.com (mailing list archive)
State Changes Requested
Delegated to: Netdev Maintainers
Headers show
Series virtio-net: support device stats | expand

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for net-next
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit fail Errors and warnings before: 940 this patch: 941
netdev/build_tools success No tools touched, skip
netdev/cc_maintainers success CCed 8 of 8 maintainers
netdev/build_clang success Errors and warnings before: 957 this patch: 957
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn fail Errors and warnings before: 957 this patch: 958
netdev/checkpatch warning WARNING: line length of 81 exceeds 80 columns WARNING: line length of 84 exceeds 80 columns WARNING: line length of 87 exceeds 80 columns WARNING: line length of 88 exceeds 80 columns WARNING: line length of 92 exceeds 80 columns WARNING: line length of 93 exceeds 80 columns WARNING: line length of 94 exceeds 80 columns WARNING: line length of 95 exceeds 80 columns
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Xuan Zhuo Feb. 27, 2024, 8:03 a.m. UTC
As the spec https://github.com/oasis-tcs/virtio-spec/commit/42f389989823039724f95bbbd243291ab0064f82

make virtio-net support getting the stats from the device by ethtool -S
<eth0>.

Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
---
 drivers/net/virtio_net.c | 362 ++++++++++++++++++++++++++++++++++++++-
 1 file changed, 358 insertions(+), 4 deletions(-)

Comments

Jiri Pirko Feb. 27, 2024, 2:56 p.m. UTC | #1
Tue, Feb 27, 2024 at 09:03:00AM CET, xuanzhuo@linux.alibaba.com wrote:
>As the spec https://github.com/oasis-tcs/virtio-spec/commit/42f389989823039724f95bbbd243291ab0064f82
>
>make virtio-net support getting the stats from the device by ethtool -S
><eth0>.

Would be nice to have example output of this command included here as
well as in the cover letter.


>
>Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
>---
> drivers/net/virtio_net.c | 362 ++++++++++++++++++++++++++++++++++++++-
> 1 file changed, 358 insertions(+), 4 deletions(-)
>
>diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
>index af512d85cd5b..5549fc8508bd 100644
>--- a/drivers/net/virtio_net.c
>+++ b/drivers/net/virtio_net.c
>@@ -128,6 +128,121 @@ static const struct virtnet_stat_desc virtnet_rq_stats_desc[] = {
> #define VIRTNET_SQ_STATS_LEN	ARRAY_SIZE(virtnet_sq_stats_desc)
> #define VIRTNET_RQ_STATS_LEN	ARRAY_SIZE(virtnet_rq_stats_desc)
> 
>+#define VIRTNET_STATS_DESC(qtype, class, name) \
>+	{#name, offsetof(struct virtio_net_stats_ ## qtype ## _ ## class, qtype ## _ ## name)}
>+
>+static const struct virtnet_stat_desc virtnet_stats_cvq_desc[] = {
>+	{"command_num", offsetof(struct virtio_net_stats_cvq, command_num)},
>+	{"ok_num", offsetof(struct virtio_net_stats_cvq, ok_num)}
>+};
>+
>+static const struct virtnet_stat_desc virtnet_stats_rx_basic_desc[] = {
>+	VIRTNET_STATS_DESC(rx, basic, packets),
>+	VIRTNET_STATS_DESC(rx, basic, bytes),
>+
>+	VIRTNET_STATS_DESC(rx, basic, notifications),
>+	VIRTNET_STATS_DESC(rx, basic, interrupts),
>+
>+	VIRTNET_STATS_DESC(rx, basic, drops),
>+	VIRTNET_STATS_DESC(rx, basic, drop_overruns),
>+};
>+
>+static const struct virtnet_stat_desc virtnet_stats_tx_basic_desc[] = {
>+	VIRTNET_STATS_DESC(tx, basic, packets),
>+	VIRTNET_STATS_DESC(tx, basic, bytes),
>+
>+	VIRTNET_STATS_DESC(tx, basic, notifications),
>+	VIRTNET_STATS_DESC(tx, basic, interrupts),
>+
>+	VIRTNET_STATS_DESC(tx, basic, drops),
>+	VIRTNET_STATS_DESC(tx, basic, drop_malformed),
>+};
>+
>+static const struct virtnet_stat_desc virtnet_stats_rx_csum_desc[] = {
>+	VIRTNET_STATS_DESC(rx, csum, csum_valid),
>+	VIRTNET_STATS_DESC(rx, csum, needs_csum),
>+
>+	VIRTNET_STATS_DESC(rx, csum, csum_none),
>+	VIRTNET_STATS_DESC(rx, csum, csum_bad),
>+};
>+
>+static const struct virtnet_stat_desc virtnet_stats_tx_csum_desc[] = {
>+	VIRTNET_STATS_DESC(tx, csum, needs_csum),
>+	VIRTNET_STATS_DESC(tx, csum, csum_none),
>+};
>+
>+static const struct virtnet_stat_desc virtnet_stats_rx_gso_desc[] = {
>+	VIRTNET_STATS_DESC(rx, gso, gso_packets),
>+	VIRTNET_STATS_DESC(rx, gso, gso_bytes),
>+	VIRTNET_STATS_DESC(rx, gso, gso_packets_coalesced),
>+	VIRTNET_STATS_DESC(rx, gso, gso_bytes_coalesced),
>+};
>+
>+static const struct virtnet_stat_desc virtnet_stats_tx_gso_desc[] = {
>+	VIRTNET_STATS_DESC(tx, gso, gso_packets),
>+	VIRTNET_STATS_DESC(tx, gso, gso_bytes),
>+	VIRTNET_STATS_DESC(tx, gso, gso_segments),
>+	VIRTNET_STATS_DESC(tx, gso, gso_segments_bytes),
>+	VIRTNET_STATS_DESC(tx, gso, gso_packets_noseg),
>+	VIRTNET_STATS_DESC(tx, gso, gso_bytes_noseg),
>+};
>+
>+static const struct virtnet_stat_desc virtnet_stats_rx_speed_desc[] = {
>+	VIRTNET_STATS_DESC(rx, speed, packets_allowance_exceeded),
>+	VIRTNET_STATS_DESC(rx, speed, bytes_allowance_exceeded),
>+};
>+
>+static const struct virtnet_stat_desc virtnet_stats_tx_speed_desc[] = {
>+	VIRTNET_STATS_DESC(tx, speed, packets_allowance_exceeded),
>+	VIRTNET_STATS_DESC(tx, speed, packets_allowance_exceeded),
>+};
>+
>+struct virtnet_stats_map {
>+	/* the stat type in bitmap */
>+	u64 stat_type;
>+
>+	/* the bytes of the response for the stat */
>+	u32 len;
>+
>+	/* the num of the response fields for the stat */
>+	u32 num;
>+
>+#define VIRTNET_STATS_Q_TYPE_RX 0
>+#define VIRTNET_STATS_Q_TYPE_TX 1
>+#define VIRTNET_STATS_Q_TYPE_CQ 2

Enum? Then you don't need to have it here in struct but above it.


>+	u32 queue_type;
>+
>+	/* the reply type of the stat */
>+	u8 reply_type;
>+
>+	/* describe the name and the offset in the response */
>+	const struct virtnet_stat_desc *desc;
>+};
>+
>+#define VIRTNET_DEVICE_STATS_MAP_ITEM(TYPE, type, queue_type)	\
>+	{							\
>+		VIRTIO_NET_STATS_TYPE_##TYPE,			\
>+		sizeof(struct virtio_net_stats_ ## type),	\
>+		ARRAY_SIZE(virtnet_stats_ ## type ##_desc),	\
>+		VIRTNET_STATS_Q_TYPE_##queue_type,		\
>+		VIRTIO_NET_STATS_TYPE_REPLY_##TYPE,		\
>+		&virtnet_stats_##type##_desc[0]			\
>+	}
>+
>+static struct virtnet_stats_map virtio_net_stats_map[] = {
>+	VIRTNET_DEVICE_STATS_MAP_ITEM(CVQ, cvq, CQ),
>+
>+	VIRTNET_DEVICE_STATS_MAP_ITEM(RX_BASIC, rx_basic, RX),
>+	VIRTNET_DEVICE_STATS_MAP_ITEM(RX_CSUM,  rx_csum,  RX),
>+	VIRTNET_DEVICE_STATS_MAP_ITEM(RX_GSO,   rx_gso,   RX),
>+	VIRTNET_DEVICE_STATS_MAP_ITEM(RX_SPEED, rx_speed, RX),
>+
>+	VIRTNET_DEVICE_STATS_MAP_ITEM(TX_BASIC, tx_basic, TX),
>+	VIRTNET_DEVICE_STATS_MAP_ITEM(TX_CSUM,  tx_csum,  TX),
>+	VIRTNET_DEVICE_STATS_MAP_ITEM(TX_GSO,   tx_gso,   TX),
>+	VIRTNET_DEVICE_STATS_MAP_ITEM(TX_SPEED, tx_speed, TX),
>+};
>+
> struct virtnet_interrupt_coalesce {
> 	u32 max_packets;
> 	u32 max_usecs;
>@@ -244,6 +359,7 @@ struct control_buf {
> 	struct virtio_net_ctrl_coal_tx coal_tx;
> 	struct virtio_net_ctrl_coal_rx coal_rx;
> 	struct virtio_net_ctrl_coal_vq coal_vq;
>+	struct virtio_net_stats_capabilities stats_cap;
> };
> 
> struct virtnet_info {
>@@ -329,6 +445,8 @@ struct virtnet_info {
> 
> 	/* failover when STANDBY feature enabled */
> 	struct failover *failover;
>+
>+	u64 device_stats_cap;
> };
> 
> struct padded_vnet_hdr {
>@@ -3263,6 +3381,204 @@ static int virtnet_set_channels(struct net_device *dev,
> 	return err;
> }
> 
>+static void virtnet_get_hw_stats_string(struct virtnet_info *vi, int type, int qid, u8 **data)
>+{
>+	struct virtnet_stats_map *m;
>+	int i, j;
>+	u8 *p = *data;

Reverse christmas tree:
https://www.kernel.org/doc/html/v6.6/process/maintainer-netdev.html#tl-dr


>+
>+	if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_DEVICE_STATS))
>+		return;
>+
>+	for (i = 0; i < ARRAY_SIZE(virtio_net_stats_map); ++i) {
>+		m = &virtio_net_stats_map[i];
>+
>+		if (m->queue_type != type)
>+			continue;
>+
>+		if (!(vi->device_stats_cap & m->stat_type))
>+			continue;
>+
>+		for (j = 0; j < m->num; ++j) {
>+			if (type == VIRTNET_STATS_Q_TYPE_RX)
>+				ethtool_sprintf(&p, "rx_queue_hw_%u_%s", qid, m->desc[j].desc);
>+
>+			else if (type == VIRTNET_STATS_Q_TYPE_TX)
>+				ethtool_sprintf(&p, "tx_queue_hw_%u_%s", qid, m->desc[j].desc);
>+
>+			else if (type == VIRTNET_STATS_Q_TYPE_CQ)
>+				ethtool_sprintf(&p, "cq_hw_%s", m->desc[j].desc);

Switch-case?


>+		}
>+	}
>+
>+	*data = p;
>+}
>+
>+struct virtnet_stats_ctx {
>+	u32 num_cq;
>+	u32 num_rx;
>+	u32 num_tx;
>+
>+	u64 bitmap_cq;
>+	u64 bitmap_rx;
>+	u64 bitmap_tx;
>+
>+	u32 size_cq;
>+	u32 size_rx;
>+	u32 size_tx;
>+
>+	u64 *data;
>+};
>+
>+static void virtnet_stats_ctx_init(struct virtnet_info *vi,
>+				   struct virtnet_stats_ctx *ctx,
>+				   u64 *data)
>+{
>+	struct virtnet_stats_map *m;
>+	int i;
>+
>+	ctx->data = data;
>+
>+	for (i = 0; i < ARRAY_SIZE(virtio_net_stats_map); ++i) {
>+		m = &virtio_net_stats_map[i];
>+
>+		if (vi->device_stats_cap & m->stat_type) {

if (!(vi->device_stats_cap & m->stat_type))
    continue;
would let you save one level of indent below.

>+			if (m->queue_type == VIRTNET_STATS_Q_TYPE_CQ) {
>+				ctx->bitmap_cq |= m->stat_type;
>+				ctx->num_cq += m->num;
>+				ctx->size_cq += m->len;
>+			}
>+
>+			if (m->queue_type == VIRTNET_STATS_Q_TYPE_RX) {
>+				ctx->bitmap_rx |= m->stat_type;
>+				ctx->num_rx += m->num;
>+				ctx->size_rx += m->len;
>+			}
>+
>+			if (m->queue_type == VIRTNET_STATS_Q_TYPE_TX) {
>+				ctx->bitmap_tx |= m->stat_type;
>+				ctx->num_tx += m->num;
>+				ctx->size_tx += m->len;
>+			}

Switch-case?


>+		}
>+	}
>+}
>+
>+static int virtnet_get_hw_stats(struct virtnet_info *vi,
>+				struct virtnet_stats_ctx *ctx)
>+{
>+	struct virtio_net_ctrl_queue_stats *req;
>+	struct virtio_net_stats_reply_hdr *hdr;
>+	struct scatterlist sgs_in, sgs_out;
>+	u32 num_rx, num_tx, num_cq, offset;
>+	int qnum, i, j,  qid, res_size;
>+	struct virtnet_stats_map *m;
>+	void *reply, *p;
>+	u64 bitmap;
>+	int ok;
>+	u64 *v;

Single-letter variable names are always frowned upon:
m, v, p. The non-iterator variables should have meaningful names. The code
is then much easier to follow. Could you rename them, please? This applies
to the rest of the code as well, of course.


>+
>+	if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_DEVICE_STATS))
>+		return 0;
>+
>+	qnum = 0;
>+	if (ctx->bitmap_cq)
>+		qnum += 1;

qnum++ ?


>+
>+	if (ctx->bitmap_rx)
>+		qnum += vi->curr_queue_pairs;
>+
>+	if (ctx->bitmap_tx)
>+		qnum += vi->curr_queue_pairs;
>+
>+	req = kcalloc(qnum, sizeof(*req), GFP_KERNEL);
>+	if (!req)
>+		return -ENOMEM;
>+
>+	res_size = (ctx->size_rx + ctx->size_tx) * vi->curr_queue_pairs + ctx->size_cq;
>+	reply = kmalloc(res_size, GFP_KERNEL);
>+	if (!reply) {
>+		kfree(req);
>+		return -ENOMEM;
>+	}
>+
>+	j = 0;
>+	for (i = 0; i < vi->curr_queue_pairs; ++i) {
>+		if (ctx->bitmap_rx) {
>+			req->stats[j].vq_index = cpu_to_le16(i * 2);
>+			req->stats[j].types_bitmap[0] = cpu_to_le64(ctx->bitmap_rx);
>+			++j;
>+		}
>+
>+		if (ctx->bitmap_tx) {
>+			req->stats[j].vq_index = cpu_to_le16(i * 2 + 1);
>+			req->stats[j].types_bitmap[0] = cpu_to_le64(ctx->bitmap_tx);
>+			++j;
>+		}
>+	}
>+
>+	if (ctx->size_cq) {
>+		req->stats[j].vq_index = cpu_to_le16(vi->max_queue_pairs * 2);
>+		req->stats[j].types_bitmap[0] = cpu_to_le64(ctx->bitmap_cq);
>+		++j;
>+	}
>+
>+	sg_init_one(&sgs_out, req, sizeof(*req) * j);
>+	sg_init_one(&sgs_in, reply, res_size);
>+
>+	ok = virtnet_send_command(vi, VIRTIO_NET_CTRL_STATS,
>+				  VIRTIO_NET_CTRL_STATS_GET,
>+				  &sgs_out, &sgs_in);
>+	kfree(req);
>+
>+	if (!ok) {
>+		kfree(reply);
>+		return ok;

virtnet_send_command() returns bool, but this function returns 0 or a
negative errno. Please fix the return value here. Or is it supposed to be 0?
In that case just return 0 here. But I think this should return an error.


>+	}
>+
>+	num_rx = VIRTNET_RQ_STATS_LEN + ctx->num_rx;
>+	num_tx = VIRTNET_SQ_STATS_LEN + ctx->num_tx;
>+	num_cq = ctx->num_tx;
>+
>+	for (p = reply; p - reply < res_size; p += le16_to_cpu(hdr->size)) {
>+		hdr = p;




>+
>+		qid = le16_to_cpu(hdr->vq_index);
>+
>+		if (qid == vi->max_queue_pairs * 2) {
>+			offset = 0;
>+			bitmap = ctx->bitmap_cq;
>+		} else if (qid % 2) {
>+			offset = num_cq + num_rx * vi->curr_queue_pairs + num_tx * (qid / 2);
>+			offset += VIRTNET_SQ_STATS_LEN;
>+			bitmap = ctx->bitmap_tx;
>+		} else {
>+			offset = num_cq + num_rx * (qid / 2) + VIRTNET_RQ_STATS_LEN;
>+			bitmap = ctx->bitmap_rx;
>+		}
>+
>+		for (i = 0; i < ARRAY_SIZE(virtio_net_stats_map); ++i) {
>+			m = &virtio_net_stats_map[i];
>+
>+			if (m->stat_type & bitmap)
>+				offset += m->num;
>+
>+			if (hdr->type != m->reply_type)
>+				continue;
>+
>+			for (j = 0; j < m->num; ++j) {
>+				v = p + m->desc[j].offset;
>+				ctx->data[offset + j] = le64_to_cpu(*v);
>+			}
>+
>+			break;
>+		}
>+	}
>+
>+	kfree(reply);
>+	return 0;
>+}
>+
> static void virtnet_get_strings(struct net_device *dev, u32 stringset, u8 *data)
> {
> 	struct virtnet_info *vi = netdev_priv(dev);
>@@ -3271,16 +3587,22 @@ static void virtnet_get_strings(struct net_device *dev, u32 stringset, u8 *data)
> 
> 	switch (stringset) {
> 	case ETH_SS_STATS:
>+		virtnet_get_hw_stats_string(vi, VIRTNET_STATS_Q_TYPE_CQ, 0, &p);
>+
> 		for (i = 0; i < vi->curr_queue_pairs; i++) {
> 			for (j = 0; j < VIRTNET_RQ_STATS_LEN; j++)
> 				ethtool_sprintf(&p, "rx_queue_%u_%s", i,
> 						virtnet_rq_stats_desc[j].desc);
>+
>+			virtnet_get_hw_stats_string(vi, VIRTNET_STATS_Q_TYPE_RX, i, &p);
> 		}
> 
> 		for (i = 0; i < vi->curr_queue_pairs; i++) {
> 			for (j = 0; j < VIRTNET_SQ_STATS_LEN; j++)
> 				ethtool_sprintf(&p, "tx_queue_%u_%s", i,
> 						virtnet_sq_stats_desc[j].desc);
>+
>+			virtnet_get_hw_stats_string(vi, VIRTNET_STATS_Q_TYPE_TX, i, &p);
> 		}
> 		break;
> 	}
>@@ -3289,11 +3611,35 @@ static void virtnet_get_strings(struct net_device *dev, u32 stringset, u8 *data)
> static int virtnet_get_sset_count(struct net_device *dev, int sset)
> {
> 	struct virtnet_info *vi = netdev_priv(dev);
>+	struct virtnet_stats_ctx ctx = {0};
>+	u32 pair_count;
> 
> 	switch (sset) {
> 	case ETH_SS_STATS:
>-		return vi->curr_queue_pairs * (VIRTNET_RQ_STATS_LEN +
>-					       VIRTNET_SQ_STATS_LEN);
>+		if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_DEVICE_STATS) &&
>+		    !vi->device_stats_cap) {
>+			struct scatterlist sg;
>+
>+			sg_init_one(&sg, &vi->ctrl->stats_cap, sizeof(vi->ctrl->stats_cap));
>+
>+			if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_STATS,
>+						  VIRTIO_NET_CTRL_STATS_QUERY,
>+						  NULL, &sg)) {
>+				dev_warn(&dev->dev, "Fail to get stats capability\n");
>+			} else {
>+				__le64 v;
>+
>+				v = vi->ctrl->stats_cap.supported_stats_types[0];
>+				vi->device_stats_cap = le64_to_cpu(v);
>+			}
>+		}
>+
>+		virtnet_stats_ctx_init(vi, &ctx, NULL);
>+
>+		pair_count = VIRTNET_RQ_STATS_LEN + VIRTNET_SQ_STATS_LEN;
>+		pair_count += ctx.num_rx + ctx.num_tx;
>+
>+		return ctx.num_cq + vi->curr_queue_pairs * pair_count;
> 	default:
> 		return -EOPNOTSUPP;
> 	}
>@@ -3303,11 +3649,17 @@ static void virtnet_get_ethtool_stats(struct net_device *dev,
> 				      struct ethtool_stats *stats, u64 *data)
> {
> 	struct virtnet_info *vi = netdev_priv(dev);
>-	unsigned int idx = 0, start, i, j;
>+	struct virtnet_stats_ctx ctx = {0};
>+	unsigned int idx, start, i, j;
> 	const u8 *stats_base;
> 	const u64_stats_t *p;
> 	size_t offset;
> 
>+	virtnet_stats_ctx_init(vi, &ctx, data);
>+	virtnet_get_hw_stats(vi, &ctx);

Check the function's return value. At the very least, print an error if
there is one.

Btw, did you consider obtaining these stats asynchronously?


>+
>+	idx = ctx.num_cq;
>+
> 	for (i = 0; i < vi->curr_queue_pairs; i++) {
> 		struct receive_queue *rq = &vi->rq[i];
> 
>@@ -3321,6 +3673,7 @@ static void virtnet_get_ethtool_stats(struct net_device *dev,
> 			}
> 		} while (u64_stats_fetch_retry(&rq->stats.syncp, start));
> 		idx += VIRTNET_RQ_STATS_LEN;
>+		idx += ctx.num_rx;
> 	}
> 
> 	for (i = 0; i < vi->curr_queue_pairs; i++) {
>@@ -3336,6 +3689,7 @@ static void virtnet_get_ethtool_stats(struct net_device *dev,
> 			}
> 		} while (u64_stats_fetch_retry(&sq->stats.syncp, start));
> 		idx += VIRTNET_SQ_STATS_LEN;
>+		idx += ctx.num_tx;
> 	}
> }
> 
>@@ -4963,7 +5317,7 @@ static struct virtio_device_id id_table[] = {
> 	VIRTIO_NET_F_SPEED_DUPLEX, VIRTIO_NET_F_STANDBY, \
> 	VIRTIO_NET_F_RSS, VIRTIO_NET_F_HASH_REPORT, VIRTIO_NET_F_NOTF_COAL, \
> 	VIRTIO_NET_F_VQ_NOTF_COAL, \
>-	VIRTIO_NET_F_GUEST_HDRLEN
>+	VIRTIO_NET_F_GUEST_HDRLEN, VIRTIO_NET_F_DEVICE_STATS
> 
> static unsigned int features[] = {
> 	VIRTNET_FEATURES,
>-- 
>2.32.0.3.g01195cf9f
>
>
Simon Horman Feb. 27, 2024, 7:19 p.m. UTC | #2
On Tue, Feb 27, 2024 at 04:03:00PM +0800, Xuan Zhuo wrote:
> As the spec https://github.com/oasis-tcs/virtio-spec/commit/42f389989823039724f95bbbd243291ab0064f82
> 
> make virtio-net support getting the stats from the device by ethtool -S
> <eth0>.
> 
> Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>

...

> +static int virtnet_get_hw_stats(struct virtnet_info *vi,
> +				struct virtnet_stats_ctx *ctx)
> +{
> +	struct virtio_net_ctrl_queue_stats *req;
> +	struct virtio_net_stats_reply_hdr *hdr;
> +	struct scatterlist sgs_in, sgs_out;
> +	u32 num_rx, num_tx, num_cq, offset;
> +	int qnum, i, j,  qid, res_size;
> +	struct virtnet_stats_map *m;
> +	void *reply, *p;
> +	u64 bitmap;
> +	int ok;
> +	u64 *v;
> +
> +	if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_DEVICE_STATS))
> +		return 0;
> +
> +	qnum = 0;
> +	if (ctx->bitmap_cq)
> +		qnum += 1;
> +
> +	if (ctx->bitmap_rx)
> +		qnum += vi->curr_queue_pairs;
> +
> +	if (ctx->bitmap_tx)
> +		qnum += vi->curr_queue_pairs;
> +
> +	req = kcalloc(qnum, sizeof(*req), GFP_KERNEL);
> +	if (!req)
> +		return -ENOMEM;
> +
> +	res_size = (ctx->size_rx + ctx->size_tx) * vi->curr_queue_pairs + ctx->size_cq;
> +	reply = kmalloc(res_size, GFP_KERNEL);
> +	if (!reply) {
> +		kfree(req);
> +		return -ENOMEM;
> +	}
> +
> +	j = 0;
> +	for (i = 0; i < vi->curr_queue_pairs; ++i) {
> +		if (ctx->bitmap_rx) {
> +			req->stats[j].vq_index = cpu_to_le16(i * 2);
> +			req->stats[j].types_bitmap[0] = cpu_to_le64(ctx->bitmap_rx);
> +			++j;
> +		}
> +
> +		if (ctx->bitmap_tx) {
> +			req->stats[j].vq_index = cpu_to_le16(i * 2 + 1);
> +			req->stats[j].types_bitmap[0] = cpu_to_le64(ctx->bitmap_tx);
> +			++j;
> +		}
> +	}
> +
> +	if (ctx->size_cq) {
> +		req->stats[j].vq_index = cpu_to_le16(vi->max_queue_pairs * 2);
> +		req->stats[j].types_bitmap[0] = cpu_to_le64(ctx->bitmap_cq);
> +		++j;
> +	}
> +
> +	sg_init_one(&sgs_out, req, sizeof(*req) * j);
> +	sg_init_one(&sgs_in, reply, res_size);
> +
> +	ok = virtnet_send_command(vi, VIRTIO_NET_CTRL_STATS,
> +				  VIRTIO_NET_CTRL_STATS_GET,
> +				  &sgs_out, &sgs_in);
> +	kfree(req);
> +
> +	if (!ok) {
> +		kfree(reply);
> +		return ok;
> +	}
> +
> +	num_rx = VIRTNET_RQ_STATS_LEN + ctx->num_rx;
> +	num_tx = VIRTNET_SQ_STATS_LEN + ctx->num_tx;
> +	num_cq = ctx->num_tx;
> +
> +	for (p = reply; p - reply < res_size; p += le16_to_cpu(hdr->size)) {
> +		hdr = p;
> +
> +		qid = le16_to_cpu(hdr->vq_index);
> +
> +		if (qid == vi->max_queue_pairs * 2) {
> +			offset = 0;
> +			bitmap = ctx->bitmap_cq;
> +		} else if (qid % 2) {
> +			offset = num_cq + num_rx * vi->curr_queue_pairs + num_tx * (qid / 2);
> +			offset += VIRTNET_SQ_STATS_LEN;
> +			bitmap = ctx->bitmap_tx;
> +		} else {
> +			offset = num_cq + num_rx * (qid / 2) + VIRTNET_RQ_STATS_LEN;
> +			bitmap = ctx->bitmap_rx;
> +		}
> +
> +		for (i = 0; i < ARRAY_SIZE(virtio_net_stats_map); ++i) {
> +			m = &virtio_net_stats_map[i];
> +
> +			if (m->stat_type & bitmap)
> +				offset += m->num;
> +
> +			if (hdr->type != m->reply_type)
> +				continue;
> +
> +			for (j = 0; j < m->num; ++j) {
> +				v = p + m->desc[j].offset;
> +				ctx->data[offset + j] = le64_to_cpu(*v);

Hi Xuan Zhuo,

Sparse complains about the line above because the type of *v is u64,
but le64_to_cpu() expects __le64.

> +			}
> +
> +			break;
> +		}
> +	}
> +
> +	kfree(reply);
> +	return 0;
> +}
> +
>  static void virtnet_get_strings(struct net_device *dev, u32 stringset, u8 *data)
>  {
>  	struct virtnet_info *vi = netdev_priv(dev);

...
kernel test robot Feb. 29, 2024, 10:35 a.m. UTC | #3
Hi Xuan,

kernel test robot noticed the following build warnings:

[auto build test WARNING on net-next/main]

url:    https://github.com/intel-lab-lkp/linux/commits/Xuan-Zhuo/virtio_net-introduce-device-stats-feature-and-structures/20240227-161123
base:   net-next/main
patch link:    https://lore.kernel.org/r/20240227080303.63894-4-xuanzhuo%40linux.alibaba.com
patch subject: [PATCH net-next v3 3/6] virtio_net: support device stats
config: x86_64-randconfig-121-20240229 (https://download.01.org/0day-ci/archive/20240229/202402291808.cmzZAiYX-lkp@intel.com/config)
compiler: gcc-12 (Debian 12.2.0-14) 12.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240229/202402291808.cmzZAiYX-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202402291808.cmzZAiYX-lkp@intel.com/

sparse warnings: (new ones prefixed by >>)
>> drivers/net/virtio_net.c:3571:57: sparse: sparse: cast to restricted __le64

vim +3571 drivers/net/virtio_net.c

  3466	
  3467	static int virtnet_get_hw_stats(struct virtnet_info *vi,
  3468					struct virtnet_stats_ctx *ctx)
  3469	{
  3470		struct virtio_net_ctrl_queue_stats *req;
  3471		struct virtio_net_stats_reply_hdr *hdr;
  3472		struct scatterlist sgs_in, sgs_out;
  3473		u32 num_rx, num_tx, num_cq, offset;
  3474		int qnum, i, j,  qid, res_size;
  3475		struct virtnet_stats_map *m;
  3476		void *reply, *p;
  3477		u64 bitmap;
  3478		int ok;
  3479		u64 *v;
  3480	
  3481		if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_DEVICE_STATS))
  3482			return 0;
  3483	
  3484		qnum = 0;
  3485		if (ctx->bitmap_cq)
  3486			qnum += 1;
  3487	
  3488		if (ctx->bitmap_rx)
  3489			qnum += vi->curr_queue_pairs;
  3490	
  3491		if (ctx->bitmap_tx)
  3492			qnum += vi->curr_queue_pairs;
  3493	
  3494		req = kcalloc(qnum, sizeof(*req), GFP_KERNEL);
  3495		if (!req)
  3496			return -ENOMEM;
  3497	
  3498		res_size = (ctx->size_rx + ctx->size_tx) * vi->curr_queue_pairs + ctx->size_cq;
  3499		reply = kmalloc(res_size, GFP_KERNEL);
  3500		if (!reply) {
  3501			kfree(req);
  3502			return -ENOMEM;
  3503		}
  3504	
  3505		j = 0;
  3506		for (i = 0; i < vi->curr_queue_pairs; ++i) {
  3507			if (ctx->bitmap_rx) {
  3508				req->stats[j].vq_index = cpu_to_le16(i * 2);
  3509				req->stats[j].types_bitmap[0] = cpu_to_le64(ctx->bitmap_rx);
  3510				++j;
  3511			}
  3512	
  3513			if (ctx->bitmap_tx) {
  3514				req->stats[j].vq_index = cpu_to_le16(i * 2 + 1);
  3515				req->stats[j].types_bitmap[0] = cpu_to_le64(ctx->bitmap_tx);
  3516				++j;
  3517			}
  3518		}
  3519	
  3520		if (ctx->size_cq) {
  3521			req->stats[j].vq_index = cpu_to_le16(vi->max_queue_pairs * 2);
  3522			req->stats[j].types_bitmap[0] = cpu_to_le64(ctx->bitmap_cq);
  3523			++j;
  3524		}
  3525	
  3526		sg_init_one(&sgs_out, req, sizeof(*req) * j);
  3527		sg_init_one(&sgs_in, reply, res_size);
  3528	
  3529		ok = virtnet_send_command(vi, VIRTIO_NET_CTRL_STATS,
  3530					  VIRTIO_NET_CTRL_STATS_GET,
  3531					  &sgs_out, &sgs_in);
  3532		kfree(req);
  3533	
  3534		if (!ok) {
  3535			kfree(reply);
  3536			return ok;
  3537		}
  3538	
  3539		num_rx = VIRTNET_RQ_STATS_LEN + ctx->num_rx;
  3540		num_tx = VIRTNET_SQ_STATS_LEN + ctx->num_tx;
  3541		num_cq = ctx->num_tx;
  3542	
  3543		for (p = reply; p - reply < res_size; p += le16_to_cpu(hdr->size)) {
  3544			hdr = p;
  3545	
  3546			qid = le16_to_cpu(hdr->vq_index);
  3547	
  3548			if (qid == vi->max_queue_pairs * 2) {
  3549				offset = 0;
  3550				bitmap = ctx->bitmap_cq;
  3551			} else if (qid % 2) {
  3552				offset = num_cq + num_rx * vi->curr_queue_pairs + num_tx * (qid / 2);
  3553				offset += VIRTNET_SQ_STATS_LEN;
  3554				bitmap = ctx->bitmap_tx;
  3555			} else {
  3556				offset = num_cq + num_rx * (qid / 2) + VIRTNET_RQ_STATS_LEN;
  3557				bitmap = ctx->bitmap_rx;
  3558			}
  3559	
  3560			for (i = 0; i < ARRAY_SIZE(virtio_net_stats_map); ++i) {
  3561				m = &virtio_net_stats_map[i];
  3562	
  3563				if (m->stat_type & bitmap)
  3564					offset += m->num;
  3565	
  3566				if (hdr->type != m->reply_type)
  3567					continue;
  3568	
  3569				for (j = 0; j < m->num; ++j) {
  3570					v = p + m->desc[j].offset;
> 3571					ctx->data[offset + j] = le64_to_cpu(*v);
  3572				}
  3573	
  3574				break;
  3575			}
  3576		}
  3577	
  3578		kfree(reply);
  3579		return 0;
  3580	}
  3581
Jakub Kicinski March 7, 2024, 4:50 p.m. UTC | #4
CC: Willem and some driver folks for more input, context: extending
https://lore.kernel.org/all/20240306195509.1502746-1-kuba@kernel.org/
to cover virtio stats.

On Tue, 27 Feb 2024 16:03:00 +0800 Xuan Zhuo wrote:
> +static const struct virtnet_stat_desc virtnet_stats_rx_basic_desc[] = {
> +	VIRTNET_STATS_DESC(rx, basic, packets),
> +	VIRTNET_STATS_DESC(rx, basic, bytes),

Covered.

> +	VIRTNET_STATS_DESC(rx, basic, notifications),
> +	VIRTNET_STATS_DESC(rx, basic, interrupts),

I haven't seen HW devices count interrupts coming from a specific
queue (there's usually a lot more queues than IRQs these days),
let's keep these in ethtool -S for now, unless someone has a HW use
case.

> +	VIRTNET_STATS_DESC(rx, basic, drops),
> +	VIRTNET_STATS_DESC(rx, basic, drop_overruns),

These are important, but we need to make sure we have a good definition
for vendors to follow...

drops I'd define as "sum of all packets which came into the device, but
never left it, including but not limited to: packets dropped due to
lack of buffer space, processing errors, explicitly set policies and
packet filters." 
Call it hw-rx-drops ?

overruns is a bit harder to precisely define. I was thinking of
something more broad, like: "packets dropped due to transient lack of
resources, such as buffer space, host descriptors etc."

For context why not just go with virtio spec definition of "no
descriptors" - for HW devices, what exact point in the pipeline drops
depends on how back pressure is configured/implemented, and fetching
descriptors is high latency, so differentiating between "PCIe is slow"
and "host didn't post descriptors" is hard in practice.
Call it hw-rx-drop-overruns ?

> +static const struct virtnet_stat_desc virtnet_stats_tx_basic_desc[] = {
> +	VIRTNET_STATS_DESC(tx, basic, packets),
> +	VIRTNET_STATS_DESC(tx, basic, bytes),
> +
> +	VIRTNET_STATS_DESC(tx, basic, notifications),
> +	VIRTNET_STATS_DESC(tx, basic, interrupts),
> +
> +	VIRTNET_STATS_DESC(tx, basic, drops),

These 5 same as rx.

> +	VIRTNET_STATS_DESC(tx, basic, drop_malformed),

These I'd call hw-tx-drop-errors, "packets dropped because they were
invalid or malformed"?

> +static const struct virtnet_stat_desc virtnet_stats_rx_csum_desc[] = {
> +	VIRTNET_STATS_DESC(rx, csum, csum_valid),

I think in kernel parlance that would translate to CHECKSUM_UNNECESSARY?
So let's call it rx-csum-unnecessary ?
I'd skip the hw- prefix for this one, it doesn't matter to the user if
the HW or SW counted it.

> +	VIRTNET_STATS_DESC(rx, csum, needs_csum),

Hm, I think this is a bit software/virt device specific, presumably
rx-csum-partial for the kernel, up to you whether to make it ethtool -S
or netlink.

> +	VIRTNET_STATS_DESC(rx, csum, csum_none),
> +	VIRTNET_STATS_DESC(rx, csum, csum_bad),

These two make sense as is in netlink, should be fairly commonly
reported by devices. Maybe add a note in "bad" that packets with
bad csum are not discarded, but still delivered to the stack.

> +static const struct virtnet_stat_desc virtnet_stats_tx_csum_desc[] = {
> +	VIRTNET_STATS_DESC(tx, csum, needs_csum),
> +	VIRTNET_STATS_DESC(tx, csum, csum_none),

tx- version of what names we pick for rx-, netlink seems appropriate.

> +static const struct virtnet_stat_desc virtnet_stats_rx_gso_desc[] = {
> +	VIRTNET_STATS_DESC(rx, gso, gso_packets),
> +	VIRTNET_STATS_DESC(rx, gso, gso_bytes),

I used the term "GSO" in conversations about Rx and it often confuses
people. Let's use "GRO", so hw-gro-packets, and hw-gro-bytes ?
Or maybe coalesce? "hw-rx-coalesce" ? That's quite a bit longer..

Ah, and please mention in the doc that these counters "do not cover LRO
i.e. any coalescing implementation which doesn't follow GRO rules".

> +	VIRTNET_STATS_DESC(rx, gso, gso_packets_coalesced),

hw-gro-wire-packets ?
No strong preference on the naming, but I find that saying -wire
makes it 100% clear to everyone what the meaning is.

> +	VIRTNET_STATS_DESC(rx, gso, gso_bytes_coalesced),

The documentation in the virtio spec seems to be identical 
to the one for gso_packets, which gotta be unintentional?
I'm guessing this is hw-gro-wire-bytes? I.e. headers counted
multiple times?

> +static const struct virtnet_stat_desc virtnet_stats_tx_gso_desc[] = {
> +	VIRTNET_STATS_DESC(tx, gso, gso_packets),
> +	VIRTNET_STATS_DESC(tx, gso, gso_bytes),
> +	VIRTNET_STATS_DESC(tx, gso, gso_segments),
> +	VIRTNET_STATS_DESC(tx, gso, gso_segments_bytes),

these 4 make sense as mirror of the Rx

> +	VIRTNET_STATS_DESC(tx, gso, gso_packets_noseg),
> +	VIRTNET_STATS_DESC(tx, gso, gso_bytes_noseg),

Not sure what these are :) unless someone knows what it is and that
HW devices report it, let's keep them in ethtool -S ?

> +static const struct virtnet_stat_desc virtnet_stats_rx_speed_desc[] = {
> +	VIRTNET_STATS_DESC(rx, speed, packets_allowance_exceeded),

hw-rx-drop-ratelimits ?
"Allowance exceeded" is a bit of a mouthful to me, perhaps others
disagree. The description from the virtio spec is quite good.

> +	VIRTNET_STATS_DESC(rx, speed, bytes_allowance_exceeded),

No strong preference whether to expose this as a standard stat or
ethtool -S, we don't generally keep byte counters for drops, so
this would be special.

> +static const struct virtnet_stat_desc virtnet_stats_tx_speed_desc[] = {
> +	VIRTNET_STATS_DESC(tx, speed, packets_allowance_exceeded),
> +	VIRTNET_STATS_DESC(tx, speed, packets_allowance_exceeded),

same as rx
Xuan Zhuo March 11, 2024, 10:48 a.m. UTC | #5
On Thu, 7 Mar 2024 08:50:21 -0800, Jakub Kicinski <kuba@kernel.org> wrote:
> CC: Willem and some driver folks for more input, context: extending
> https://lore.kernel.org/all/20240306195509.1502746-1-kuba@kernel.org/
> to cover virtio stats.
>
> On Tue, 27 Feb 2024 16:03:00 +0800 Xuan Zhuo wrote:
> > +static const struct virtnet_stat_desc virtnet_stats_rx_basic_desc[] = {
> > +	VIRTNET_STATS_DESC(rx, basic, packets),
> > +	VIRTNET_STATS_DESC(rx, basic, bytes),
>
> Covered.

The "packets" and "bytes" counters here come from the HW device.
The driver also counts "packets" and "bytes" in SW,
so there are both HW and SW versions. Do we need to distinguish them?

>
> > +	VIRTNET_STATS_DESC(rx, basic, notifications),
> > +	VIRTNET_STATS_DESC(rx, basic, interrupts),
>
> I haven't seen HW devices count interrupts coming from a specific
> queue (there's usually a lot more queues than IRQs these days),
> let's keep these in ethtool -S for now, unless someone has a HW use
> case.

OK.

>
> > +	VIRTNET_STATS_DESC(rx, basic, drops),
> > +	VIRTNET_STATS_DESC(rx, basic, drop_overruns),
>
> These are important, but we need to make sure we have a good definition
> for vendors to follow...
>
> drops I'd define as "sum of all packets which came into the device, but
> never left it, including but not limited to: packets dropped due to
> lack of buffer space, processing errors, explicitly set policies and
> packet filters."
> Call it hw-rx-drops ?

I agree.

>
> overruns is a bit harder to precisely define. I was thinking of
> something more broad, like: "packets dropped due to transient lack of
> resources, such as buffer space, host descriptors etc."
>
> For context why not just go with virtio spec definition of "no
> descriptors" - for HW devices, what exact point in the pipeline drops
> depends on how back pressure is configured/implemented, and fetching
> descriptors is high latency, so differentiating between "PCIe is slow"
> and "host didn't post descriptors" is hard in practice.
> Call it hw-rx-drop-overruns ?

OK.

>
> > +static const struct virtnet_stat_desc virtnet_stats_tx_basic_desc[] = {
> > +	VIRTNET_STATS_DESC(tx, basic, packets),
> > +	VIRTNET_STATS_DESC(tx, basic, bytes),
> > +
> > +	VIRTNET_STATS_DESC(tx, basic, notifications),
> > +	VIRTNET_STATS_DESC(tx, basic, interrupts),
> > +
> > +	VIRTNET_STATS_DESC(tx, basic, drops),
>
> These 5 same as rx.
>
> > +	VIRTNET_STATS_DESC(tx, basic, drop_malformed),
>
> These I'd call hw-tx-drop-errors, "packets dropped because they were
> invalid or malformed"?

OK.

>
> > +static const struct virtnet_stat_desc virtnet_stats_rx_csum_desc[] = {
> > +	VIRTNET_STATS_DESC(rx, csum, csum_valid),
>
> I think in kernel parlance that would translate to CHECKSUM_UNNECESSARY?
> So let's call it rx-csum-unnecessary ?
> I'd skip the hw- prefix for this one, it doesn't matter to the user if
> the HW or SW counted it.

OK.

>
> > +	VIRTNET_STATS_DESC(rx, csum, needs_csum),
>
> Hm, I think this is a bit software/virt device specific, presumably
> rx-csum-partial for the kernel, up to you whether to make it ethtool -S
> or netlink.

YES. This is specific to virt devices.
I will make it ethtool -S, unless somebody has other advice.

>
> > +	VIRTNET_STATS_DESC(rx, csum, csum_none),
> > +	VIRTNET_STATS_DESC(rx, csum, csum_bad),
>
> These two make sense as is in netlink, should be fairly commonly
> reported by devices. Maybe add a note in "bad" that packets with
> bad csum are not discarded, but still delivered to the stack.

OK.


>
> > +static const struct virtnet_stat_desc virtnet_stats_tx_csum_desc[] = {
> > +	VIRTNET_STATS_DESC(tx, csum, needs_csum),
> > +	VIRTNET_STATS_DESC(tx, csum, csum_none),
>
> tx- version of what names we pick for rx-, netlink seems appropriate.
>
> > +static const struct virtnet_stat_desc virtnet_stats_rx_gso_desc[] = {
> > +	VIRTNET_STATS_DESC(rx, gso, gso_packets),
> > +	VIRTNET_STATS_DESC(rx, gso, gso_bytes),
>
> I used the term "GSO" in conversations about Rx and it often confuses
> people. Let's use "GRO", so hw-gro-packets, and hw-gro-bytes ?
> Or maybe coalesce? "hw-rx-coalesce" ? That's quite a bit longer..

GRO may also confuse people.

I like hw-rx-coalesce-packets, hw-rx-coalesce-bytes.

>
> Ah, and please mention in the doc that these counters "do not cover LRO
> i.e. any coalescing implementation which doesn't follow GRO rules".

OK.

>
> > +	VIRTNET_STATS_DESC(rx, gso, gso_packets_coalesced),
>
> hw-gro-wire-packets ?
> No strong preference on the naming, but I find that saying -wire
> makes it 100% clear to everyone what the meaning is.

ok.


>
> > +	VIRTNET_STATS_DESC(rx, gso, gso_bytes_coalesced),
>
> The documentation in the virtio spec seems to be identical
> to the one for gso_packets, which gotta be unintentional?

One for num, one for bytes.


> I'm guessing this is hw-gro-wire-bytes? I.e. headers counted
> multiple times?

This is used to count the bytes of the small packets before coalescing.

> > +static const struct virtnet_stat_desc virtnet_stats_tx_gso_desc[] = {
> > +	VIRTNET_STATS_DESC(tx, gso, gso_packets),
> > +	VIRTNET_STATS_DESC(tx, gso, gso_bytes),
> > +	VIRTNET_STATS_DESC(tx, gso, gso_segments),
> > +	VIRTNET_STATS_DESC(tx, gso, gso_segments_bytes),
>
> these 4 make sense as mirror of the Rx
>
> > +	VIRTNET_STATS_DESC(tx, gso, gso_packets_noseg),
> > +	VIRTNET_STATS_DESC(tx, gso, gso_bytes_noseg),
>
> Not sure what these are :) unless someone knows what it is and that
> HW devices report it, let's keep them in ethtool -S ?

Just for the virtio. Let's keep them in ethtool -S.

>
> > +static const struct virtnet_stat_desc virtnet_stats_rx_speed_desc[] = {
> > +	VIRTNET_STATS_DESC(rx, speed, packets_allowance_exceeded),
>
> hw-rx-drop-ratelimits ?
> "Allowance exceeded" is a bit of a mouthful to me, perhaps others
> disagree. The description from the virtio spec is quite good.

OK.

>
> > +	VIRTNET_STATS_DESC(rx, speed, bytes_allowance_exceeded),
>
> No strong preference whether to expose this as a standard stat or
> ethtool -S, we don't generally keep byte counters for drops, so
> this would be special.

OK.
>
> > +static const struct virtnet_stat_desc virtnet_stats_tx_speed_desc[] = {
> > +	VIRTNET_STATS_DESC(tx, speed, packets_allowance_exceeded),
> > +	VIRTNET_STATS_DESC(tx, speed, packets_allowance_exceeded),
>
> same as rx


Thanks.
Jakub Kicinski March 11, 2024, 3:43 p.m. UTC | #6
On Mon, 11 Mar 2024 18:48:45 +0800 Xuan Zhuo wrote:
> On Thu, 7 Mar 2024 08:50:21 -0800, Jakub Kicinski <kuba@kernel.org> wrote:
> > CC: Willem and some driver folks for more input, context: extending
> > https://lore.kernel.org/all/20240306195509.1502746-1-kuba@kernel.org/
> > to cover virtio stats.
> >
> > On Tue, 27 Feb 2024 16:03:00 +0800 Xuan Zhuo wrote:  
> > > +static const struct virtnet_stat_desc virtnet_stats_rx_basic_desc[] = {
> > > +	VIRTNET_STATS_DESC(rx, basic, packets),
> > > +	VIRTNET_STATS_DESC(rx, basic, bytes),  
> >
> > Covered.  
> 
> About "packets" and "bytes", here is coming from the hw device.
> Actually the driver also count "packets" and "bytes" in SW.
> So there are HW and SW versions. Do we need to distinguish them?

Yup, there are already separate counters defined for SW 
and HW packets / bytes. For the feature specific counters
I don't think we need to have both SW and HW flavors defined.
But for pure rx / tx packets / bytes users may want to see both.

> > > +static const struct virtnet_stat_desc virtnet_stats_rx_gso_desc[] = {
> > > +	VIRTNET_STATS_DESC(rx, gso, gso_packets),
> > > +	VIRTNET_STATS_DESC(rx, gso, gso_bytes),  
> >
> > I used the term "GSO" in conversations about Rx and it often confuses
> > people. Let's use "GRO", so hw-gro-packets, and hw-gro-bytes ?
> > Or maybe coalesce? "hw-rx-coalesce" ? That's quite a bit longer..  
> 
> GRO may also confuse people.
> 
> I like hw-rx-coalesce-packets, hw-rx-coalesce-bytes.

FWIW the HW offload feature in ethtool -k is called 'rx-gro-hw',
but we can use "hw-rx-coalesce-*" and mention the feature in the
documentation.
diff mbox series

Patch

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index af512d85cd5b..5549fc8508bd 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -128,6 +128,121 @@  static const struct virtnet_stat_desc virtnet_rq_stats_desc[] = {
 #define VIRTNET_SQ_STATS_LEN	ARRAY_SIZE(virtnet_sq_stats_desc)
 #define VIRTNET_RQ_STATS_LEN	ARRAY_SIZE(virtnet_rq_stats_desc)
 
+#define VIRTNET_STATS_DESC(qtype, class, name) /* {"name", offset of qtype_name} */ \
+	{#name, offsetof(struct virtio_net_stats_ ## qtype ## _ ## class, qtype ## _ ## name)}
+
+static const struct virtnet_stat_desc virtnet_stats_cvq_desc[] = { /* cvq counters */
+	{"command_num", offsetof(struct virtio_net_stats_cvq, command_num)},
+	{"ok_num", offsetof(struct virtio_net_stats_cvq, ok_num)}
+};
+
+static const struct virtnet_stat_desc virtnet_stats_rx_basic_desc[] = { /* rx basic */
+	VIRTNET_STATS_DESC(rx, basic, packets),
+	VIRTNET_STATS_DESC(rx, basic, bytes),
+
+	VIRTNET_STATS_DESC(rx, basic, notifications),
+	VIRTNET_STATS_DESC(rx, basic, interrupts),
+
+	VIRTNET_STATS_DESC(rx, basic, drops),
+	VIRTNET_STATS_DESC(rx, basic, drop_overruns),
+};
+
+static const struct virtnet_stat_desc virtnet_stats_tx_basic_desc[] = { /* tx basic */
+	VIRTNET_STATS_DESC(tx, basic, packets),
+	VIRTNET_STATS_DESC(tx, basic, bytes),
+
+	VIRTNET_STATS_DESC(tx, basic, notifications),
+	VIRTNET_STATS_DESC(tx, basic, interrupts),
+
+	VIRTNET_STATS_DESC(tx, basic, drops),
+	VIRTNET_STATS_DESC(tx, basic, drop_malformed),
+};
+
+static const struct virtnet_stat_desc virtnet_stats_rx_csum_desc[] = { /* rx csum */
+	VIRTNET_STATS_DESC(rx, csum, csum_valid),
+	VIRTNET_STATS_DESC(rx, csum, needs_csum),
+
+	VIRTNET_STATS_DESC(rx, csum, csum_none),
+	VIRTNET_STATS_DESC(rx, csum, csum_bad),
+};
+
+static const struct virtnet_stat_desc virtnet_stats_tx_csum_desc[] = { /* tx csum */
+	VIRTNET_STATS_DESC(tx, csum, needs_csum),
+	VIRTNET_STATS_DESC(tx, csum, csum_none),
+};
+
+static const struct virtnet_stat_desc virtnet_stats_rx_gso_desc[] = { /* rx gso */
+	VIRTNET_STATS_DESC(rx, gso, gso_packets),
+	VIRTNET_STATS_DESC(rx, gso, gso_bytes),
+	VIRTNET_STATS_DESC(rx, gso, gso_packets_coalesced),
+	VIRTNET_STATS_DESC(rx, gso, gso_bytes_coalesced),
+};
+
+static const struct virtnet_stat_desc virtnet_stats_tx_gso_desc[] = { /* tx gso */
+	VIRTNET_STATS_DESC(tx, gso, gso_packets),
+	VIRTNET_STATS_DESC(tx, gso, gso_bytes),
+	VIRTNET_STATS_DESC(tx, gso, gso_segments),
+	VIRTNET_STATS_DESC(tx, gso, gso_segments_bytes),
+	VIRTNET_STATS_DESC(tx, gso, gso_packets_noseg),
+	VIRTNET_STATS_DESC(tx, gso, gso_bytes_noseg),
+};
+
+static const struct virtnet_stat_desc virtnet_stats_rx_speed_desc[] = { /* rx speed */
+	VIRTNET_STATS_DESC(rx, speed, packets_allowance_exceeded),
+	VIRTNET_STATS_DESC(rx, speed, bytes_allowance_exceeded),
+};
+
+static const struct virtnet_stat_desc virtnet_stats_tx_speed_desc[] = { /* tx speed */
+	VIRTNET_STATS_DESC(tx, speed, packets_allowance_exceeded),
+	VIRTNET_STATS_DESC(tx, speed, bytes_allowance_exceeded),	/* bytes, as in the rx table */
+};
+
+struct virtnet_stats_map { /* one entry per device stat type */
+	/* bit of this stat type in the device stats bitmap */
+	u64 stat_type;
+
+	/* size in bytes of the device response for this stat type */
+	u32 len;
+
+	/* number of counters (entries of desc) in the response */
+	u32 num;
+	/* kind of queue this stat type is reported for, one of: */
+#define VIRTNET_STATS_Q_TYPE_RX 0
+#define VIRTNET_STATS_Q_TYPE_TX 1
+#define VIRTNET_STATS_Q_TYPE_CQ 2
+	u32 queue_type;
+
+	/* value of the reply header's type field that identifies this stat */
+	u8 reply_type;
+
+	/* name and offset within the response of each counter */
+	const struct virtnet_stat_desc *desc;
+};
+
+#define VIRTNET_DEVICE_STATS_MAP_ITEM(TYPE, type, queue_type)	\
+	{ /* positional initializer of struct virtnet_stats_map */ \
+		VIRTIO_NET_STATS_TYPE_##TYPE,		/* stat_type */	\
+		sizeof(struct virtio_net_stats_ ## type),	/* len */ \
+		ARRAY_SIZE(virtnet_stats_ ## type ##_desc),	/* num */ \
+		VIRTNET_STATS_Q_TYPE_##queue_type,	/* queue_type */ \
+		VIRTIO_NET_STATS_TYPE_REPLY_##TYPE,	/* reply_type */ \
+		&virtnet_stats_##type##_desc[0]		/* desc */	\
+	}
+
+static struct virtnet_stats_map virtio_net_stats_map[] = { /* in ethtool string order */
+	VIRTNET_DEVICE_STATS_MAP_ITEM(CVQ, cvq, CQ),
+	/* stat types tagged with the RX queue type */
+	VIRTNET_DEVICE_STATS_MAP_ITEM(RX_BASIC, rx_basic, RX),
+	VIRTNET_DEVICE_STATS_MAP_ITEM(RX_CSUM,  rx_csum,  RX),
+	VIRTNET_DEVICE_STATS_MAP_ITEM(RX_GSO,   rx_gso,   RX),
+	VIRTNET_DEVICE_STATS_MAP_ITEM(RX_SPEED, rx_speed, RX),
+	/* stat types tagged with the TX queue type */
+	VIRTNET_DEVICE_STATS_MAP_ITEM(TX_BASIC, tx_basic, TX),
+	VIRTNET_DEVICE_STATS_MAP_ITEM(TX_CSUM,  tx_csum,  TX),
+	VIRTNET_DEVICE_STATS_MAP_ITEM(TX_GSO,   tx_gso,   TX),
+	VIRTNET_DEVICE_STATS_MAP_ITEM(TX_SPEED, tx_speed, TX),
+};
+
 struct virtnet_interrupt_coalesce {
 	u32 max_packets;
 	u32 max_usecs;
@@ -244,6 +359,7 @@  struct control_buf {
 	struct virtio_net_ctrl_coal_tx coal_tx;
 	struct virtio_net_ctrl_coal_rx coal_rx;
 	struct virtio_net_ctrl_coal_vq coal_vq;
+	struct virtio_net_stats_capabilities stats_cap;
 };
 
 struct virtnet_info {
@@ -329,6 +445,8 @@  struct virtnet_info {
 
 	/* failover when STANDBY feature enabled */
 	struct failover *failover;
+
+	u64 device_stats_cap;
 };
 
 struct padded_vnet_hdr {
@@ -3263,6 +3381,204 @@  static int virtnet_set_channels(struct net_device *dev,
 	return err;
 }
 
+/* Write the ethtool names of the device counters of one queue at *data and advance
+ * *data past them. @type is VIRTNET_STATS_Q_TYPE_*; @qid is unused for the CQ. */
+static void virtnet_get_hw_stats_string(struct virtnet_info *vi, int type, int qid, u8 **data)
+{
+	const struct virtnet_stats_map *map;
+	u8 *cursor = *data;
+	const char *name;
+	int n, k;
+
+	if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_DEVICE_STATS))
+		return;
+	for (n = 0; n < ARRAY_SIZE(virtio_net_stats_map); ++n) {
+		map = &virtio_net_stats_map[n];
+		if (map->queue_type != type || !(vi->device_stats_cap & map->stat_type))
+			continue;
+		for (k = 0; k < map->num; ++k) {
+			name = map->desc[k].desc;
+			switch (type) {
+			case VIRTNET_STATS_Q_TYPE_RX:
+				ethtool_sprintf(&cursor, "rx_queue_hw_%u_%s", qid, name);
+				break;
+			case VIRTNET_STATS_Q_TYPE_TX:
+				ethtool_sprintf(&cursor, "tx_queue_hw_%u_%s", qid, name);
+				break;
+			case VIRTNET_STATS_Q_TYPE_CQ:
+				ethtool_sprintf(&cursor, "cq_hw_%s", name);
+				break;
+			}
+		}
+	}
+	*data = cursor;
+}
+
+struct virtnet_stats_ctx { /* per-queue-type totals, filled by virtnet_stats_ctx_init() */
+	u32 num_cq;	/* number of device counters for the CQ */
+	u32 num_rx;	/* number of device counters per rx queue */
+	u32 num_tx;	/* number of device counters per tx queue */
+	/* OR of the stat_type bits enabled for each queue type */
+	u64 bitmap_cq;
+	u64 bitmap_rx;
+	u64 bitmap_tx;
+	/* sum of the response sizes, in bytes, for each queue type */
+	u32 size_cq;
+	u32 size_rx;
+	u32 size_tx;
+	/* destination of the counter values; NULL when only the counts are needed */
+	u64 *data;
+};
+
+/* Add up, per queue type, the stat types whose bit is set in vi->device_stats_cap.
+ * Only ctx->data is assigned; the other fields are accumulated, so pass a zeroed ctx.
+ */
+static void virtnet_stats_ctx_init(struct virtnet_info *vi,
+				   struct virtnet_stats_ctx *ctx, u64 *data)
+{
+	const struct virtnet_stats_map *map;
+	unsigned int n;
+
+	ctx->data = data;
+	for (n = 0; n < ARRAY_SIZE(virtio_net_stats_map); ++n) {
+		map = &virtio_net_stats_map[n];
+		if (!(vi->device_stats_cap & map->stat_type))
+			continue;	/* bit not set in device_stats_cap */
+		switch (map->queue_type) {
+		case VIRTNET_STATS_Q_TYPE_CQ:
+			ctx->bitmap_cq |= map->stat_type;
+			ctx->num_cq += map->num;
+			ctx->size_cq += map->len;
+			break;
+		case VIRTNET_STATS_Q_TYPE_RX:
+			ctx->bitmap_rx |= map->stat_type;
+			ctx->num_rx += map->num;
+			ctx->size_rx += map->len;
+			break;
+		case VIRTNET_STATS_Q_TYPE_TX:
+			ctx->bitmap_tx |= map->stat_type;
+			ctx->num_tx += map->num;
+			ctx->size_tx += map->len;
+			break;
+		}
+	}
+}
+
+/* Query the device counters and store them in the device slots of ctx->data, laid
+ * out as: CQ, then per rx queue [driver|device], then per tx queue [driver|device].
+ * Returns 0 on success or when there is nothing to query, otherwise -errno. */
+static int virtnet_get_hw_stats(struct virtnet_info *vi,
+				struct virtnet_stats_ctx *ctx)
+{
+	struct virtio_net_ctrl_queue_stats *req;
+	struct virtio_net_stats_reply_hdr *hdr;
+	struct scatterlist sgs_in, sgs_out;
+	u32 num_rx, num_tx, num_cq, offset;
+	int qnum, i, j, qid, res_size;
+	struct virtnet_stats_map *m;
+	void *reply, *p;
+	u64 bitmap;
+	int ok;
+	u64 *v;
+
+	if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_DEVICE_STATS))
+		return 0;
+
+	/* One request entry per queue that has at least one stat type enabled. */
+	qnum = ctx->bitmap_cq ? 1 : 0;
+	if (ctx->bitmap_rx)
+		qnum += vi->curr_queue_pairs;
+	if (ctx->bitmap_tx)
+		qnum += vi->curr_queue_pairs;
+	if (!qnum)
+		return 0;	/* nothing to request: do not send an empty command */
+
+	req = kcalloc(qnum, sizeof(*req), GFP_KERNEL);
+	if (!req)
+		return -ENOMEM;
+
+	res_size = (ctx->size_rx + ctx->size_tx) * vi->curr_queue_pairs + ctx->size_cq;
+	reply = kzalloc(res_size, GFP_KERNEL);	/* zeroed: unwritten tail parses as size 0 */
+	if (!reply) {
+		kfree(req);
+		return -ENOMEM;
+	}
+
+	j = 0;
+	for (i = 0; i < vi->curr_queue_pairs; ++i) {
+		if (ctx->bitmap_rx) {
+			req->stats[j].vq_index = cpu_to_le16(i * 2);
+			req->stats[j].types_bitmap[0] = cpu_to_le64(ctx->bitmap_rx);
+			++j;
+		}
+		if (ctx->bitmap_tx) {
+			req->stats[j].vq_index = cpu_to_le16(i * 2 + 1);
+			req->stats[j].types_bitmap[0] = cpu_to_le64(ctx->bitmap_tx);
+			++j;
+		}
+	}
+
+	if (ctx->bitmap_cq) {
+		req->stats[j].vq_index = cpu_to_le16(vi->max_queue_pairs * 2);
+		req->stats[j].types_bitmap[0] = cpu_to_le64(ctx->bitmap_cq);
+		++j;
+	}
+
+	sg_init_one(&sgs_out, req, sizeof(*req) * j);
+	sg_init_one(&sgs_in, reply, res_size);
+
+	ok = virtnet_send_command(vi, VIRTIO_NET_CTRL_STATS, VIRTIO_NET_CTRL_STATS_GET,
+				  &sgs_out, &sgs_in);
+	kfree(req);
+	if (!ok) {
+		kfree(reply);
+		return -EINVAL;	/* "ok" is 0 here; returning it would signal success */
+	}
+
+	num_rx = VIRTNET_RQ_STATS_LEN + ctx->num_rx;
+	num_tx = VIRTNET_SQ_STATS_LEN + ctx->num_tx;
+	num_cq = ctx->num_cq;
+
+	for (p = reply; p - reply < res_size; p += le16_to_cpu(hdr->size)) {
+		hdr = p;
+		if (!hdr->size)
+			break;	/* a zero-sized reply would never advance p */
+
+		qid = le16_to_cpu(hdr->vq_index);
+		/* Start of this queue's device counters within ctx->data. */
+		if (qid == vi->max_queue_pairs * 2) {
+			offset = 0;
+			bitmap = ctx->bitmap_cq;
+		} else if (qid % 2) {
+			offset = num_cq + num_rx * vi->curr_queue_pairs + num_tx * (qid / 2);
+			offset += VIRTNET_SQ_STATS_LEN;
+			bitmap = ctx->bitmap_tx;
+		} else {
+			offset = num_cq + num_rx * (qid / 2) + VIRTNET_RQ_STATS_LEN;
+			bitmap = ctx->bitmap_rx;
+		}
+
+		for (i = 0; i < ARRAY_SIZE(virtio_net_stats_map); ++i) {
+			m = &virtio_net_stats_map[i];
+			if (hdr->type != m->reply_type) {
+				/* Skip the slots of enabled types that precede this reply. */
+				if (m->stat_type & bitmap)
+					offset += m->num;
+				continue;
+			}
+
+			for (j = 0; j < m->num; ++j) {
+				v = p + m->desc[j].offset;
+				ctx->data[offset + j] = le64_to_cpu(*v);
+			}
+			break;
+		}
+	}
+
+	kfree(reply);
+	return 0;
+}
+
 static void virtnet_get_strings(struct net_device *dev, u32 stringset, u8 *data)
 {
 	struct virtnet_info *vi = netdev_priv(dev);
@@ -3271,16 +3587,22 @@  static void virtnet_get_strings(struct net_device *dev, u32 stringset, u8 *data)
 
 	switch (stringset) {
 	case ETH_SS_STATS:
+		virtnet_get_hw_stats_string(vi, VIRTNET_STATS_Q_TYPE_CQ, 0, &p);
+
 		for (i = 0; i < vi->curr_queue_pairs; i++) {
 			for (j = 0; j < VIRTNET_RQ_STATS_LEN; j++)
 				ethtool_sprintf(&p, "rx_queue_%u_%s", i,
 						virtnet_rq_stats_desc[j].desc);
+
+			virtnet_get_hw_stats_string(vi, VIRTNET_STATS_Q_TYPE_RX, i, &p);
 		}
 
 		for (i = 0; i < vi->curr_queue_pairs; i++) {
 			for (j = 0; j < VIRTNET_SQ_STATS_LEN; j++)
 				ethtool_sprintf(&p, "tx_queue_%u_%s", i,
 						virtnet_sq_stats_desc[j].desc);
+
+			virtnet_get_hw_stats_string(vi, VIRTNET_STATS_Q_TYPE_TX, i, &p);
 		}
 		break;
 	}
@@ -3289,11 +3611,35 @@  static void virtnet_get_strings(struct net_device *dev, u32 stringset, u8 *data)
 static int virtnet_get_sset_count(struct net_device *dev, int sset)
 {
 	struct virtnet_info *vi = netdev_priv(dev);
+	struct virtnet_stats_ctx ctx = {0};
+	u32 pair_count;
 
 	switch (sset) {
 	case ETH_SS_STATS:
-		return vi->curr_queue_pairs * (VIRTNET_RQ_STATS_LEN +
-					       VIRTNET_SQ_STATS_LEN);
+		if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_DEVICE_STATS) &&
+		    !vi->device_stats_cap) {
+			struct scatterlist sg;
+
+			sg_init_one(&sg, &vi->ctrl->stats_cap, sizeof(vi->ctrl->stats_cap));
+
+			if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_STATS,
+						  VIRTIO_NET_CTRL_STATS_QUERY,
+						  NULL, &sg)) {
+				dev_warn(&dev->dev, "Fail to get stats capability\n");
+			} else {
+				__le64 v;
+
+				v = vi->ctrl->stats_cap.supported_stats_types[0];
+				vi->device_stats_cap = le64_to_cpu(v);
+			}
+		}
+
+		virtnet_stats_ctx_init(vi, &ctx, NULL);
+
+		pair_count = VIRTNET_RQ_STATS_LEN + VIRTNET_SQ_STATS_LEN;
+		pair_count += ctx.num_rx + ctx.num_tx;
+
+		return ctx.num_cq + vi->curr_queue_pairs * pair_count;
 	default:
 		return -EOPNOTSUPP;
 	}
@@ -3303,11 +3649,17 @@  static void virtnet_get_ethtool_stats(struct net_device *dev,
 				      struct ethtool_stats *stats, u64 *data)
 {
 	struct virtnet_info *vi = netdev_priv(dev);
-	unsigned int idx = 0, start, i, j;
+	struct virtnet_stats_ctx ctx = {0};
+	unsigned int idx, start, i, j;
 	const u8 *stats_base;
 	const u64_stats_t *p;
 	size_t offset;
 
+	virtnet_stats_ctx_init(vi, &ctx, data);
+	virtnet_get_hw_stats(vi, &ctx);
+
+	idx = ctx.num_cq;
+
 	for (i = 0; i < vi->curr_queue_pairs; i++) {
 		struct receive_queue *rq = &vi->rq[i];
 
@@ -3321,6 +3673,7 @@  static void virtnet_get_ethtool_stats(struct net_device *dev,
 			}
 		} while (u64_stats_fetch_retry(&rq->stats.syncp, start));
 		idx += VIRTNET_RQ_STATS_LEN;
+		idx += ctx.num_rx;
 	}
 
 	for (i = 0; i < vi->curr_queue_pairs; i++) {
@@ -3336,6 +3689,7 @@  static void virtnet_get_ethtool_stats(struct net_device *dev,
 			}
 		} while (u64_stats_fetch_retry(&sq->stats.syncp, start));
 		idx += VIRTNET_SQ_STATS_LEN;
+		idx += ctx.num_tx;
 	}
 }
 
@@ -4963,7 +5317,7 @@  static struct virtio_device_id id_table[] = {
 	VIRTIO_NET_F_SPEED_DUPLEX, VIRTIO_NET_F_STANDBY, \
 	VIRTIO_NET_F_RSS, VIRTIO_NET_F_HASH_REPORT, VIRTIO_NET_F_NOTF_COAL, \
 	VIRTIO_NET_F_VQ_NOTF_COAL, \
-	VIRTIO_NET_F_GUEST_HDRLEN
+	VIRTIO_NET_F_GUEST_HDRLEN, VIRTIO_NET_F_DEVICE_STATS
 
 static unsigned int features[] = {
 	VIRTNET_FEATURES,