diff mbox series

[net-next,v4] net: virtio_net: notifications coalescing support

Message ID 20220718091102.498774-1-alvaro.karsz@solid-run.com (mailing list archive)
State Changes Requested
Delegated to: Netdev Maintainers
Headers show
Series [net-next,v4] net: virtio_net: notifications coalescing support | expand

Checks

Context Check Description
netdev/tree_selection success Clearly marked for net-next
netdev/fixes_present success Fixes tag not required for -next series
netdev/subject_prefix success Link
netdev/cover_letter success Single patches do not need cover letters
netdev/patch_count success Link
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 14 this patch: 14
netdev/cc_maintainers warning 1 maintainers not CCed: virtualization@lists.linux-foundation.org
netdev/build_clang success Errors and warnings before: 6 this patch: 6
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 14 this patch: 14
netdev/checkpatch warning WARNING: line length of 81 exceeds 80 columns WARNING: line length of 87 exceeds 80 columns
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Alvaro Karsz July 18, 2022, 9:11 a.m. UTC
New VirtIO network feature: VIRTIO_NET_F_NOTF_COAL.

Control a Virtio network device notifications coalescing parameters
using the control virtqueue.

A device that supports this fetature can receive
VIRTIO_NET_CTRL_NOTF_COAL control commands.

- VIRTIO_NET_CTRL_NOTF_COAL_TX_SET:
  Ask the network device to change the following parameters:
  - tx_usecs: Maximum number of usecs to delay a TX notification.
  - tx_max_packets: Maximum number of packets to send before a
    TX notification.

- VIRTIO_NET_CTRL_NOTF_COAL_RX_SET:
  Ask the network device to change the following parameters:
  - rx_usecs: Maximum number of usecs to delay a RX notification.
  - rx_max_packets: Maximum number of packets to receive before a
    RX notification.

VirtIO spec. patch:
https://lists.oasis-open.org/archives/virtio-comment/202206/msg00100.html

Signed-off-by: Alvaro Karsz <alvaro.karsz@solid-run.com>
---
v2:
	- Fix type assignments warnings found with sparse.
	- Fix a few typos.

v3:
  - Change the coalescing parameters in a dedicated function.
  - Return -EBUSY from the set coalescing function when the device's
    link is up, even if the notifications coalescing feature is negotiated.

v4:
  - If link is up and we need to update NAPI weight, return -EBUSY before
    sending the coalescing commands to the device
---
 drivers/net/virtio_net.c        | 111 +++++++++++++++++++++++++++-----
 include/uapi/linux/virtio_net.h |  34 +++++++++-
 2 files changed, 129 insertions(+), 16 deletions(-)

--
2.32.0

Comments

Jason Wang July 19, 2022, 7:03 a.m. UTC | #1
在 2022/7/18 17:11, Alvaro Karsz 写道:
> New VirtIO network feature: VIRTIO_NET_F_NOTF_COAL.
>
> Control a Virtio network device notifications coalescing parameters
> using the control virtqueue.
>
> A device that supports this fetature can receive
> VIRTIO_NET_CTRL_NOTF_COAL control commands.
>
> - VIRTIO_NET_CTRL_NOTF_COAL_TX_SET:
>    Ask the network device to change the following parameters:
>    - tx_usecs: Maximum number of usecs to delay a TX notification.
>    - tx_max_packets: Maximum number of packets to send before a
>      TX notification.
>
> - VIRTIO_NET_CTRL_NOTF_COAL_RX_SET:
>    Ask the network device to change the following parameters:
>    - rx_usecs: Maximum number of usecs to delay a RX notification.
>    - rx_max_packets: Maximum number of packets to receive before a
>      RX notification.
>
> VirtIO spec. patch:
> https://lists.oasis-open.org/archives/virtio-comment/202206/msg00100.html
>
> Signed-off-by: Alvaro Karsz <alvaro.karsz@solid-run.com>
> ---
> v2:
> 	- Fix type assignments warnings found with sparse.
> 	- Fix a few typos.
>
> v3:
>    - Change the coalescing parameters in a dedicated function.
>    - Return -EBUSY from the set coalescing function when the device's
>      link is up, even if the notifications coalescing feature is negotiated.
>
> v4:
>    - If link is up and we need to update NAPI weight, return -EBUSY before
>      sending the coalescing commands to the device
> ---
>   drivers/net/virtio_net.c        | 111 +++++++++++++++++++++++++++-----
>   include/uapi/linux/virtio_net.h |  34 +++++++++-
>   2 files changed, 129 insertions(+), 16 deletions(-)
>
> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> index 356cf8dd416..4fde66bd511 100644
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
> @@ -261,6 +261,12 @@ struct virtnet_info {
>   	u8 duplex;
>   	u32 speed;
>
> +	/* Interrupt coalescing settings */
> +	u32 tx_usecs;
> +	u32 rx_usecs;
> +	u32 tx_max_packets;
> +	u32 rx_max_packets;
> +
>   	unsigned long guest_offloads;
>   	unsigned long guest_offloads_capable;
>
> @@ -2587,27 +2593,89 @@ static int virtnet_get_link_ksettings(struct net_device *dev,
>   	return 0;
>   }
>
> +static int virtnet_send_notf_coal_cmds(struct virtnet_info *vi,
> +				       struct ethtool_coalesce *ec)
> +{
> +	struct scatterlist sgs_tx, sgs_rx;
> +	struct virtio_net_ctrl_coal_tx coal_tx;
> +	struct virtio_net_ctrl_coal_rx coal_rx;
> +
> +	coal_tx.tx_usecs = cpu_to_le32(ec->tx_coalesce_usecs);
> +	coal_tx.tx_max_packets = cpu_to_le32(ec->tx_max_coalesced_frames);
> +	sg_init_one(&sgs_tx, &coal_tx, sizeof(coal_tx));
> +
> +	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL,
> +				  VIRTIO_NET_CTRL_NOTF_COAL_TX_SET,
> +				  &sgs_tx))
> +		return -EINVAL;
> +
> +	/* Save parameters */
> +	vi->tx_usecs = ec->tx_coalesce_usecs;
> +	vi->tx_max_packets = ec->tx_max_coalesced_frames;
> +
> +	coal_rx.rx_usecs = cpu_to_le32(ec->rx_coalesce_usecs);
> +	coal_rx.rx_max_packets = cpu_to_le32(ec->rx_max_coalesced_frames);
> +	sg_init_one(&sgs_rx, &coal_rx, sizeof(coal_rx));
> +
> +	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL,
> +				  VIRTIO_NET_CTRL_NOTF_COAL_RX_SET,
> +				  &sgs_rx))
> +		return -EINVAL;
> +
> +	/* Save parameters */
> +	vi->rx_usecs = ec->rx_coalesce_usecs;
> +	vi->rx_max_packets = ec->rx_max_coalesced_frames;
> +
> +	return 0;
> +}
> +
> +static int virtnet_coal_params_supported(struct ethtool_coalesce *ec)
> +{
> +	/* usecs coalescing is supported only if VIRTIO_NET_F_NOTF_COAL
> +	 * feature is negotiated.
> +	 */
> +	if (ec->rx_coalesce_usecs || ec->tx_coalesce_usecs)
> +		return -EOPNOTSUPP;


This seems an independent fix? I wonder what if we just leave it as before.

Other looks good to me.

Thanks


> +
> +	if (ec->tx_max_coalesced_frames > 1 ||
> +	    ec->rx_max_coalesced_frames != 1)
> +		return -EINVAL;
> +
> +	return 0;
> +}
> +
>   static int virtnet_set_coalesce(struct net_device *dev,
>   				struct ethtool_coalesce *ec,
>   				struct kernel_ethtool_coalesce *kernel_coal,
>   				struct netlink_ext_ack *extack)
>   {
>   	struct virtnet_info *vi = netdev_priv(dev);
> -	int i, napi_weight;
> -
> -	if (ec->tx_max_coalesced_frames > 1 ||
> -	    ec->rx_max_coalesced_frames != 1)
> -		return -EINVAL;
> +	int ret, i, napi_weight;
> +	bool update_napi = false;
>
> +	/* Can't change NAPI weight if the link is up */
>   	napi_weight = ec->tx_max_coalesced_frames ? NAPI_POLL_WEIGHT : 0;
>   	if (napi_weight ^ vi->sq[0].napi.weight) {
>   		if (dev->flags & IFF_UP)
>   			return -EBUSY;
> +		else
> +			update_napi = true;
> +	}
> +
> +	if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL))
> +		ret = virtnet_send_notf_coal_cmds(vi, ec);
> +	else
> +		ret = virtnet_coal_params_supported(ec);
> +
> +	if (ret)
> +		return ret;
> +
> +	if (update_napi) {
>   		for (i = 0; i < vi->max_queue_pairs; i++)
>   			vi->sq[i].napi.weight = napi_weight;
>   	}
>
> -	return 0;
> +	return ret;
>   }
>
>   static int virtnet_get_coalesce(struct net_device *dev,
> @@ -2615,16 +2683,19 @@ static int virtnet_get_coalesce(struct net_device *dev,
>   				struct kernel_ethtool_coalesce *kernel_coal,
>   				struct netlink_ext_ack *extack)
>   {
> -	struct ethtool_coalesce ec_default = {
> -		.cmd = ETHTOOL_GCOALESCE,
> -		.rx_max_coalesced_frames = 1,
> -	};
>   	struct virtnet_info *vi = netdev_priv(dev);
>
> -	memcpy(ec, &ec_default, sizeof(ec_default));
> +	if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) {
> +		ec->rx_coalesce_usecs = vi->rx_usecs;
> +		ec->tx_coalesce_usecs = vi->tx_usecs;
> +		ec->tx_max_coalesced_frames = vi->tx_max_packets;
> +		ec->rx_max_coalesced_frames = vi->rx_max_packets;
> +	} else {
> +		ec->rx_max_coalesced_frames = 1;
>
> -	if (vi->sq[0].napi.weight)
> -		ec->tx_max_coalesced_frames = 1;
> +		if (vi->sq[0].napi.weight)
> +			ec->tx_max_coalesced_frames = 1;
> +	}
>
>   	return 0;
>   }
> @@ -2743,7 +2814,8 @@ static int virtnet_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info)
>   }
>
>   static const struct ethtool_ops virtnet_ethtool_ops = {
> -	.supported_coalesce_params = ETHTOOL_COALESCE_MAX_FRAMES,
> +	.supported_coalesce_params = ETHTOOL_COALESCE_MAX_FRAMES |
> +		ETHTOOL_COALESCE_USECS,
>   	.get_drvinfo = virtnet_get_drvinfo,
>   	.get_link = ethtool_op_get_link,
>   	.get_ringparam = virtnet_get_ringparam,
> @@ -3411,6 +3483,8 @@ static bool virtnet_validate_features(struct virtio_device *vdev)
>   	     VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_RSS,
>   			     "VIRTIO_NET_F_CTRL_VQ") ||
>   	     VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_HASH_REPORT,
> +			     "VIRTIO_NET_F_CTRL_VQ") ||
> +	     VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_NOTF_COAL,
>   			     "VIRTIO_NET_F_CTRL_VQ"))) {
>   		return false;
>   	}
> @@ -3546,6 +3620,13 @@ static int virtnet_probe(struct virtio_device *vdev)
>   	if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF))
>   		vi->mergeable_rx_bufs = true;
>
> +	if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) {
> +		vi->rx_usecs = 0;
> +		vi->tx_usecs = 0;
> +		vi->tx_max_packets = 0;
> +		vi->rx_max_packets = 0;
> +	}
> +
>   	if (virtio_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT))
>   		vi->has_rss_hash_report = true;
>
> @@ -3780,7 +3861,7 @@ static struct virtio_device_id id_table[] = {
>   	VIRTIO_NET_F_CTRL_MAC_ADDR, \
>   	VIRTIO_NET_F_MTU, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, \
>   	VIRTIO_NET_F_SPEED_DUPLEX, VIRTIO_NET_F_STANDBY, \
> -	VIRTIO_NET_F_RSS, VIRTIO_NET_F_HASH_REPORT
> +	VIRTIO_NET_F_RSS, VIRTIO_NET_F_HASH_REPORT, VIRTIO_NET_F_NOTF_COAL
>
>   static unsigned int features[] = {
>   	VIRTNET_FEATURES,
> diff --git a/include/uapi/linux/virtio_net.h b/include/uapi/linux/virtio_net.h
> index 3f55a4215f1..29ced55514d 100644
> --- a/include/uapi/linux/virtio_net.h
> +++ b/include/uapi/linux/virtio_net.h
> @@ -56,7 +56,7 @@
>   #define VIRTIO_NET_F_MQ	22	/* Device supports Receive Flow
>   					 * Steering */
>   #define VIRTIO_NET_F_CTRL_MAC_ADDR 23	/* Set MAC address */
> -
> +#define VIRTIO_NET_F_NOTF_COAL	53	/* Guest can handle notifications coalescing */
>   #define VIRTIO_NET_F_HASH_REPORT  57	/* Supports hash report */
>   #define VIRTIO_NET_F_RSS	  60	/* Supports RSS RX steering */
>   #define VIRTIO_NET_F_RSC_EXT	  61	/* extended coalescing info */
> @@ -355,4 +355,36 @@ struct virtio_net_hash_config {
>   #define VIRTIO_NET_CTRL_GUEST_OFFLOADS   5
>   #define VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET        0
>
> +/*
> + * Control notifications coalescing.
> + *
> + * Request the device to change the notifications coalescing parameters.
> + *
> + * Available with the VIRTIO_NET_F_NOTF_COAL feature bit.
> + */
> +#define VIRTIO_NET_CTRL_NOTF_COAL		6
> +/*
> + * Set the tx-usecs/tx-max-packets patameters.
> + * tx-usecs - Maximum number of usecs to delay a TX notification.
> + * tx-max-packets - Maximum number of packets to send before a TX notification.
> + */
> +struct virtio_net_ctrl_coal_tx {
> +	__le32 tx_max_packets;
> +	__le32 tx_usecs;
> +};
> +
> +#define VIRTIO_NET_CTRL_NOTF_COAL_TX_SET		0
> +
> +/*
> + * Set the rx-usecs/rx-max-packets patameters.
> + * rx-usecs - Maximum number of usecs to delay a RX notification.
> + * rx-max-frames - Maximum number of packets to receive before a RX notification.
> + */
> +struct virtio_net_ctrl_coal_rx {
> +	__le32 rx_max_packets;
> +	__le32 rx_usecs;
> +};
> +
> +#define VIRTIO_NET_CTRL_NOTF_COAL_RX_SET		1
> +
>   #endif /* _UAPI_LINUX_VIRTIO_NET_H */
> --
> 2.32.0
>
Alvaro Karsz July 19, 2022, 7:08 a.m. UTC | #2
Hi Jason,

> This seems an independent fix? I wonder what if we just leave it as before.


Before this patch, only ETHTOOL_COALESCE_MAX_FRAMES was supported, now
we support ETHTOOL_COALESCE_USECS as well.

ETHTOOL_COALESCE_USECS is meaningless if VIRTIO_NET_F_NOTF_COAL
feature is not negotiated, so I added the mentioned part.
Jason Wang July 19, 2022, 7:12 a.m. UTC | #3
On Tue, Jul 19, 2022 at 3:09 PM Alvaro Karsz <alvaro.karsz@solid-run.com> wrote:
>
> Hi Jason,
>
> > This seems an independent fix? I wonder what if we just leave it as before.
>
>
> Before this patch, only ETHTOOL_COALESCE_MAX_FRAMES was supported, now
> we support ETHTOOL_COALESCE_USECS as well.
>
> ETHTOOL_COALESCE_USECS is meaningless if VIRTIO_NET_F_NOTF_COAL
> feature is not negotiated, so I added the mentioned part.

Yes but this "issue" exists before VIRTIO_NET_F_NOTF_COAL. It might be
better to have an independent patch to fix.

Thanks

>
Alvaro Karsz July 19, 2022, 7:18 a.m. UTC | #4
> Yes but this "issue" exists before VIRTIO_NET_F_NOTF_COAL. It might be
> better to have an independent patch to fix.


Maybe I'm wrong, but I don't think that you could receive
tx_coalesce_usecs/rx_coalesce_usecs without this patch, since the
ethtool_ops struct without this patch is:

static const struct ethtool_ops virtnet_ethtool_ops = {
.supported_coalesce_params = ETHTOOL_COALESCE_MAX_FRAMES,
        .......

And with this patch is:
static const struct ethtool_ops virtnet_ethtool_ops = {
.supported_coalesce_params = ETHTOOL_COALESCE_MAX_FRAMES |
ETHTOOL_COALESCE_USECS,
Jason Wang July 19, 2022, 7:31 a.m. UTC | #5
On Tue, Jul 19, 2022 at 3:19 PM Alvaro Karsz <alvaro.karsz@solid-run.com> wrote:
>
> > Yes but this "issue" exists before VIRTIO_NET_F_NOTF_COAL. It might be
> > better to have an independent patch to fix.
>
>
> Maybe I'm wrong, but I don't think that you could receive
> tx_coalesce_usecs/rx_coalesce_usecs without this patch, since the
> ethtool_ops struct without this patch is:
>
> static const struct ethtool_ops virtnet_ethtool_ops = {
> .supported_coalesce_params = ETHTOOL_COALESCE_MAX_FRAMES,
>         .......
>
> And with this patch is:
> static const struct ethtool_ops virtnet_ethtool_ops = {
> .supported_coalesce_params = ETHTOOL_COALESCE_MAX_FRAMES |
> ETHTOOL_COALESCE_USECS,
>

You're right.

So:

Acked-by: Jason Wang <jasowang@redhat.com>

Thanks
Jakub Kicinski July 20, 2022, 12:26 a.m. UTC | #6
On Mon, 18 Jul 2022 12:11:02 +0300 Alvaro Karsz wrote:
> New VirtIO network feature: VIRTIO_NET_F_NOTF_COAL.
> 
> Control a Virtio network device notifications coalescing parameters
> using the control virtqueue.
> 
> A device that supports this fetature can receive
> VIRTIO_NET_CTRL_NOTF_COAL control commands.
> 
> - VIRTIO_NET_CTRL_NOTF_COAL_TX_SET:
>   Ask the network device to change the following parameters:
>   - tx_usecs: Maximum number of usecs to delay a TX notification.
>   - tx_max_packets: Maximum number of packets to send before a
>     TX notification.
> 
> - VIRTIO_NET_CTRL_NOTF_COAL_RX_SET:
>   Ask the network device to change the following parameters:
>   - rx_usecs: Maximum number of usecs to delay a RX notification.
>   - rx_max_packets: Maximum number of packets to receive before a
>     RX notification.
> 
> VirtIO spec. patch:
> https://lists.oasis-open.org/archives/virtio-comment/202206/msg00100.html
> 
> Signed-off-by: Alvaro Karsz <alvaro.karsz@solid-run.com>

Waiting a bit longer for Michael's ack, so in case other netdev
maintainer takes this:

Reviewed-by: Jakub Kicinski <kuba@kernel.org>
Michael S. Tsirkin July 20, 2022, 6:28 a.m. UTC | #7
On Mon, Jul 18, 2022 at 12:11:02PM +0300, Alvaro Karsz wrote:
> New VirtIO network feature: VIRTIO_NET_F_NOTF_COAL.
> 
> Control a Virtio network device notifications coalescing parameters
> using the control virtqueue.
> 
> A device that supports this fetature can receive
> VIRTIO_NET_CTRL_NOTF_COAL control commands.
> 
> - VIRTIO_NET_CTRL_NOTF_COAL_TX_SET:
>   Ask the network device to change the following parameters:
>   - tx_usecs: Maximum number of usecs to delay a TX notification.
>   - tx_max_packets: Maximum number of packets to send before a
>     TX notification.
> 
> - VIRTIO_NET_CTRL_NOTF_COAL_RX_SET:
>   Ask the network device to change the following parameters:
>   - rx_usecs: Maximum number of usecs to delay a RX notification.
>   - rx_max_packets: Maximum number of packets to receive before a
>     RX notification.
> 
> VirtIO spec. patch:
> https://lists.oasis-open.org/archives/virtio-comment/202206/msg00100.html
> 
> Signed-off-by: Alvaro Karsz <alvaro.karsz@solid-run.com>
> ---
> v2:
> 	- Fix type assignments warnings found with sparse.
> 	- Fix a few typos.
> 
> v3:
>   - Change the coalescing parameters in a dedicated function.
>   - Return -EBUSY from the set coalescing function when the device's
>     link is up, even if the notifications coalescing feature is negotiated.
> 
> v4:
>   - If link is up and we need to update NAPI weight, return -EBUSY before
>     sending the coalescing commands to the device

Thanks! some comments below

> ---
>  drivers/net/virtio_net.c        | 111 +++++++++++++++++++++++++++-----
>  include/uapi/linux/virtio_net.h |  34 +++++++++-
>  2 files changed, 129 insertions(+), 16 deletions(-)
> 
> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> index 356cf8dd416..4fde66bd511 100644
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
> @@ -261,6 +261,12 @@ struct virtnet_info {
>  	u8 duplex;
>  	u32 speed;
> 
> +	/* Interrupt coalescing settings */
> +	u32 tx_usecs;
> +	u32 rx_usecs;
> +	u32 tx_max_packets;
> +	u32 rx_max_packets;
> +
>  	unsigned long guest_offloads;
>  	unsigned long guest_offloads_capable;
> 
> @@ -2587,27 +2593,89 @@ static int virtnet_get_link_ksettings(struct net_device *dev,
>  	return 0;
>  }
> 
> +static int virtnet_send_notf_coal_cmds(struct virtnet_info *vi,
> +				       struct ethtool_coalesce *ec)
> +{
> +	struct scatterlist sgs_tx, sgs_rx;
> +	struct virtio_net_ctrl_coal_tx coal_tx;
> +	struct virtio_net_ctrl_coal_rx coal_rx;
> +
> +	coal_tx.tx_usecs = cpu_to_le32(ec->tx_coalesce_usecs);
> +	coal_tx.tx_max_packets = cpu_to_le32(ec->tx_max_coalesced_frames);
> +	sg_init_one(&sgs_tx, &coal_tx, sizeof(coal_tx));
> +
> +	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL,
> +				  VIRTIO_NET_CTRL_NOTF_COAL_TX_SET,
> +				  &sgs_tx))
> +		return -EINVAL;
> +
> +	/* Save parameters */
> +	vi->tx_usecs = ec->tx_coalesce_usecs;
> +	vi->tx_max_packets = ec->tx_max_coalesced_frames;
> +
> +	coal_rx.rx_usecs = cpu_to_le32(ec->rx_coalesce_usecs);
> +	coal_rx.rx_max_packets = cpu_to_le32(ec->rx_max_coalesced_frames);
> +	sg_init_one(&sgs_rx, &coal_rx, sizeof(coal_rx));
> +
> +	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL,
> +				  VIRTIO_NET_CTRL_NOTF_COAL_RX_SET,
> +				  &sgs_rx))
> +		return -EINVAL;
> +
> +	/* Save parameters */
> +	vi->rx_usecs = ec->rx_coalesce_usecs;
> +	vi->rx_max_packets = ec->rx_max_coalesced_frames;
> +
> +	return 0;
> +}
> +
> +static int virtnet_coal_params_supported(struct ethtool_coalesce *ec)
> +{
> +	/* usecs coalescing is supported only if VIRTIO_NET_F_NOTF_COAL
> +	 * feature is negotiated.
> +	 */
> +	if (ec->rx_coalesce_usecs || ec->tx_coalesce_usecs)
> +		return -EOPNOTSUPP;
> +
> +	if (ec->tx_max_coalesced_frames > 1 ||
> +	    ec->rx_max_coalesced_frames != 1)
> +		return -EINVAL;
> +
> +	return 0;
> +}
> +
>  static int virtnet_set_coalesce(struct net_device *dev,
>  				struct ethtool_coalesce *ec,
>  				struct kernel_ethtool_coalesce *kernel_coal,
>  				struct netlink_ext_ack *extack)
>  {
>  	struct virtnet_info *vi = netdev_priv(dev);
> -	int i, napi_weight;
> -
> -	if (ec->tx_max_coalesced_frames > 1 ||
> -	    ec->rx_max_coalesced_frames != 1)
> -		return -EINVAL;
> +	int ret, i, napi_weight;
> +	bool update_napi = false;
> 
> +	/* Can't change NAPI weight if the link is up */
>  	napi_weight = ec->tx_max_coalesced_frames ? NAPI_POLL_WEIGHT : 0;

Hmm. we currently (ab)use tx_max_coalesced_frames values 0 and 1 to mean tx napi on/off.
However I am not sure we should treat any value != 1 as napi on.

I don't really have good ideas - I think abusing coalescing might
have been a mistake. But now that we are there, I feel we need
a way for userspace to at least be able to figure out whether
setting coalescing to 0 will have nasty side effects.

For example, here's a problem:

- according to spec, all values are reset to 0
- userspace reads coalescing values and gets 0

Does this mean napi is off?

And now that we support colescing, I wonder how is user going to control napi.

It's also a bit of a spec defect that it does not document corner cases
like what do 0 values do, are they different from 1? or what are max values.
Not too late to fix?


>  	if (napi_weight ^ vi->sq[0].napi.weight) {
>  		if (dev->flags & IFF_UP)
>  			return -EBUSY;
> +		else
> +			update_napi = true;
> +	}
> +
> +	if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL))
> +		ret = virtnet_send_notf_coal_cmds(vi, ec);
> +	else
> +		ret = virtnet_coal_params_supported(ec);
> +
> +	if (ret)
> +		return ret;
> +
> +	if (update_napi) {
>  		for (i = 0; i < vi->max_queue_pairs; i++)
>  			vi->sq[i].napi.weight = napi_weight;
>  	}
> 
> -	return 0;
> +	return ret;


>  }
> 
>  static int virtnet_get_coalesce(struct net_device *dev,
> @@ -2615,16 +2683,19 @@ static int virtnet_get_coalesce(struct net_device *dev,
>  				struct kernel_ethtool_coalesce *kernel_coal,
>  				struct netlink_ext_ack *extack)
>  {
> -	struct ethtool_coalesce ec_default = {
> -		.cmd = ETHTOOL_GCOALESCE,
> -		.rx_max_coalesced_frames = 1,
> -	};
>  	struct virtnet_info *vi = netdev_priv(dev);
> 
> -	memcpy(ec, &ec_default, sizeof(ec_default));
> +	if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) {
> +		ec->rx_coalesce_usecs = vi->rx_usecs;
> +		ec->tx_coalesce_usecs = vi->tx_usecs;
> +		ec->tx_max_coalesced_frames = vi->tx_max_packets;
> +		ec->rx_max_coalesced_frames = vi->rx_max_packets;
> +	} else {
> +		ec->rx_max_coalesced_frames = 1;
> 
> -	if (vi->sq[0].napi.weight)
> -		ec->tx_max_coalesced_frames = 1;
> +		if (vi->sq[0].napi.weight)
> +			ec->tx_max_coalesced_frames = 1;
> +	}
> 
>  	return 0;
>  }
> @@ -2743,7 +2814,8 @@ static int virtnet_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info)
>  }
> 
>  static const struct ethtool_ops virtnet_ethtool_ops = {
> -	.supported_coalesce_params = ETHTOOL_COALESCE_MAX_FRAMES,
> +	.supported_coalesce_params = ETHTOOL_COALESCE_MAX_FRAMES |
> +		ETHTOOL_COALESCE_USECS,
>  	.get_drvinfo = virtnet_get_drvinfo,
>  	.get_link = ethtool_op_get_link,
>  	.get_ringparam = virtnet_get_ringparam,
> @@ -3411,6 +3483,8 @@ static bool virtnet_validate_features(struct virtio_device *vdev)
>  	     VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_RSS,
>  			     "VIRTIO_NET_F_CTRL_VQ") ||
>  	     VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_HASH_REPORT,
> +			     "VIRTIO_NET_F_CTRL_VQ") ||
> +	     VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_NOTF_COAL,
>  			     "VIRTIO_NET_F_CTRL_VQ"))) {
>  		return false;
>  	}
> @@ -3546,6 +3620,13 @@ static int virtnet_probe(struct virtio_device *vdev)
>  	if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF))
>  		vi->mergeable_rx_bufs = true;
> 
> +	if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) {
> +		vi->rx_usecs = 0;
> +		vi->tx_usecs = 0;
> +		vi->tx_max_packets = 0;
> +		vi->rx_max_packets = 0;
> +	}
> +
>  	if (virtio_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT))
>  		vi->has_rss_hash_report = true;
> 
> @@ -3780,7 +3861,7 @@ static struct virtio_device_id id_table[] = {
>  	VIRTIO_NET_F_CTRL_MAC_ADDR, \
>  	VIRTIO_NET_F_MTU, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, \
>  	VIRTIO_NET_F_SPEED_DUPLEX, VIRTIO_NET_F_STANDBY, \
> -	VIRTIO_NET_F_RSS, VIRTIO_NET_F_HASH_REPORT
> +	VIRTIO_NET_F_RSS, VIRTIO_NET_F_HASH_REPORT, VIRTIO_NET_F_NOTF_COAL
> 
>  static unsigned int features[] = {
>  	VIRTNET_FEATURES,
> diff --git a/include/uapi/linux/virtio_net.h b/include/uapi/linux/virtio_net.h
> index 3f55a4215f1..29ced55514d 100644
> --- a/include/uapi/linux/virtio_net.h
> +++ b/include/uapi/linux/virtio_net.h
> @@ -56,7 +56,7 @@
>  #define VIRTIO_NET_F_MQ	22	/* Device supports Receive Flow
>  					 * Steering */
>  #define VIRTIO_NET_F_CTRL_MAC_ADDR 23	/* Set MAC address */
> -
> +#define VIRTIO_NET_F_NOTF_COAL	53	/* Guest can handle notifications coalescing */

So the spec says
	Device supports notifications coalescing.

which makes more sense - there's not a lot guest needs to do here.


>  #define VIRTIO_NET_F_HASH_REPORT  57	/* Supports hash report */
>  #define VIRTIO_NET_F_RSS	  60	/* Supports RSS RX steering */
>  #define VIRTIO_NET_F_RSC_EXT	  61	/* extended coalescing info */
> @@ -355,4 +355,36 @@ struct virtio_net_hash_config {
>  #define VIRTIO_NET_CTRL_GUEST_OFFLOADS   5
>  #define VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET        0
> 
> +/*
> + * Control notifications coalescing.
> + *
> + * Request the device to change the notifications coalescing parameters.
> + *
> + * Available with the VIRTIO_NET_F_NOTF_COAL feature bit.
> + */
> +#define VIRTIO_NET_CTRL_NOTF_COAL		6
> +/*
> + * Set the tx-usecs/tx-max-packets patameters.

parameters?

> + * tx-usecs - Maximum number of usecs to delay a TX notification.
> + * tx-max-packets - Maximum number of packets to send before a TX notification.

why with dash here? And why not just put the comments near the fields
themselves?

> + */
> +struct virtio_net_ctrl_coal_tx {
> +	__le32 tx_max_packets;
> +	__le32 tx_usecs;
> +};
> +
> +#define VIRTIO_NET_CTRL_NOTF_COAL_TX_SET		0
> +
> +/*
> + * Set the rx-usecs/rx-max-packets patameters.
> + * rx-usecs - Maximum number of usecs to delay a RX notification.
> + * rx-max-frames - Maximum number of packets to receive before a RX notification.
> + */
> +struct virtio_net_ctrl_coal_rx {
> +	__le32 rx_max_packets;
> +	__le32 rx_usecs;
> +};

same comments

> +
> +#define VIRTIO_NET_CTRL_NOTF_COAL_RX_SET		1
> +
>  #endif /* _UAPI_LINUX_VIRTIO_NET_H */
> --
> 2.32.0
Michael S. Tsirkin July 20, 2022, 6:45 a.m. UTC | #8
On Tue, Jul 19, 2022 at 05:26:52PM -0700, Jakub Kicinski wrote:
> On Mon, 18 Jul 2022 12:11:02 +0300 Alvaro Karsz wrote:
> > New VirtIO network feature: VIRTIO_NET_F_NOTF_COAL.
> > 
> > Control a Virtio network device notifications coalescing parameters
> > using the control virtqueue.
> > 
> > A device that supports this fetature can receive
> > VIRTIO_NET_CTRL_NOTF_COAL control commands.
> > 
> > - VIRTIO_NET_CTRL_NOTF_COAL_TX_SET:
> >   Ask the network device to change the following parameters:
> >   - tx_usecs: Maximum number of usecs to delay a TX notification.
> >   - tx_max_packets: Maximum number of packets to send before a
> >     TX notification.
> > 
> > - VIRTIO_NET_CTRL_NOTF_COAL_RX_SET:
> >   Ask the network device to change the following parameters:
> >   - rx_usecs: Maximum number of usecs to delay a RX notification.
> >   - rx_max_packets: Maximum number of packets to receive before a
> >     RX notification.
> > 
> > VirtIO spec. patch:
> > https://lists.oasis-open.org/archives/virtio-comment/202206/msg00100.html
> > 
> > Signed-off-by: Alvaro Karsz <alvaro.karsz@solid-run.com>
> 
> Waiting a bit longer for Michael's ack, so in case other netdev
> maintainer takes this:
> 
> Reviewed-by: Jakub Kicinski <kuba@kernel.org>

Yea was going to ack this but looking at the UAPI again we have a
problem because we abused tax max frames values 0 and 1 to control napi
in the past. technically does not affect legacy cards but userspace
can't easily tell the difference, can it?
Jason Wang July 20, 2022, 7:02 a.m. UTC | #9
On Wed, Jul 20, 2022 at 2:45 PM Michael S. Tsirkin <mst@redhat.com> wrote:
>
> On Tue, Jul 19, 2022 at 05:26:52PM -0700, Jakub Kicinski wrote:
> > On Mon, 18 Jul 2022 12:11:02 +0300 Alvaro Karsz wrote:
> > > New VirtIO network feature: VIRTIO_NET_F_NOTF_COAL.
> > >
> > > Control a Virtio network device notifications coalescing parameters
> > > using the control virtqueue.
> > >
> > > A device that supports this fetature can receive
> > > VIRTIO_NET_CTRL_NOTF_COAL control commands.
> > >
> > > - VIRTIO_NET_CTRL_NOTF_COAL_TX_SET:
> > >   Ask the network device to change the following parameters:
> > >   - tx_usecs: Maximum number of usecs to delay a TX notification.
> > >   - tx_max_packets: Maximum number of packets to send before a
> > >     TX notification.
> > >
> > > - VIRTIO_NET_CTRL_NOTF_COAL_RX_SET:
> > >   Ask the network device to change the following parameters:
> > >   - rx_usecs: Maximum number of usecs to delay a RX notification.
> > >   - rx_max_packets: Maximum number of packets to receive before a
> > >     RX notification.
> > >
> > > VirtIO spec. patch:
> > > https://lists.oasis-open.org/archives/virtio-comment/202206/msg00100.html
> > >
> > > Signed-off-by: Alvaro Karsz <alvaro.karsz@solid-run.com>
> >
> > Waiting a bit longer for Michael's ack, so in case other netdev
> > maintainer takes this:
> >
> > Reviewed-by: Jakub Kicinski <kuba@kernel.org>
>
> Yea was going to ack this but looking at the UAPI again we have a
> problem because we abused tax max frames values 0 and 1 to control napi
> in the past. technically does not affect legacy cards but userspace
> can't easily tell the difference, can it?

The "abuse" only works for iproute2. For uAPI we know it should follow
the spec? (anyhow NAPI is something out of the spec)

Thanks

>
> --
> MST
>
Michael S. Tsirkin July 20, 2022, 7:05 a.m. UTC | #10
On Wed, Jul 20, 2022 at 03:02:04PM +0800, Jason Wang wrote:
> On Wed, Jul 20, 2022 at 2:45 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> >
> > On Tue, Jul 19, 2022 at 05:26:52PM -0700, Jakub Kicinski wrote:
> > > On Mon, 18 Jul 2022 12:11:02 +0300 Alvaro Karsz wrote:
> > > > New VirtIO network feature: VIRTIO_NET_F_NOTF_COAL.
> > > >
> > > > Control a Virtio network device notifications coalescing parameters
> > > > using the control virtqueue.
> > > >
> > > > A device that supports this fetature can receive
> > > > VIRTIO_NET_CTRL_NOTF_COAL control commands.
> > > >
> > > > - VIRTIO_NET_CTRL_NOTF_COAL_TX_SET:
> > > >   Ask the network device to change the following parameters:
> > > >   - tx_usecs: Maximum number of usecs to delay a TX notification.
> > > >   - tx_max_packets: Maximum number of packets to send before a
> > > >     TX notification.
> > > >
> > > > - VIRTIO_NET_CTRL_NOTF_COAL_RX_SET:
> > > >   Ask the network device to change the following parameters:
> > > >   - rx_usecs: Maximum number of usecs to delay a RX notification.
> > > >   - rx_max_packets: Maximum number of packets to receive before a
> > > >     RX notification.
> > > >
> > > > VirtIO spec. patch:
> > > > https://lists.oasis-open.org/archives/virtio-comment/202206/msg00100.html
> > > >
> > > > Signed-off-by: Alvaro Karsz <alvaro.karsz@solid-run.com>
> > >
> > > Waiting a bit longer for Michael's ack, so in case other netdev
> > > maintainer takes this:
> > >
> > > Reviewed-by: Jakub Kicinski <kuba@kernel.org>
> >
> > Yea was going to ack this but looking at the UAPI again we have a
> > problem because we abused tax max frames values 0 and 1 to control napi
> > in the past. technically does not affect legacy cards but userspace
> > can't easily tell the difference, can it?
> 
> The "abuse" only works for iproute2.

That's kernel/userspace API. That's what this patch affects, right?

> For uAPI we know it should follow
> the spec? (anyhow NAPI is something out of the spec)
> 
> Thanks

When you say uAPI here you mean the virtio header. I am not
worried about that just yet (maybe I should be).

> >
> > --
> > MST
> >
Jason Wang July 20, 2022, 7:15 a.m. UTC | #11
On Wed, Jul 20, 2022 at 3:05 PM Michael S. Tsirkin <mst@redhat.com> wrote:
>
> On Wed, Jul 20, 2022 at 03:02:04PM +0800, Jason Wang wrote:
> > On Wed, Jul 20, 2022 at 2:45 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> > >
> > > On Tue, Jul 19, 2022 at 05:26:52PM -0700, Jakub Kicinski wrote:
> > > > On Mon, 18 Jul 2022 12:11:02 +0300 Alvaro Karsz wrote:
> > > > > New VirtIO network feature: VIRTIO_NET_F_NOTF_COAL.
> > > > >
> > > > > Control a Virtio network device notifications coalescing parameters
> > > > > using the control virtqueue.
> > > > >
> > > > > A device that supports this fetature can receive
> > > > > VIRTIO_NET_CTRL_NOTF_COAL control commands.
> > > > >
> > > > > - VIRTIO_NET_CTRL_NOTF_COAL_TX_SET:
> > > > >   Ask the network device to change the following parameters:
> > > > >   - tx_usecs: Maximum number of usecs to delay a TX notification.
> > > > >   - tx_max_packets: Maximum number of packets to send before a
> > > > >     TX notification.
> > > > >
> > > > > - VIRTIO_NET_CTRL_NOTF_COAL_RX_SET:
> > > > >   Ask the network device to change the following parameters:
> > > > >   - rx_usecs: Maximum number of usecs to delay a RX notification.
> > > > >   - rx_max_packets: Maximum number of packets to receive before a
> > > > >     RX notification.
> > > > >
> > > > > VirtIO spec. patch:
> > > > > https://lists.oasis-open.org/archives/virtio-comment/202206/msg00100.html
> > > > >
> > > > > Signed-off-by: Alvaro Karsz <alvaro.karsz@solid-run.com>
> > > >
> > > > Waiting a bit longer for Michael's ack, so in case other netdev
> > > > maintainer takes this:
> > > >
> > > > Reviewed-by: Jakub Kicinski <kuba@kernel.org>
> > >
> > > Yea was going to ack this but looking at the UAPI again we have a
> > > problem because we abused tax max frames values 0 and 1 to control napi
> > > in the past. technically does not affect legacy cards but userspace
> > > can't easily tell the difference, can it?
> >
> > The "abuse" only works for iproute2.
>
> That's kernel/userspace API. That's what this patch affects, right?

I'm not sure I get this.

The 1-to-enable-napi is only used between iproute2 and kernel via
ETHTOOL_A_COALESCE_TX_MAX_FRAMES not the uAPI introduced here.

So I don't see how it can conflict with the virito uAPI extension here.

Thanks

>
> > For uAPI we know it should follow
> > the spec? (anyhow NAPI is something out of the spec)
> >
> > Thanks
>
> When you say uAPI here you mean the virtio header. I am not
> worried about that just yet (maybe I should be).
>
> > >
> > > --
> > > MST
> > >
>
Michael S. Tsirkin July 20, 2022, 7:15 a.m. UTC | #12
On Wed, Jul 20, 2022 at 10:07:11AM +0300, Alvaro Karsz wrote:
> > Hmm. we currently (ab)use tx_max_coalesced_frames values 0 and 1 to mean tx
> napi on/off.
> > However I am not sure we should treat any value != 1 as napi on.
> >
> > I don't really have good ideas - I think abusing coalescing might
> > have been a mistake. But now that we are there, I feel we need
> > a way for userspace to at least be able to figure out whether
> > setting coalescing to 0 will have nasty side effects.
> 
> 
> So, how can I proceed from here?
> Maybe we don't need to use tx napi when this feature is negotiated (like Jakub
> suggested in prev. versions)?
> It makes sense, since the number of TX notifications can be reduced by setting
> tx_usecs/tx_max_packets with ethtool.


Hmm Jason had some ideas about extensions in mind when he
coded the current UAPI, let's see if he has ideas.
I'll ruminate on compatibility a bit too.

> > It's also a bit of a spec defect that it does not document corner cases
> > like what do 0 values do, are they different from 1? or what are max values.
> > Not too late to fix?
> 
> 
> I think that some of the corner cases can be understood from the coalescing
> values.
> For example:
> if rx_usecs=0 we should wait for 0 usecs, meaning that we should send a
> notification immediately.
> But if rx_usecs=1 we should wait for 1 usec.
> The case with max_packets is a little bit unclear for the values 0/1, and it
> seems that in both cases we should send a notification immediately after
> receiving/sending a packet.
> 
> 
> > So the spec says
> >         Device supports notifications coalescing.
> >
> > which makes more sense - there's not a lot guest needs to do here.
> 
> 
> Noted.
> 
> > parameters?
> 
>  
> I'll change it to "settings".
> 
> > why with dash here? And why not just put the comments near the fields
> > themselves?
> 
> 
> Noted.
Jason Wang July 20, 2022, 7:42 a.m. UTC | #13
On Wed, Jul 20, 2022 at 3:15 PM Michael S. Tsirkin <mst@redhat.com> wrote:
>
> On Wed, Jul 20, 2022 at 10:07:11AM +0300, Alvaro Karsz wrote:
> > > Hmm. we currently (ab)use tx_max_coalesced_frames values 0 and 1 to mean tx
> > napi on/off.
> > > However I am not sure we should treat any value != 1 as napi on.
> > >
> > > I don't really have good ideas - I think abusing coalescing might
> > > have been a mistake. But now that we are there, I feel we need
> > > a way for userspace to at least be able to figure out whether
> > > setting coalescing to 0 will have nasty side effects.
> >
> >
> > So, how can I proceed from here?
> > Maybe we don't need to use tx napi when this feature is negotiated (like Jakub
> > suggested in prev. versions)?
> > It makes sense, since the number of TX notifications can be reduced by setting
> > tx_usecs/tx_max_packets with ethtool.
>
>
> Hmm Jason had some ideas about extensions in mind when he
> coded the current UAPI, let's see if he has ideas.
> I'll ruminate on compatibility a bit too.

I might be wrong, but using 1 to enable tx napi is a way to try to be
compatible with the interrupt coalescing.

That is, without notification coalescing, if 1 is set, we enable tx
notifications (and NAPI) for each packet. This works as if
tx-max-coalesced-frames is set to 1 when notification coalescing is
negotiated.

Thanks

>
> > > It's also a bit of a spec defect that it does not document corner cases
> > > like what do 0 values do, are they different from 1? or what are max values.
> > > Not too late to fix?
> >
> >
> > I think that some of the corner cases can be understood from the coalescing
> > values.
> > For example:
> > if rx_usecs=0 we should wait for 0 usecs, meaning that we should send a
> > notification immediately.
> > But if rx_usecs=1 we should wait for 1 usec.
> > The case with max_packets is a little bit unclear for the values 0/1, and it
> > seems that in both cases we should send a notification immediately after
> > receiving/sending a packet.
> >
> >
> > > So the spec says
> > >         Device supports notifications coalescing.
> > >
> > > which makes more sense - there's not a lot guest needs to do here.
> >
> >
> > Noted.
> >
> > > parameters?
> >
> >
> > I'll change it to "settings".
> >
> > > why with dash here? And why not just put the comments near the fields
> > > themselves?
> >
> >
> > Noted.
>
Jason Wang July 20, 2022, 7:45 a.m. UTC | #14
On Wed, Jul 20, 2022 at 3:42 PM Jason Wang <jasowang@redhat.com> wrote:
>
> On Wed, Jul 20, 2022 at 3:15 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> >
> > On Wed, Jul 20, 2022 at 10:07:11AM +0300, Alvaro Karsz wrote:
> > > > Hmm. we currently (ab)use tx_max_coalesced_frames values 0 and 1 to mean tx
> > > napi on/off.
> > > > However I am not sure we should treat any value != 1 as napi on.
> > > >
> > > > I don't really have good ideas - I think abusing coalescing might
> > > > have been a mistake. But now that we are there, I feel we need
> > > > a way for userspace to at least be able to figure out whether
> > > > setting coalescing to 0 will have nasty side effects.
> > >
> > >
> > > So, how can I proceed from here?
> > > Maybe we don't need to use tx napi when this feature is negotiated (like Jakub
> > > suggested in prev. versions)?
> > > It makes sense, since the number of TX notifications can be reduced by setting
> > > tx_usecs/tx_max_packets with ethtool.
> >
> >
> > Hmm Jason had some ideas about extensions in mind when he
> > coded the current UAPI, let's see if he has ideas.
> > I'll ruminate on compatibility a bit too.
>
> I might be wrong, but using 1 to enable tx napi is a way to try to be
> compatible with the interrupt coalescing.
>
> That is, without notification coalescing, if 1 is set, we enable tx
> notifications (and NAPI) for each packet. This works as if
> tx-max-coalesced-frames is set to 1 when notification coalescing is
> negotiated.
>
> Thanks
>
> >
> > > > It's also a bit of a spec defect that it does not document corner cases
> > > > like what do 0 values do, are they different from 1? or what are max values.
> > > > Not too late to fix?
> > >
> > >
> > > I think that some of the corner cases can be understood from the coalescing
> > > values.
> > > For example:
> > > if rx_usecs=0 we should wait for 0 usecs, meaning that we should send a
> > > notification immediately.
> > > But if rx_usecs=1 we should wait for 1 usec.
> > > The case with max_packets is a little bit unclear for the values 0/1, and it
> > > seems that in both cases we should send a notification immediately after
> > > receiving/sending a packet.

Then we probably need to document this in the spec.

And we need an upper limit for those values, this helps for e.g
migration compatibility.

Thanks

> > >
> > >
> > > > So the spec says
> > > >         Device supports notifications coalescing.
> > > >
> > > > which makes more sense - there's not a lot guest needs to do here.
> > >
> > >
> > > Noted.
> > >
> > > > parameters?
> > >
> > >
> > > I'll change it to "settings".
> > >
> > > > why with dash here? And why not just put the comments near the fields
> > > > themselves?
> > >
> > >
> > > Noted.
> >
Alvaro Karsz July 20, 2022, 7:56 a.m. UTC | #15
> And we need an upper limit for those values, this helps for e.g
> migration compatibility.

Why not let the device decide the upper limit, making it device specific?
As written in the spec., a device can answer to the coalescing command
with VIRTIO_NET_ERR,
If it was not able to set the requested settings.

If a physical device uses virtio datapath, and can for example
coalesce notifications up to 500us, why should we limit it with a
lower number?
Jason Wang July 20, 2022, 8:28 a.m. UTC | #16
On Wed, Jul 20, 2022 at 3:57 PM Alvaro Karsz <alvaro.karsz@solid-run.com> wrote:
>
> > And we need an upper limit for those values, this helps for e.g
> > migration compatibility.
>
> Why not let the device decide the upper limit, making it device specific?

For example we may want to migrate VM from src to dst.

In src, we set tx-max-coalesced-frames to 512 but the dst only supports 256.

> As written in the spec., a device can answer to the coalescing command
> with VIRTIO_NET_ERR,
> If it was not able to set the requested settings.
>
> If a physical device uses virtio datapath, and can for example
> coalesce notifications up to 500us, why should we limit it with a
> lower number?
>

See above, for migration compatibility.

Thanks
Jason Wang July 20, 2022, 8:52 a.m. UTC | #17
On Wed, Jul 20, 2022 at 2:28 PM Michael S. Tsirkin <mst@redhat.com> wrote:
>
> On Mon, Jul 18, 2022 at 12:11:02PM +0300, Alvaro Karsz wrote:
> > New VirtIO network feature: VIRTIO_NET_F_NOTF_COAL.
> >
> > Control a Virtio network device notifications coalescing parameters
> > using the control virtqueue.
> >
> > A device that supports this fetature can receive
> > VIRTIO_NET_CTRL_NOTF_COAL control commands.
> >
> > - VIRTIO_NET_CTRL_NOTF_COAL_TX_SET:
> >   Ask the network device to change the following parameters:
> >   - tx_usecs: Maximum number of usecs to delay a TX notification.
> >   - tx_max_packets: Maximum number of packets to send before a
> >     TX notification.
> >
> > - VIRTIO_NET_CTRL_NOTF_COAL_RX_SET:
> >   Ask the network device to change the following parameters:
> >   - rx_usecs: Maximum number of usecs to delay a RX notification.
> >   - rx_max_packets: Maximum number of packets to receive before a
> >     RX notification.
> >
> > VirtIO spec. patch:
> > https://lists.oasis-open.org/archives/virtio-comment/202206/msg00100.html
> >
> > Signed-off-by: Alvaro Karsz <alvaro.karsz@solid-run.com>
> > ---
> > v2:
> >       - Fix type assignments warnings found with sparse.
> >       - Fix a few typos.
> >
> > v3:
> >   - Change the coalescing parameters in a dedicated function.
> >   - Return -EBUSY from the set coalescing function when the device's
> >     link is up, even if the notifications coalescing feature is negotiated.
> >
> > v4:
> >   - If link is up and we need to update NAPI weight, return -EBUSY before
> >     sending the coalescing commands to the device
>
> Thanks! some comments below
>
> > ---
> >  drivers/net/virtio_net.c        | 111 +++++++++++++++++++++++++++-----
> >  include/uapi/linux/virtio_net.h |  34 +++++++++-
> >  2 files changed, 129 insertions(+), 16 deletions(-)
> >
> > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> > index 356cf8dd416..4fde66bd511 100644
> > --- a/drivers/net/virtio_net.c
> > +++ b/drivers/net/virtio_net.c
> > @@ -261,6 +261,12 @@ struct virtnet_info {
> >       u8 duplex;
> >       u32 speed;
> >
> > +     /* Interrupt coalescing settings */
> > +     u32 tx_usecs;
> > +     u32 rx_usecs;
> > +     u32 tx_max_packets;
> > +     u32 rx_max_packets;
> > +
> >       unsigned long guest_offloads;
> >       unsigned long guest_offloads_capable;
> >
> > @@ -2587,27 +2593,89 @@ static int virtnet_get_link_ksettings(struct net_device *dev,
> >       return 0;
> >  }
> >
> > +static int virtnet_send_notf_coal_cmds(struct virtnet_info *vi,
> > +                                    struct ethtool_coalesce *ec)
> > +{
> > +     struct scatterlist sgs_tx, sgs_rx;
> > +     struct virtio_net_ctrl_coal_tx coal_tx;
> > +     struct virtio_net_ctrl_coal_rx coal_rx;
> > +
> > +     coal_tx.tx_usecs = cpu_to_le32(ec->tx_coalesce_usecs);
> > +     coal_tx.tx_max_packets = cpu_to_le32(ec->tx_max_coalesced_frames);
> > +     sg_init_one(&sgs_tx, &coal_tx, sizeof(coal_tx));
> > +
> > +     if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL,
> > +                               VIRTIO_NET_CTRL_NOTF_COAL_TX_SET,
> > +                               &sgs_tx))
> > +             return -EINVAL;
> > +
> > +     /* Save parameters */
> > +     vi->tx_usecs = ec->tx_coalesce_usecs;
> > +     vi->tx_max_packets = ec->tx_max_coalesced_frames;
> > +
> > +     coal_rx.rx_usecs = cpu_to_le32(ec->rx_coalesce_usecs);
> > +     coal_rx.rx_max_packets = cpu_to_le32(ec->rx_max_coalesced_frames);
> > +     sg_init_one(&sgs_rx, &coal_rx, sizeof(coal_rx));
> > +
> > +     if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL,
> > +                               VIRTIO_NET_CTRL_NOTF_COAL_RX_SET,
> > +                               &sgs_rx))
> > +             return -EINVAL;
> > +
> > +     /* Save parameters */
> > +     vi->rx_usecs = ec->rx_coalesce_usecs;
> > +     vi->rx_max_packets = ec->rx_max_coalesced_frames;
> > +
> > +     return 0;
> > +}
> > +
> > +static int virtnet_coal_params_supported(struct ethtool_coalesce *ec)
> > +{
> > +     /* usecs coalescing is supported only if VIRTIO_NET_F_NOTF_COAL
> > +      * feature is negotiated.
> > +      */
> > +     if (ec->rx_coalesce_usecs || ec->tx_coalesce_usecs)
> > +             return -EOPNOTSUPP;
> > +
> > +     if (ec->tx_max_coalesced_frames > 1 ||
> > +         ec->rx_max_coalesced_frames != 1)
> > +             return -EINVAL;
> > +
> > +     return 0;
> > +}
> > +
> >  static int virtnet_set_coalesce(struct net_device *dev,
> >                               struct ethtool_coalesce *ec,
> >                               struct kernel_ethtool_coalesce *kernel_coal,
> >                               struct netlink_ext_ack *extack)
> >  {
> >       struct virtnet_info *vi = netdev_priv(dev);
> > -     int i, napi_weight;
> > -
> > -     if (ec->tx_max_coalesced_frames > 1 ||
> > -         ec->rx_max_coalesced_frames != 1)
> > -             return -EINVAL;
> > +     int ret, i, napi_weight;
> > +     bool update_napi = false;
> >
> > +     /* Can't change NAPI weight if the link is up */
> >       napi_weight = ec->tx_max_coalesced_frames ? NAPI_POLL_WEIGHT : 0;
>
> Hmm. we currently (ab)use tx_max_coalesced_frames values 0 and 1 to mean tx napi on/off.
> However I am not sure we should treat any value != 1 as napi on.
>
> I don't really have good ideas - I think abusing coalescing might
> have been a mistake. But now that we are there, I feel we need
> a way for userspace to at least be able to figure out whether
> setting coalescing to 0 will have nasty side effects.
>
> For example, here's a problem:
>
> - according to spec, all values are reset to 0
> - userspace reads coalescing values and gets 0
>
> Does this mean napi is off?
>
> And now that we support colescing, I wonder how is user going to control napi.

I think we probably don't want to let users know about NAPI. And
actually, tx NAPI is a byproduct of the tx notification.

Historically, we don't use tx interrupts but skb_orphan() unless the
queue is about to be full. But it tends to cause a lot of side
effects. Then we try to enable tx interrupt with tx NAPI, but in order
to not have performance regression, we enable it via ethtool
(tx-max-coalesce-frames).  Setting 1 means the driver wants to be
notified for each sent packet, tx NAPI is a must in this case.

Note that the tx notification (NAPI) has been enabled by default for
years. If you have concern, I wonder if it's the time to drop
skb_orphan() completely from virtio-net driver. Then we will be fine?

Thanks

>
> It's also a bit of a spec defect that it does not document corner cases
> like what do 0 values do, are they different from 1? or what are max values.
> Not too late to fix?
>
>
> >       if (napi_weight ^ vi->sq[0].napi.weight) {
> >               if (dev->flags & IFF_UP)
> >                       return -EBUSY;
> > +             else
> > +                     update_napi = true;
> > +     }
> > +
> > +     if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL))
> > +             ret = virtnet_send_notf_coal_cmds(vi, ec);
> > +     else
> > +             ret = virtnet_coal_params_supported(ec);
> > +
> > +     if (ret)
> > +             return ret;
> > +
> > +     if (update_napi) {
> >               for (i = 0; i < vi->max_queue_pairs; i++)
> >                       vi->sq[i].napi.weight = napi_weight;
> >       }
> >
> > -     return 0;
> > +     return ret;
>
>
> >  }
> >
> >  static int virtnet_get_coalesce(struct net_device *dev,
> > @@ -2615,16 +2683,19 @@ static int virtnet_get_coalesce(struct net_device *dev,
> >                               struct kernel_ethtool_coalesce *kernel_coal,
> >                               struct netlink_ext_ack *extack)
> >  {
> > -     struct ethtool_coalesce ec_default = {
> > -             .cmd = ETHTOOL_GCOALESCE,
> > -             .rx_max_coalesced_frames = 1,
> > -     };
> >       struct virtnet_info *vi = netdev_priv(dev);
> >
> > -     memcpy(ec, &ec_default, sizeof(ec_default));
> > +     if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) {
> > +             ec->rx_coalesce_usecs = vi->rx_usecs;
> > +             ec->tx_coalesce_usecs = vi->tx_usecs;
> > +             ec->tx_max_coalesced_frames = vi->tx_max_packets;
> > +             ec->rx_max_coalesced_frames = vi->rx_max_packets;
> > +     } else {
> > +             ec->rx_max_coalesced_frames = 1;
> >
> > -     if (vi->sq[0].napi.weight)
> > -             ec->tx_max_coalesced_frames = 1;
> > +             if (vi->sq[0].napi.weight)
> > +                     ec->tx_max_coalesced_frames = 1;
> > +     }
> >
> >       return 0;
> >  }
> > @@ -2743,7 +2814,8 @@ static int virtnet_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info)
> >  }
> >
> >  static const struct ethtool_ops virtnet_ethtool_ops = {
> > -     .supported_coalesce_params = ETHTOOL_COALESCE_MAX_FRAMES,
> > +     .supported_coalesce_params = ETHTOOL_COALESCE_MAX_FRAMES |
> > +             ETHTOOL_COALESCE_USECS,
> >       .get_drvinfo = virtnet_get_drvinfo,
> >       .get_link = ethtool_op_get_link,
> >       .get_ringparam = virtnet_get_ringparam,
> > @@ -3411,6 +3483,8 @@ static bool virtnet_validate_features(struct virtio_device *vdev)
> >            VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_RSS,
> >                            "VIRTIO_NET_F_CTRL_VQ") ||
> >            VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_HASH_REPORT,
> > +                          "VIRTIO_NET_F_CTRL_VQ") ||
> > +          VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_NOTF_COAL,
> >                            "VIRTIO_NET_F_CTRL_VQ"))) {
> >               return false;
> >       }
> > @@ -3546,6 +3620,13 @@ static int virtnet_probe(struct virtio_device *vdev)
> >       if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF))
> >               vi->mergeable_rx_bufs = true;
> >
> > +     if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) {
> > +             vi->rx_usecs = 0;
> > +             vi->tx_usecs = 0;
> > +             vi->tx_max_packets = 0;
> > +             vi->rx_max_packets = 0;
> > +     }
> > +
> >       if (virtio_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT))
> >               vi->has_rss_hash_report = true;
> >
> > @@ -3780,7 +3861,7 @@ static struct virtio_device_id id_table[] = {
> >       VIRTIO_NET_F_CTRL_MAC_ADDR, \
> >       VIRTIO_NET_F_MTU, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, \
> >       VIRTIO_NET_F_SPEED_DUPLEX, VIRTIO_NET_F_STANDBY, \
> > -     VIRTIO_NET_F_RSS, VIRTIO_NET_F_HASH_REPORT
> > +     VIRTIO_NET_F_RSS, VIRTIO_NET_F_HASH_REPORT, VIRTIO_NET_F_NOTF_COAL
> >
> >  static unsigned int features[] = {
> >       VIRTNET_FEATURES,
> > diff --git a/include/uapi/linux/virtio_net.h b/include/uapi/linux/virtio_net.h
> > index 3f55a4215f1..29ced55514d 100644
> > --- a/include/uapi/linux/virtio_net.h
> > +++ b/include/uapi/linux/virtio_net.h
> > @@ -56,7 +56,7 @@
> >  #define VIRTIO_NET_F_MQ      22      /* Device supports Receive Flow
> >                                        * Steering */
> >  #define VIRTIO_NET_F_CTRL_MAC_ADDR 23        /* Set MAC address */
> > -
> > +#define VIRTIO_NET_F_NOTF_COAL       53      /* Guest can handle notifications coalescing */
>
> So the spec says
>         Device supports notifications coalescing.
>
> which makes more sense - there's not a lot guest needs to do here.
>
>
> >  #define VIRTIO_NET_F_HASH_REPORT  57 /* Supports hash report */
> >  #define VIRTIO_NET_F_RSS       60    /* Supports RSS RX steering */
> >  #define VIRTIO_NET_F_RSC_EXT   61    /* extended coalescing info */
> > @@ -355,4 +355,36 @@ struct virtio_net_hash_config {
> >  #define VIRTIO_NET_CTRL_GUEST_OFFLOADS   5
> >  #define VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET        0
> >
> > +/*
> > + * Control notifications coalescing.
> > + *
> > + * Request the device to change the notifications coalescing parameters.
> > + *
> > + * Available with the VIRTIO_NET_F_NOTF_COAL feature bit.
> > + */
> > +#define VIRTIO_NET_CTRL_NOTF_COAL            6
> > +/*
> > + * Set the tx-usecs/tx-max-packets patameters.
>
> parameters?
>
> > + * tx-usecs - Maximum number of usecs to delay a TX notification.
> > + * tx-max-packets - Maximum number of packets to send before a TX notification.
>
> why with dash here? And why not just put the comments near the fields
> themselves?
>
> > + */
> > +struct virtio_net_ctrl_coal_tx {
> > +     __le32 tx_max_packets;
> > +     __le32 tx_usecs;
> > +};
> > +
> > +#define VIRTIO_NET_CTRL_NOTF_COAL_TX_SET             0
> > +
> > +/*
> > + * Set the rx-usecs/rx-max-packets patameters.
> > + * rx-usecs - Maximum number of usecs to delay a RX notification.
> > + * rx-max-frames - Maximum number of packets to receive before a RX notification.
> > + */
> > +struct virtio_net_ctrl_coal_rx {
> > +     __le32 rx_max_packets;
> > +     __le32 rx_usecs;
> > +};
>
> same comments
>
> > +
> > +#define VIRTIO_NET_CTRL_NOTF_COAL_RX_SET             1
> > +
> >  #endif /* _UAPI_LINUX_VIRTIO_NET_H */
> > --
> > 2.32.0
>
Michael S. Tsirkin July 20, 2022, 9:05 a.m. UTC | #18
On Wed, Jul 20, 2022 at 03:15:08PM +0800, Jason Wang wrote:
> On Wed, Jul 20, 2022 at 3:05 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> >
> > On Wed, Jul 20, 2022 at 03:02:04PM +0800, Jason Wang wrote:
> > > On Wed, Jul 20, 2022 at 2:45 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> > > >
> > > > On Tue, Jul 19, 2022 at 05:26:52PM -0700, Jakub Kicinski wrote:
> > > > > On Mon, 18 Jul 2022 12:11:02 +0300 Alvaro Karsz wrote:
> > > > > > New VirtIO network feature: VIRTIO_NET_F_NOTF_COAL.
> > > > > >
> > > > > > Control a Virtio network device notifications coalescing parameters
> > > > > > using the control virtqueue.
> > > > > >
> > > > > > A device that supports this fetature can receive
> > > > > > VIRTIO_NET_CTRL_NOTF_COAL control commands.
> > > > > >
> > > > > > - VIRTIO_NET_CTRL_NOTF_COAL_TX_SET:
> > > > > >   Ask the network device to change the following parameters:
> > > > > >   - tx_usecs: Maximum number of usecs to delay a TX notification.
> > > > > >   - tx_max_packets: Maximum number of packets to send before a
> > > > > >     TX notification.
> > > > > >
> > > > > > - VIRTIO_NET_CTRL_NOTF_COAL_RX_SET:
> > > > > >   Ask the network device to change the following parameters:
> > > > > >   - rx_usecs: Maximum number of usecs to delay a RX notification.
> > > > > >   - rx_max_packets: Maximum number of packets to receive before a
> > > > > >     RX notification.
> > > > > >
> > > > > > VirtIO spec. patch:
> > > > > > https://lists.oasis-open.org/archives/virtio-comment/202206/msg00100.html
> > > > > >
> > > > > > Signed-off-by: Alvaro Karsz <alvaro.karsz@solid-run.com>
> > > > >
> > > > > Waiting a bit longer for Michael's ack, so in case other netdev
> > > > > maintainer takes this:
> > > > >
> > > > > Reviewed-by: Jakub Kicinski <kuba@kernel.org>
> > > >
> > > > Yea was going to ack this but looking at the UAPI again we have a
> > > > problem because we abused tax max frames values 0 and 1 to control napi
> > > > in the past. technically does not affect legacy cards but userspace
> > > > can't easily tell the difference, can it?
> > >
> > > The "abuse" only works for iproute2.
> >
> > That's kernel/userspace API. That's what this patch affects, right?
> 
> I'm not sure I get this.
> 
> The 1-to-enable-napi is only used between iproute2 and kernel via
> ETHTOOL_A_COALESCE_TX_MAX_FRAMES not the uAPI introduced here.
> So I don't see how it can conflict with the virito uAPI extension here.
> 
> Thanks

As far as I can see ETHTOOL_A_COALESCE_TX_MAX_FRAMES invokes the
ops->get_coalesce and ops->set_coalesce callbacks.
This patch changes their behaviour when the card has VIRTIO_NET_F_NOTF_COAL.

Userspace making assumptions about what this option does will
thinkably might get unexpected behaviour. So:

Minimally we need a way for userspace to find out what are the semantics
of the command now, so one can implement portable userspace going
forward.

Preferably, analysis of existing userspace, what it does and how
does the change affect it should be included.

Ideally, a work-around that does not affect existing userspace
would be found.


> 
> >
> > > For uAPI we know it should follow
> > > the spec? (anyhow NAPI is something out of the spec)
> > >
> > > Thanks
> >
> > When you say uAPI here you mean the virtio header. I am not
> > worried about that just yet (maybe I should be).
> >
> > > >
> > > > --
> > > > MST
> > > >
> >
Michael S. Tsirkin July 20, 2022, 9:09 a.m. UTC | #19
On Wed, Jul 20, 2022 at 03:45:59PM +0800, Jason Wang wrote:
> On Wed, Jul 20, 2022 at 3:42 PM Jason Wang <jasowang@redhat.com> wrote:
> >
> > On Wed, Jul 20, 2022 at 3:15 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> > >
> > > On Wed, Jul 20, 2022 at 10:07:11AM +0300, Alvaro Karsz wrote:
> > > > > Hmm. we currently (ab)use tx_max_coalesced_frames values 0 and 1 to mean tx
> > > > napi on/off.
> > > > > However I am not sure we should treat any value != 1 as napi on.
> > > > >
> > > > > I don't really have good ideas - I think abusing coalescing might
> > > > > have been a mistake. But now that we are there, I feel we need
> > > > > a way for userspace to at least be able to figure out whether
> > > > > setting coalescing to 0 will have nasty side effects.
> > > >
> > > >
> > > > So, how can I proceed from here?
> > > > Maybe we don't need to use tx napi when this feature is negotiated (like Jakub
> > > > suggested in prev. versions)?
> > > > It makes sense, since the number of TX notifications can be reduced by setting
> > > > tx_usecs/tx_max_packets with ethtool.
> > >
> > >
> > > Hmm Jason had some ideas about extensions in mind when he
> > > coded the current UAPI, let's see if he has ideas.
> > > I'll ruminate on compatibility a bit too.
> >
> > I might be wrong, but using 1 to enable tx napi is a way to try to be
> > compatible with the interrupt coalescing.
> >
> > That is, without notification coalescing, if 1 is set, we enable tx
> > notifications (and NAPI) for each packet. This works as if
> > tx-max-coalesced-frames is set to 1 when notification coalescing is
> > negotiated.
> >
> > Thanks
> >
> > >
> > > > > It's also a bit of a spec defect that it does not document corner cases
> > > > > like what do 0 values do, are they different from 1? or what are max values.
> > > > > Not too late to fix?
> > > >
> > > >
> > > > I think that some of the corner cases can be understood from the coalescing
> > > > values.
> > > > For example:
> > > > if rx_usecs=0 we should wait for 0 usecs, meaning that we should send a
> > > > notification immediately.
> > > > But if rx_usecs=1 we should wait for 1 usec.
> > > > The case with max_packets is a little bit unclear for the values 0/1, and it
> > > > seems that in both cases we should send a notification immediately after
> > > > receiving/sending a packet.
> 
> Then we probably need to document this in the spec.
> 
> And we need an upper limit for those values, this helps for e.g
> migration compatibility.
> 
> Thanks

Not a bad idea generally but I suspect this is better done
as part of the admin queue/migration work then.


> > > >
> > > >
> > > > > So the spec says
> > > > >         Device supports notifications coalescing.
> > > > >
> > > > > which makes more sense - there's not a lot guest needs to do here.
> > > >
> > > >
> > > > Noted.
> > > >
> > > > > parameters?
> > > >
> > > >
> > > > I'll change it to "settings".
> > > >
> > > > > why with dash here? And why not just put the comments near the fields
> > > > > themselves?
> > > >
> > > >
> > > > Noted.
> > >
Michael S. Tsirkin Aug. 9, 2022, 7:11 p.m. UTC | #20
On Mon, Jul 18, 2022 at 12:11:02PM +0300, Alvaro Karsz wrote:
> New VirtIO network feature: VIRTIO_NET_F_NOTF_COAL.
> 
> Control a Virtio network device notifications coalescing parameters
> using the control virtqueue.
> 
> A device that supports this fetature can receive
> VIRTIO_NET_CTRL_NOTF_COAL control commands.
> 
> - VIRTIO_NET_CTRL_NOTF_COAL_TX_SET:
>   Ask the network device to change the following parameters:
>   - tx_usecs: Maximum number of usecs to delay a TX notification.
>   - tx_max_packets: Maximum number of packets to send before a
>     TX notification.
> 
> - VIRTIO_NET_CTRL_NOTF_COAL_RX_SET:
>   Ask the network device to change the following parameters:
>   - rx_usecs: Maximum number of usecs to delay a RX notification.
>   - rx_max_packets: Maximum number of packets to receive before a
>     RX notification.
> 
> VirtIO spec. patch:
> https://lists.oasis-open.org/archives/virtio-comment/202206/msg00100.html
> 
> Signed-off-by: Alvaro Karsz <alvaro.karsz@solid-run.com>
> ---
> v2:
> 	- Fix type assignments warnings found with sparse.
> 	- Fix a few typos.
> 
> v3:
>   - Change the coalescing parameters in a dedicated function.
>   - Return -EBUSY from the set coalescing function when the device's
>     link is up, even if the notifications coalescing feature is negotiated.
> 
> v4:
>   - If link is up and we need to update NAPI weight, return -EBUSY before
>     sending the coalescing commands to the device
> ---
>  drivers/net/virtio_net.c        | 111 +++++++++++++++++++++++++++-----
>  include/uapi/linux/virtio_net.h |  34 +++++++++-
>  2 files changed, 129 insertions(+), 16 deletions(-)
> 
> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> index 356cf8dd416..4fde66bd511 100644
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
> @@ -261,6 +261,12 @@ struct virtnet_info {
>  	u8 duplex;
>  	u32 speed;
> 
> +	/* Interrupt coalescing settings */
> +	u32 tx_usecs;
> +	u32 rx_usecs;
> +	u32 tx_max_packets;
> +	u32 rx_max_packets;
> +
>  	unsigned long guest_offloads;
>  	unsigned long guest_offloads_capable;
> 
> @@ -2587,27 +2593,89 @@ static int virtnet_get_link_ksettings(struct net_device *dev,
>  	return 0;
>  }
> 
> +static int virtnet_send_notf_coal_cmds(struct virtnet_info *vi,
> +				       struct ethtool_coalesce *ec)
> +{
> +	struct scatterlist sgs_tx, sgs_rx;
> +	struct virtio_net_ctrl_coal_tx coal_tx;
> +	struct virtio_net_ctrl_coal_rx coal_rx;
> +
> +	coal_tx.tx_usecs = cpu_to_le32(ec->tx_coalesce_usecs);
> +	coal_tx.tx_max_packets = cpu_to_le32(ec->tx_max_coalesced_frames);
> +	sg_init_one(&sgs_tx, &coal_tx, sizeof(coal_tx));
> +
> +	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL,
> +				  VIRTIO_NET_CTRL_NOTF_COAL_TX_SET,
> +				  &sgs_tx))
> +		return -EINVAL;
> +
> +	/* Save parameters */
> +	vi->tx_usecs = ec->tx_coalesce_usecs;
> +	vi->tx_max_packets = ec->tx_max_coalesced_frames;
> +
> +	coal_rx.rx_usecs = cpu_to_le32(ec->rx_coalesce_usecs);
> +	coal_rx.rx_max_packets = cpu_to_le32(ec->rx_max_coalesced_frames);
> +	sg_init_one(&sgs_rx, &coal_rx, sizeof(coal_rx));
> +
> +	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL,
> +				  VIRTIO_NET_CTRL_NOTF_COAL_RX_SET,
> +				  &sgs_rx))
> +		return -EINVAL;
> +
> +	/* Save parameters */
> +	vi->rx_usecs = ec->rx_coalesce_usecs;
> +	vi->rx_max_packets = ec->rx_max_coalesced_frames;
> +
> +	return 0;
> +}
> +
> +static int virtnet_coal_params_supported(struct ethtool_coalesce *ec)
> +{
> +	/* usecs coalescing is supported only if VIRTIO_NET_F_NOTF_COAL
> +	 * feature is negotiated.
> +	 */
> +	if (ec->rx_coalesce_usecs || ec->tx_coalesce_usecs)
> +		return -EOPNOTSUPP;
> +
> +	if (ec->tx_max_coalesced_frames > 1 ||
> +	    ec->rx_max_coalesced_frames != 1)
> +		return -EINVAL;
> +
> +	return 0;
> +}
> +
>  static int virtnet_set_coalesce(struct net_device *dev,
>  				struct ethtool_coalesce *ec,
>  				struct kernel_ethtool_coalesce *kernel_coal,
>  				struct netlink_ext_ack *extack)
>  {
>  	struct virtnet_info *vi = netdev_priv(dev);
> -	int i, napi_weight;
> -
> -	if (ec->tx_max_coalesced_frames > 1 ||
> -	    ec->rx_max_coalesced_frames != 1)
> -		return -EINVAL;
> +	int ret, i, napi_weight;
> +	bool update_napi = false;
> 
> +	/* Can't change NAPI weight if the link is up */
>  	napi_weight = ec->tx_max_coalesced_frames ? NAPI_POLL_WEIGHT : 0;
>  	if (napi_weight ^ vi->sq[0].napi.weight) {
>  		if (dev->flags & IFF_UP)
>  			return -EBUSY;
> +		else
> +			update_napi = true;
> +	}
> +
> +	if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL))
> +		ret = virtnet_send_notf_coal_cmds(vi, ec);
> +	else
> +		ret = virtnet_coal_params_supported(ec);
> +
> +	if (ret)
> +		return ret;
> +
> +	if (update_napi) {
>  		for (i = 0; i < vi->max_queue_pairs; i++)
>  			vi->sq[i].napi.weight = napi_weight;
>  	}
> 
> -	return 0;
> +	return ret;
>  }
> 
>  static int virtnet_get_coalesce(struct net_device *dev,


OK so I thought hard about this. At the moment the only way
I see is this:

for tx max coalesced, interpret tx_max_coalesced_frames == 0
as napi disable. tx_max_coalesced_frames = 0 as napi
disable, tx_max_coalesced_frames != 0 as napi enable
and if the feature has been negotiated additionally forward
it to hardware. If not force tx_max_coalesced_frames <= 1
as we do now.

Without hardware support force rx_max_coalesced_frames == 1
as we do now otherwise do not force and forward to hardware.


> @@ -2615,16 +2683,19 @@ static int virtnet_get_coalesce(struct net_device *dev,
>  				struct kernel_ethtool_coalesce *kernel_coal,
>  				struct netlink_ext_ack *extack)
>  {
> -	struct ethtool_coalesce ec_default = {
> -		.cmd = ETHTOOL_GCOALESCE,
> -		.rx_max_coalesced_frames = 1,
> -	};
>  	struct virtnet_info *vi = netdev_priv(dev);
> 
> -	memcpy(ec, &ec_default, sizeof(ec_default));
> +	if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) {
> +		ec->rx_coalesce_usecs = vi->rx_usecs;
> +		ec->tx_coalesce_usecs = vi->tx_usecs;
> +		ec->tx_max_coalesced_frames = vi->tx_max_packets;
> +		ec->rx_max_coalesced_frames = vi->rx_max_packets;
> +	} else {
> +		ec->rx_max_coalesced_frames = 1;
> 
> -	if (vi->sq[0].napi.weight)
> -		ec->tx_max_coalesced_frames = 1;
> +		if (vi->sq[0].napi.weight)
> +			ec->tx_max_coalesced_frames = 1;
> +	}
> 
>  	return 0;
>  }
> @@ -2743,7 +2814,8 @@ static int virtnet_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info)
>  }
> 
>  static const struct ethtool_ops virtnet_ethtool_ops = {
> -	.supported_coalesce_params = ETHTOOL_COALESCE_MAX_FRAMES,
> +	.supported_coalesce_params = ETHTOOL_COALESCE_MAX_FRAMES |
> +		ETHTOOL_COALESCE_USECS,
>  	.get_drvinfo = virtnet_get_drvinfo,
>  	.get_link = ethtool_op_get_link,
>  	.get_ringparam = virtnet_get_ringparam,
> @@ -3411,6 +3483,8 @@ static bool virtnet_validate_features(struct virtio_device *vdev)
>  	     VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_RSS,
>  			     "VIRTIO_NET_F_CTRL_VQ") ||
>  	     VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_HASH_REPORT,
> +			     "VIRTIO_NET_F_CTRL_VQ") ||
> +	     VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_NOTF_COAL,
>  			     "VIRTIO_NET_F_CTRL_VQ"))) {
>  		return false;
>  	}
> @@ -3546,6 +3620,13 @@ static int virtnet_probe(struct virtio_device *vdev)
>  	if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF))
>  		vi->mergeable_rx_bufs = true;
> 
> +	if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) {
> +		vi->rx_usecs = 0;
> +		vi->tx_usecs = 0;
> +		vi->tx_max_packets = 0;
> +		vi->rx_max_packets = 0;
> +	}
> +
>  	if (virtio_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT))
>  		vi->has_rss_hash_report = true;
> 
> @@ -3780,7 +3861,7 @@ static struct virtio_device_id id_table[] = {
>  	VIRTIO_NET_F_CTRL_MAC_ADDR, \
>  	VIRTIO_NET_F_MTU, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, \
>  	VIRTIO_NET_F_SPEED_DUPLEX, VIRTIO_NET_F_STANDBY, \
> -	VIRTIO_NET_F_RSS, VIRTIO_NET_F_HASH_REPORT
> +	VIRTIO_NET_F_RSS, VIRTIO_NET_F_HASH_REPORT, VIRTIO_NET_F_NOTF_COAL
> 
>  static unsigned int features[] = {
>  	VIRTNET_FEATURES,
> diff --git a/include/uapi/linux/virtio_net.h b/include/uapi/linux/virtio_net.h
> index 3f55a4215f1..29ced55514d 100644
> --- a/include/uapi/linux/virtio_net.h
> +++ b/include/uapi/linux/virtio_net.h
> @@ -56,7 +56,7 @@
>  #define VIRTIO_NET_F_MQ	22	/* Device supports Receive Flow
>  					 * Steering */
>  #define VIRTIO_NET_F_CTRL_MAC_ADDR 23	/* Set MAC address */
> -
> +#define VIRTIO_NET_F_NOTF_COAL	53	/* Guest can handle notifications coalescing */
>  #define VIRTIO_NET_F_HASH_REPORT  57	/* Supports hash report */
>  #define VIRTIO_NET_F_RSS	  60	/* Supports RSS RX steering */
>  #define VIRTIO_NET_F_RSC_EXT	  61	/* extended coalescing info */
> @@ -355,4 +355,36 @@ struct virtio_net_hash_config {
>  #define VIRTIO_NET_CTRL_GUEST_OFFLOADS   5
>  #define VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET        0
> 
> +/*
> + * Control notifications coalescing.
> + *
> + * Request the device to change the notifications coalescing parameters.
> + *
> + * Available with the VIRTIO_NET_F_NOTF_COAL feature bit.
> + */
> +#define VIRTIO_NET_CTRL_NOTF_COAL		6
> +/*
> + * Set the tx-usecs/tx-max-packets patameters.
> + * tx-usecs - Maximum number of usecs to delay a TX notification.
> + * tx-max-packets - Maximum number of packets to send before a TX notification.
> + */
> +struct virtio_net_ctrl_coal_tx {
> +	__le32 tx_max_packets;
> +	__le32 tx_usecs;
> +};
> +
> +#define VIRTIO_NET_CTRL_NOTF_COAL_TX_SET		0
> +
> +/*
> + * Set the rx-usecs/rx-max-packets patameters.
> + * rx-usecs - Maximum number of usecs to delay a RX notification.
> + * rx-max-frames - Maximum number of packets to receive before a RX notification.
> + */
> +struct virtio_net_ctrl_coal_rx {
> +	__le32 rx_max_packets;
> +	__le32 rx_usecs;
> +};
> +
> +#define VIRTIO_NET_CTRL_NOTF_COAL_RX_SET		1
> +
>  #endif /* _UAPI_LINUX_VIRTIO_NET_H */
> --
> 2.32.0
Michael S. Tsirkin Aug. 9, 2022, 7:14 p.m. UTC | #21
On Mon, Jul 18, 2022 at 12:11:02PM +0300, Alvaro Karsz wrote:
> New VirtIO network feature: VIRTIO_NET_F_NOTF_COAL.
> 
> Control a Virtio network device notifications coalescing parameters
> using the control virtqueue.
> 
> A device that supports this fetature can receive
> VIRTIO_NET_CTRL_NOTF_COAL control commands.
> 
> - VIRTIO_NET_CTRL_NOTF_COAL_TX_SET:
>   Ask the network device to change the following parameters:
>   - tx_usecs: Maximum number of usecs to delay a TX notification.
>   - tx_max_packets: Maximum number of packets to send before a
>     TX notification.
> 
> - VIRTIO_NET_CTRL_NOTF_COAL_RX_SET:
>   Ask the network device to change the following parameters:
>   - rx_usecs: Maximum number of usecs to delay a RX notification.
>   - rx_max_packets: Maximum number of packets to receive before a
>     RX notification.
> 
> VirtIO spec. patch:
> https://lists.oasis-open.org/archives/virtio-comment/202206/msg00100.html
> 
> Signed-off-by: Alvaro Karsz <alvaro.karsz@solid-run.com>
> ---
> v2:
> 	- Fix type assignments warnings found with sparse.
> 	- Fix a few typos.
> 
> v3:
>   - Change the coalescing parameters in a dedicated function.
>   - Return -EBUSY from the set coalescing function when the device's
>     link is up, even if the notifications coalescing feature is negotiated.
> 
> v4:
>   - If link is up and we need to update NAPI weight, return -EBUSY before
>     sending the coalescing commands to the device
> ---


So given this got a bunch of acks I picked this up as is, but let's
address Jason's comments for wording in comments as a follow up patch
please.

>  drivers/net/virtio_net.c        | 111 +++++++++++++++++++++++++++-----
>  include/uapi/linux/virtio_net.h |  34 +++++++++-
>  2 files changed, 129 insertions(+), 16 deletions(-)
> 
> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> index 356cf8dd416..4fde66bd511 100644
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
> @@ -261,6 +261,12 @@ struct virtnet_info {
>  	u8 duplex;
>  	u32 speed;
> 
> +	/* Interrupt coalescing settings */
> +	u32 tx_usecs;
> +	u32 rx_usecs;
> +	u32 tx_max_packets;
> +	u32 rx_max_packets;
> +
>  	unsigned long guest_offloads;
>  	unsigned long guest_offloads_capable;
> 
> @@ -2587,27 +2593,89 @@ static int virtnet_get_link_ksettings(struct net_device *dev,
>  	return 0;
>  }
> 
> +static int virtnet_send_notf_coal_cmds(struct virtnet_info *vi,
> +				       struct ethtool_coalesce *ec)
> +{
> +	struct scatterlist sgs_tx, sgs_rx;
> +	struct virtio_net_ctrl_coal_tx coal_tx;
> +	struct virtio_net_ctrl_coal_rx coal_rx;
> +
> +	coal_tx.tx_usecs = cpu_to_le32(ec->tx_coalesce_usecs);
> +	coal_tx.tx_max_packets = cpu_to_le32(ec->tx_max_coalesced_frames);
> +	sg_init_one(&sgs_tx, &coal_tx, sizeof(coal_tx));
> +
> +	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL,
> +				  VIRTIO_NET_CTRL_NOTF_COAL_TX_SET,
> +				  &sgs_tx))
> +		return -EINVAL;
> +
> +	/* Save parameters */
> +	vi->tx_usecs = ec->tx_coalesce_usecs;
> +	vi->tx_max_packets = ec->tx_max_coalesced_frames;
> +
> +	coal_rx.rx_usecs = cpu_to_le32(ec->rx_coalesce_usecs);
> +	coal_rx.rx_max_packets = cpu_to_le32(ec->rx_max_coalesced_frames);
> +	sg_init_one(&sgs_rx, &coal_rx, sizeof(coal_rx));
> +
> +	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL,
> +				  VIRTIO_NET_CTRL_NOTF_COAL_RX_SET,
> +				  &sgs_rx))
> +		return -EINVAL;
> +
> +	/* Save parameters */
> +	vi->rx_usecs = ec->rx_coalesce_usecs;
> +	vi->rx_max_packets = ec->rx_max_coalesced_frames;
> +
> +	return 0;
> +}
> +
> +static int virtnet_coal_params_supported(struct ethtool_coalesce *ec)
> +{
> +	/* usecs coalescing is supported only if VIRTIO_NET_F_NOTF_COAL
> +	 * feature is negotiated.
> +	 */
> +	if (ec->rx_coalesce_usecs || ec->tx_coalesce_usecs)
> +		return -EOPNOTSUPP;
> +
> +	if (ec->tx_max_coalesced_frames > 1 ||
> +	    ec->rx_max_coalesced_frames != 1)
> +		return -EINVAL;
> +
> +	return 0;
> +}
> +
>  static int virtnet_set_coalesce(struct net_device *dev,
>  				struct ethtool_coalesce *ec,
>  				struct kernel_ethtool_coalesce *kernel_coal,
>  				struct netlink_ext_ack *extack)
>  {
>  	struct virtnet_info *vi = netdev_priv(dev);
> -	int i, napi_weight;
> -
> -	if (ec->tx_max_coalesced_frames > 1 ||
> -	    ec->rx_max_coalesced_frames != 1)
> -		return -EINVAL;
> +	int ret, i, napi_weight;
> +	bool update_napi = false;
> 
> +	/* Can't change NAPI weight if the link is up */
>  	napi_weight = ec->tx_max_coalesced_frames ? NAPI_POLL_WEIGHT : 0;
>  	if (napi_weight ^ vi->sq[0].napi.weight) {
>  		if (dev->flags & IFF_UP)
>  			return -EBUSY;
> +		else
> +			update_napi = true;
> +	}
> +
> +	if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL))
> +		ret = virtnet_send_notf_coal_cmds(vi, ec);
> +	else
> +		ret = virtnet_coal_params_supported(ec);
> +
> +	if (ret)
> +		return ret;
> +
> +	if (update_napi) {
>  		for (i = 0; i < vi->max_queue_pairs; i++)
>  			vi->sq[i].napi.weight = napi_weight;
>  	}
> 
> -	return 0;
> +	return ret;
>  }
> 
>  static int virtnet_get_coalesce(struct net_device *dev,
> @@ -2615,16 +2683,19 @@ static int virtnet_get_coalesce(struct net_device *dev,
>  				struct kernel_ethtool_coalesce *kernel_coal,
>  				struct netlink_ext_ack *extack)
>  {
> -	struct ethtool_coalesce ec_default = {
> -		.cmd = ETHTOOL_GCOALESCE,
> -		.rx_max_coalesced_frames = 1,
> -	};
>  	struct virtnet_info *vi = netdev_priv(dev);
> 
> -	memcpy(ec, &ec_default, sizeof(ec_default));
> +	if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) {
> +		ec->rx_coalesce_usecs = vi->rx_usecs;
> +		ec->tx_coalesce_usecs = vi->tx_usecs;
> +		ec->tx_max_coalesced_frames = vi->tx_max_packets;
> +		ec->rx_max_coalesced_frames = vi->rx_max_packets;
> +	} else {
> +		ec->rx_max_coalesced_frames = 1;
> 
> -	if (vi->sq[0].napi.weight)
> -		ec->tx_max_coalesced_frames = 1;
> +		if (vi->sq[0].napi.weight)
> +			ec->tx_max_coalesced_frames = 1;
> +	}
> 
>  	return 0;
>  }
> @@ -2743,7 +2814,8 @@ static int virtnet_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info)
>  }
> 
>  static const struct ethtool_ops virtnet_ethtool_ops = {
> -	.supported_coalesce_params = ETHTOOL_COALESCE_MAX_FRAMES,
> +	.supported_coalesce_params = ETHTOOL_COALESCE_MAX_FRAMES |
> +		ETHTOOL_COALESCE_USECS,
>  	.get_drvinfo = virtnet_get_drvinfo,
>  	.get_link = ethtool_op_get_link,
>  	.get_ringparam = virtnet_get_ringparam,
> @@ -3411,6 +3483,8 @@ static bool virtnet_validate_features(struct virtio_device *vdev)
>  	     VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_RSS,
>  			     "VIRTIO_NET_F_CTRL_VQ") ||
>  	     VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_HASH_REPORT,
> +			     "VIRTIO_NET_F_CTRL_VQ") ||
> +	     VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_NOTF_COAL,
>  			     "VIRTIO_NET_F_CTRL_VQ"))) {
>  		return false;
>  	}
> @@ -3546,6 +3620,13 @@ static int virtnet_probe(struct virtio_device *vdev)
>  	if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF))
>  		vi->mergeable_rx_bufs = true;
> 
> +	if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) {
> +		vi->rx_usecs = 0;
> +		vi->tx_usecs = 0;
> +		vi->tx_max_packets = 0;
> +		vi->rx_max_packets = 0;
> +	}
> +
>  	if (virtio_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT))
>  		vi->has_rss_hash_report = true;
> 
> @@ -3780,7 +3861,7 @@ static struct virtio_device_id id_table[] = {
>  	VIRTIO_NET_F_CTRL_MAC_ADDR, \
>  	VIRTIO_NET_F_MTU, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, \
>  	VIRTIO_NET_F_SPEED_DUPLEX, VIRTIO_NET_F_STANDBY, \
> -	VIRTIO_NET_F_RSS, VIRTIO_NET_F_HASH_REPORT
> +	VIRTIO_NET_F_RSS, VIRTIO_NET_F_HASH_REPORT, VIRTIO_NET_F_NOTF_COAL
> 
>  static unsigned int features[] = {
>  	VIRTNET_FEATURES,
> diff --git a/include/uapi/linux/virtio_net.h b/include/uapi/linux/virtio_net.h
> index 3f55a4215f1..29ced55514d 100644
> --- a/include/uapi/linux/virtio_net.h
> +++ b/include/uapi/linux/virtio_net.h
> @@ -56,7 +56,7 @@
>  #define VIRTIO_NET_F_MQ	22	/* Device supports Receive Flow
>  					 * Steering */
>  #define VIRTIO_NET_F_CTRL_MAC_ADDR 23	/* Set MAC address */
> -
> +#define VIRTIO_NET_F_NOTF_COAL	53	/* Guest can handle notifications coalescing */
>  #define VIRTIO_NET_F_HASH_REPORT  57	/* Supports hash report */
>  #define VIRTIO_NET_F_RSS	  60	/* Supports RSS RX steering */
>  #define VIRTIO_NET_F_RSC_EXT	  61	/* extended coalescing info */
> @@ -355,4 +355,36 @@ struct virtio_net_hash_config {
>  #define VIRTIO_NET_CTRL_GUEST_OFFLOADS   5
>  #define VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET        0
> 
> +/*
> + * Control notifications coalescing.
> + *
> + * Request the device to change the notifications coalescing parameters.
> + *
> + * Available with the VIRTIO_NET_F_NOTF_COAL feature bit.
> + */
> +#define VIRTIO_NET_CTRL_NOTF_COAL		6
> +/*
> + * Set the tx-usecs/tx-max-packets patameters.
> + * tx-usecs - Maximum number of usecs to delay a TX notification.
> + * tx-max-packets - Maximum number of packets to send before a TX notification.
> + */
> +struct virtio_net_ctrl_coal_tx {
> +	__le32 tx_max_packets;
> +	__le32 tx_usecs;
> +};
> +
> +#define VIRTIO_NET_CTRL_NOTF_COAL_TX_SET		0
> +
> +/*
> + * Set the rx-usecs/rx-max-packets patameters.
> + * rx-usecs - Maximum number of usecs to delay a RX notification.
> + * rx-max-frames - Maximum number of packets to receive before a RX notification.
> + */
> +struct virtio_net_ctrl_coal_rx {
> +	__le32 rx_max_packets;
> +	__le32 rx_usecs;
> +};
> +
> +#define VIRTIO_NET_CTRL_NOTF_COAL_RX_SET		1
> +
>  #endif /* _UAPI_LINUX_VIRTIO_NET_H */
> --
> 2.32.0
Xuan Zhuo Aug. 29, 2022, 8:46 a.m. UTC | #22
On Mon, 18 Jul 2022 12:11:02 +0300, Alvaro Karsz <alvaro.karsz@solid-run.com> wrote:
> New VirtIO network feature: VIRTIO_NET_F_NOTF_COAL.
>
> Control a Virtio network device notifications coalescing parameters
> using the control virtqueue.
>
> A device that supports this fetature can receive
> VIRTIO_NET_CTRL_NOTF_COAL control commands.
>
> - VIRTIO_NET_CTRL_NOTF_COAL_TX_SET:
>   Ask the network device to change the following parameters:
>   - tx_usecs: Maximum number of usecs to delay a TX notification.
>   - tx_max_packets: Maximum number of packets to send before a
>     TX notification.
>
> - VIRTIO_NET_CTRL_NOTF_COAL_RX_SET:
>   Ask the network device to change the following parameters:
>   - rx_usecs: Maximum number of usecs to delay a RX notification.
>   - rx_max_packets: Maximum number of packets to receive before a
>     RX notification.
>
> VirtIO spec. patch:
> https://lists.oasis-open.org/archives/virtio-comment/202206/msg00100.html
>
> Signed-off-by: Alvaro Karsz <alvaro.karsz@solid-run.com>


We are very interested in this, and I would like to know what other plans you
have in the future? Such as qemu, vhost-uer, vhost-net. And further development
work in the kernel.

Thanks.


> ---
> v2:
> 	- Fix type assignments warnings found with sparse.
> 	- Fix a few typos.
>
> v3:
>   - Change the coalescing parameters in a dedicated function.
>   - Return -EBUSY from the set coalescing function when the device's
>     link is up, even if the notifications coalescing feature is negotiated.
>
> v4:
>   - If link is up and we need to update NAPI weight, return -EBUSY before
>     sending the coalescing commands to the device
> ---
>  drivers/net/virtio_net.c        | 111 +++++++++++++++++++++++++++-----
>  include/uapi/linux/virtio_net.h |  34 +++++++++-
>  2 files changed, 129 insertions(+), 16 deletions(-)
>
> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> index 356cf8dd416..4fde66bd511 100644
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
> @@ -261,6 +261,12 @@ struct virtnet_info {
>  	u8 duplex;
>  	u32 speed;
>
> +	/* Interrupt coalescing settings */
> +	u32 tx_usecs;
> +	u32 rx_usecs;
> +	u32 tx_max_packets;
> +	u32 rx_max_packets;
> +
>  	unsigned long guest_offloads;
>  	unsigned long guest_offloads_capable;
>
> @@ -2587,27 +2593,89 @@ static int virtnet_get_link_ksettings(struct net_device *dev,
>  	return 0;
>  }
>
> +static int virtnet_send_notf_coal_cmds(struct virtnet_info *vi,
> +				       struct ethtool_coalesce *ec)
> +{
> +	struct scatterlist sgs_tx, sgs_rx;
> +	struct virtio_net_ctrl_coal_tx coal_tx;
> +	struct virtio_net_ctrl_coal_rx coal_rx;
> +
> +	coal_tx.tx_usecs = cpu_to_le32(ec->tx_coalesce_usecs);
> +	coal_tx.tx_max_packets = cpu_to_le32(ec->tx_max_coalesced_frames);
> +	sg_init_one(&sgs_tx, &coal_tx, sizeof(coal_tx));
> +
> +	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL,
> +				  VIRTIO_NET_CTRL_NOTF_COAL_TX_SET,
> +				  &sgs_tx))
> +		return -EINVAL;
> +
> +	/* Save parameters */
> +	vi->tx_usecs = ec->tx_coalesce_usecs;
> +	vi->tx_max_packets = ec->tx_max_coalesced_frames;
> +
> +	coal_rx.rx_usecs = cpu_to_le32(ec->rx_coalesce_usecs);
> +	coal_rx.rx_max_packets = cpu_to_le32(ec->rx_max_coalesced_frames);
> +	sg_init_one(&sgs_rx, &coal_rx, sizeof(coal_rx));
> +
> +	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL,
> +				  VIRTIO_NET_CTRL_NOTF_COAL_RX_SET,
> +				  &sgs_rx))
> +		return -EINVAL;
> +
> +	/* Save parameters */
> +	vi->rx_usecs = ec->rx_coalesce_usecs;
> +	vi->rx_max_packets = ec->rx_max_coalesced_frames;
> +
> +	return 0;
> +}
> +
> +static int virtnet_coal_params_supported(struct ethtool_coalesce *ec)
> +{
> +	/* usecs coalescing is supported only if VIRTIO_NET_F_NOTF_COAL
> +	 * feature is negotiated.
> +	 */
> +	if (ec->rx_coalesce_usecs || ec->tx_coalesce_usecs)
> +		return -EOPNOTSUPP;
> +
> +	if (ec->tx_max_coalesced_frames > 1 ||
> +	    ec->rx_max_coalesced_frames != 1)
> +		return -EINVAL;
> +
> +	return 0;
> +}
> +
>  static int virtnet_set_coalesce(struct net_device *dev,
>  				struct ethtool_coalesce *ec,
>  				struct kernel_ethtool_coalesce *kernel_coal,
>  				struct netlink_ext_ack *extack)
>  {
>  	struct virtnet_info *vi = netdev_priv(dev);
> -	int i, napi_weight;
> -
> -	if (ec->tx_max_coalesced_frames > 1 ||
> -	    ec->rx_max_coalesced_frames != 1)
> -		return -EINVAL;
> +	int ret, i, napi_weight;
> +	bool update_napi = false;
>
> +	/* Can't change NAPI weight if the link is up */
>  	napi_weight = ec->tx_max_coalesced_frames ? NAPI_POLL_WEIGHT : 0;
>  	if (napi_weight ^ vi->sq[0].napi.weight) {
>  		if (dev->flags & IFF_UP)
>  			return -EBUSY;
> +		else
> +			update_napi = true;
> +	}
> +
> +	if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL))
> +		ret = virtnet_send_notf_coal_cmds(vi, ec);
> +	else
> +		ret = virtnet_coal_params_supported(ec);
> +
> +	if (ret)
> +		return ret;
> +
> +	if (update_napi) {
>  		for (i = 0; i < vi->max_queue_pairs; i++)
>  			vi->sq[i].napi.weight = napi_weight;
>  	}
>
> -	return 0;
> +	return ret;
>  }
>
>  static int virtnet_get_coalesce(struct net_device *dev,
> @@ -2615,16 +2683,19 @@ static int virtnet_get_coalesce(struct net_device *dev,
>  				struct kernel_ethtool_coalesce *kernel_coal,
>  				struct netlink_ext_ack *extack)
>  {
> -	struct ethtool_coalesce ec_default = {
> -		.cmd = ETHTOOL_GCOALESCE,
> -		.rx_max_coalesced_frames = 1,
> -	};
>  	struct virtnet_info *vi = netdev_priv(dev);
>
> -	memcpy(ec, &ec_default, sizeof(ec_default));
> +	if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) {
> +		ec->rx_coalesce_usecs = vi->rx_usecs;
> +		ec->tx_coalesce_usecs = vi->tx_usecs;
> +		ec->tx_max_coalesced_frames = vi->tx_max_packets;
> +		ec->rx_max_coalesced_frames = vi->rx_max_packets;
> +	} else {
> +		ec->rx_max_coalesced_frames = 1;
>
> -	if (vi->sq[0].napi.weight)
> -		ec->tx_max_coalesced_frames = 1;
> +		if (vi->sq[0].napi.weight)
> +			ec->tx_max_coalesced_frames = 1;
> +	}
>
>  	return 0;
>  }
> @@ -2743,7 +2814,8 @@ static int virtnet_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info)
>  }
>
>  static const struct ethtool_ops virtnet_ethtool_ops = {
> -	.supported_coalesce_params = ETHTOOL_COALESCE_MAX_FRAMES,
> +	.supported_coalesce_params = ETHTOOL_COALESCE_MAX_FRAMES |
> +		ETHTOOL_COALESCE_USECS,
>  	.get_drvinfo = virtnet_get_drvinfo,
>  	.get_link = ethtool_op_get_link,
>  	.get_ringparam = virtnet_get_ringparam,
> @@ -3411,6 +3483,8 @@ static bool virtnet_validate_features(struct virtio_device *vdev)
>  	     VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_RSS,
>  			     "VIRTIO_NET_F_CTRL_VQ") ||
>  	     VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_HASH_REPORT,
> +			     "VIRTIO_NET_F_CTRL_VQ") ||
> +	     VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_NOTF_COAL,
>  			     "VIRTIO_NET_F_CTRL_VQ"))) {
>  		return false;
>  	}
> @@ -3546,6 +3620,13 @@ static int virtnet_probe(struct virtio_device *vdev)
>  	if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF))
>  		vi->mergeable_rx_bufs = true;
>
> +	if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) {
> +		vi->rx_usecs = 0;
> +		vi->tx_usecs = 0;
> +		vi->tx_max_packets = 0;
> +		vi->rx_max_packets = 0;
> +	}
> +
>  	if (virtio_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT))
>  		vi->has_rss_hash_report = true;
>
> @@ -3780,7 +3861,7 @@ static struct virtio_device_id id_table[] = {
>  	VIRTIO_NET_F_CTRL_MAC_ADDR, \
>  	VIRTIO_NET_F_MTU, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, \
>  	VIRTIO_NET_F_SPEED_DUPLEX, VIRTIO_NET_F_STANDBY, \
> -	VIRTIO_NET_F_RSS, VIRTIO_NET_F_HASH_REPORT
> +	VIRTIO_NET_F_RSS, VIRTIO_NET_F_HASH_REPORT, VIRTIO_NET_F_NOTF_COAL
>
>  static unsigned int features[] = {
>  	VIRTNET_FEATURES,
> diff --git a/include/uapi/linux/virtio_net.h b/include/uapi/linux/virtio_net.h
> index 3f55a4215f1..29ced55514d 100644
> --- a/include/uapi/linux/virtio_net.h
> +++ b/include/uapi/linux/virtio_net.h
> @@ -56,7 +56,7 @@
>  #define VIRTIO_NET_F_MQ	22	/* Device supports Receive Flow
>  					 * Steering */
>  #define VIRTIO_NET_F_CTRL_MAC_ADDR 23	/* Set MAC address */
> -
> +#define VIRTIO_NET_F_NOTF_COAL	53	/* Guest can handle notifications coalescing */
>  #define VIRTIO_NET_F_HASH_REPORT  57	/* Supports hash report */
>  #define VIRTIO_NET_F_RSS	  60	/* Supports RSS RX steering */
>  #define VIRTIO_NET_F_RSC_EXT	  61	/* extended coalescing info */
> @@ -355,4 +355,36 @@ struct virtio_net_hash_config {
>  #define VIRTIO_NET_CTRL_GUEST_OFFLOADS   5
>  #define VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET        0
>
> +/*
> + * Control notifications coalescing.
> + *
> + * Request the device to change the notifications coalescing parameters.
> + *
> + * Available with the VIRTIO_NET_F_NOTF_COAL feature bit.
> + */
> +#define VIRTIO_NET_CTRL_NOTF_COAL		6
> +/*
> + * Set the tx-usecs/tx-max-packets patameters.
> + * tx-usecs - Maximum number of usecs to delay a TX notification.
> + * tx-max-packets - Maximum number of packets to send before a TX notification.
> + */
> +struct virtio_net_ctrl_coal_tx {
> +	__le32 tx_max_packets;
> +	__le32 tx_usecs;
> +};
> +
> +#define VIRTIO_NET_CTRL_NOTF_COAL_TX_SET		0
> +
> +/*
> + * Set the rx-usecs/rx-max-packets patameters.
> + * rx-usecs - Maximum number of usecs to delay a RX notification.
> + * rx-max-frames - Maximum number of packets to receive before a RX notification.
> + */
> +struct virtio_net_ctrl_coal_rx {
> +	__le32 rx_max_packets;
> +	__le32 rx_usecs;
> +};
> +
> +#define VIRTIO_NET_CTRL_NOTF_COAL_RX_SET		1
> +
>  #endif /* _UAPI_LINUX_VIRTIO_NET_H */
> --
> 2.32.0
Alvaro Karsz Aug. 29, 2022, 9:14 a.m. UTC | #23
> We are very interested in this, and I would like to know what other plans you
> have in the future? Such as qemu, vhost-uer, vhost-net. And further development
> work in the kernel.

Hi Xuan,
I'm actually working on a VirtIO compatible DPU, no virtualization at
all, so I wasn't planning on implementing it in qemu or vhost.

I have some more development plans for the VirtIO spec and for the
linux kernel in virtio-net (and maybe virtio-blk).
Adding a timeout to the control vq is one example (needed with
physical VirtIO devices).

I would of course help to implement the notifications coalescing
feature in qemu/vhost if you need.

Alvaro
Xuan Zhuo Aug. 29, 2022, 9:18 a.m. UTC | #24
On Mon, 29 Aug 2022 12:14:54 +0300, Alvaro Karsz <alvaro.karsz@solid-run.com> wrote:
> > We are very interested in this, and I would like to know what other plans you
> > have in the future? Such as qemu, vhost-uer, vhost-net. And further development
> > work in the kernel.
>
> Hi Xuan,
> I'm actually working on a VirtIO compatible DPU, no virtualization at
> all, so I wasn't planning on implementing it in qemu or vhost.
>
> I have some more development plans for the VirtIO spec and for the
> linux kernel in virtio-net (and maybe virtio-blk).
> Adding a timeout to the control vq is one example (needed with
> physical VirtIO devices).
>
> I would of course help to implement the notifications coalescing
> feature in qemu/vhost if you need.


We'll do some implementation of net dim based on your previous work. In the
meantime, if you don't implement coalescing on backends like qemu, we might
do some work on that.

Thanks.


>
> Alvaro
diff mbox series

Patch

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 356cf8dd416..4fde66bd511 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -261,6 +261,12 @@  struct virtnet_info {
 	u8 duplex;
 	u32 speed;

+	/* Interrupt coalescing settings */
+	u32 tx_usecs;
+	u32 rx_usecs;
+	u32 tx_max_packets;
+	u32 rx_max_packets;
+
 	unsigned long guest_offloads;
 	unsigned long guest_offloads_capable;

@@ -2587,27 +2593,89 @@  static int virtnet_get_link_ksettings(struct net_device *dev,
 	return 0;
 }

+static int virtnet_send_notf_coal_cmds(struct virtnet_info *vi,
+				       struct ethtool_coalesce *ec)
+{
+	struct scatterlist sgs_tx, sgs_rx;
+	struct virtio_net_ctrl_coal_tx coal_tx;
+	struct virtio_net_ctrl_coal_rx coal_rx;
+
+	coal_tx.tx_usecs = cpu_to_le32(ec->tx_coalesce_usecs);
+	coal_tx.tx_max_packets = cpu_to_le32(ec->tx_max_coalesced_frames);
+	sg_init_one(&sgs_tx, &coal_tx, sizeof(coal_tx));
+
+	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL,
+				  VIRTIO_NET_CTRL_NOTF_COAL_TX_SET,
+				  &sgs_tx))
+		return -EINVAL;
+
+	/* Save parameters */
+	vi->tx_usecs = ec->tx_coalesce_usecs;
+	vi->tx_max_packets = ec->tx_max_coalesced_frames;
+
+	coal_rx.rx_usecs = cpu_to_le32(ec->rx_coalesce_usecs);
+	coal_rx.rx_max_packets = cpu_to_le32(ec->rx_max_coalesced_frames);
+	sg_init_one(&sgs_rx, &coal_rx, sizeof(coal_rx));
+
+	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL,
+				  VIRTIO_NET_CTRL_NOTF_COAL_RX_SET,
+				  &sgs_rx))
+		return -EINVAL;
+
+	/* Save parameters */
+	vi->rx_usecs = ec->rx_coalesce_usecs;
+	vi->rx_max_packets = ec->rx_max_coalesced_frames;
+
+	return 0;
+}
+
+static int virtnet_coal_params_supported(struct ethtool_coalesce *ec)
+{
+	/* usecs coalescing is supported only if VIRTIO_NET_F_NOTF_COAL
+	 * feature is negotiated.
+	 */
+	if (ec->rx_coalesce_usecs || ec->tx_coalesce_usecs)
+		return -EOPNOTSUPP;
+
+	if (ec->tx_max_coalesced_frames > 1 ||
+	    ec->rx_max_coalesced_frames != 1)
+		return -EINVAL;
+
+	return 0;
+}
+
 static int virtnet_set_coalesce(struct net_device *dev,
 				struct ethtool_coalesce *ec,
 				struct kernel_ethtool_coalesce *kernel_coal,
 				struct netlink_ext_ack *extack)
 {
 	struct virtnet_info *vi = netdev_priv(dev);
-	int i, napi_weight;
-
-	if (ec->tx_max_coalesced_frames > 1 ||
-	    ec->rx_max_coalesced_frames != 1)
-		return -EINVAL;
+	int ret, i, napi_weight;
+	bool update_napi = false;

+	/* Can't change NAPI weight if the link is up */
 	napi_weight = ec->tx_max_coalesced_frames ? NAPI_POLL_WEIGHT : 0;
 	if (napi_weight ^ vi->sq[0].napi.weight) {
 		if (dev->flags & IFF_UP)
 			return -EBUSY;
+		else
+			update_napi = true;
+	}
+
+	if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL))
+		ret = virtnet_send_notf_coal_cmds(vi, ec);
+	else
+		ret = virtnet_coal_params_supported(ec);
+
+	if (ret)
+		return ret;
+
+	if (update_napi) {
 		for (i = 0; i < vi->max_queue_pairs; i++)
 			vi->sq[i].napi.weight = napi_weight;
 	}

-	return 0;
+	return ret;
 }

 static int virtnet_get_coalesce(struct net_device *dev,
@@ -2615,16 +2683,19 @@  static int virtnet_get_coalesce(struct net_device *dev,
 				struct kernel_ethtool_coalesce *kernel_coal,
 				struct netlink_ext_ack *extack)
 {
-	struct ethtool_coalesce ec_default = {
-		.cmd = ETHTOOL_GCOALESCE,
-		.rx_max_coalesced_frames = 1,
-	};
 	struct virtnet_info *vi = netdev_priv(dev);

-	memcpy(ec, &ec_default, sizeof(ec_default));
+	if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) {
+		ec->rx_coalesce_usecs = vi->rx_usecs;
+		ec->tx_coalesce_usecs = vi->tx_usecs;
+		ec->tx_max_coalesced_frames = vi->tx_max_packets;
+		ec->rx_max_coalesced_frames = vi->rx_max_packets;
+	} else {
+		ec->rx_max_coalesced_frames = 1;

-	if (vi->sq[0].napi.weight)
-		ec->tx_max_coalesced_frames = 1;
+		if (vi->sq[0].napi.weight)
+			ec->tx_max_coalesced_frames = 1;
+	}

 	return 0;
 }
@@ -2743,7 +2814,8 @@  static int virtnet_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info)
 }

 static const struct ethtool_ops virtnet_ethtool_ops = {
-	.supported_coalesce_params = ETHTOOL_COALESCE_MAX_FRAMES,
+	.supported_coalesce_params = ETHTOOL_COALESCE_MAX_FRAMES |
+		ETHTOOL_COALESCE_USECS,
 	.get_drvinfo = virtnet_get_drvinfo,
 	.get_link = ethtool_op_get_link,
 	.get_ringparam = virtnet_get_ringparam,
@@ -3411,6 +3483,8 @@  static bool virtnet_validate_features(struct virtio_device *vdev)
 	     VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_RSS,
 			     "VIRTIO_NET_F_CTRL_VQ") ||
 	     VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_HASH_REPORT,
+			     "VIRTIO_NET_F_CTRL_VQ") ||
+	     VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_NOTF_COAL,
 			     "VIRTIO_NET_F_CTRL_VQ"))) {
 		return false;
 	}
@@ -3546,6 +3620,13 @@  static int virtnet_probe(struct virtio_device *vdev)
 	if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF))
 		vi->mergeable_rx_bufs = true;

+	if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) {
+		vi->rx_usecs = 0;
+		vi->tx_usecs = 0;
+		vi->tx_max_packets = 0;
+		vi->rx_max_packets = 0;
+	}
+
 	if (virtio_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT))
 		vi->has_rss_hash_report = true;

@@ -3780,7 +3861,7 @@  static struct virtio_device_id id_table[] = {
 	VIRTIO_NET_F_CTRL_MAC_ADDR, \
 	VIRTIO_NET_F_MTU, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, \
 	VIRTIO_NET_F_SPEED_DUPLEX, VIRTIO_NET_F_STANDBY, \
-	VIRTIO_NET_F_RSS, VIRTIO_NET_F_HASH_REPORT
+	VIRTIO_NET_F_RSS, VIRTIO_NET_F_HASH_REPORT, VIRTIO_NET_F_NOTF_COAL

 static unsigned int features[] = {
 	VIRTNET_FEATURES,
diff --git a/include/uapi/linux/virtio_net.h b/include/uapi/linux/virtio_net.h
index 3f55a4215f1..29ced55514d 100644
--- a/include/uapi/linux/virtio_net.h
+++ b/include/uapi/linux/virtio_net.h
@@ -56,7 +56,7 @@ 
 #define VIRTIO_NET_F_MQ	22	/* Device supports Receive Flow
 					 * Steering */
 #define VIRTIO_NET_F_CTRL_MAC_ADDR 23	/* Set MAC address */
-
+#define VIRTIO_NET_F_NOTF_COAL	53	/* Guest can handle notifications coalescing */
 #define VIRTIO_NET_F_HASH_REPORT  57	/* Supports hash report */
 #define VIRTIO_NET_F_RSS	  60	/* Supports RSS RX steering */
 #define VIRTIO_NET_F_RSC_EXT	  61	/* extended coalescing info */
@@ -355,4 +355,36 @@  struct virtio_net_hash_config {
 #define VIRTIO_NET_CTRL_GUEST_OFFLOADS   5
 #define VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET        0

+/*
+ * Control notifications coalescing.
+ *
+ * Request the device to change the notifications coalescing parameters.
+ *
+ * Available with the VIRTIO_NET_F_NOTF_COAL feature bit.
+ */
+#define VIRTIO_NET_CTRL_NOTF_COAL		6
+/*
+ * Set the tx-usecs/tx-max-packets patameters.
+ * tx-usecs - Maximum number of usecs to delay a TX notification.
+ * tx-max-packets - Maximum number of packets to send before a TX notification.
+ */
+struct virtio_net_ctrl_coal_tx {
+	__le32 tx_max_packets;
+	__le32 tx_usecs;
+};
+
+#define VIRTIO_NET_CTRL_NOTF_COAL_TX_SET		0
+
+/*
+ * Set the rx-usecs/rx-max-packets patameters.
+ * rx-usecs - Maximum number of usecs to delay a RX notification.
+ * rx-max-frames - Maximum number of packets to receive before a RX notification.
+ */
+struct virtio_net_ctrl_coal_rx {
+	__le32 rx_max_packets;
+	__le32 rx_usecs;
+};
+
+#define VIRTIO_NET_CTRL_NOTF_COAL_RX_SET		1
+
 #endif /* _UAPI_LINUX_VIRTIO_NET_H */