
[v1] virtio-net: enable configurable tx queue size

Message ID 1496653049-44530-1-git-send-email-wei.w.wang@intel.com (mailing list archive)
State New, archived

Commit Message

Wang, Wei W June 5, 2017, 8:57 a.m. UTC
This patch enables the virtio-net tx queue size to be configurable
between 256 and 1024 by the user. The queue size specified by the
user should be a power of 2. If "tx_queue_size" is not specified by the
user, the default queue size, 1024, will be used.

For the traditional QEMU backend, setting the tx queue size to 1024
requires the guest virtio driver to support the VIRTIO_F_MAX_CHAIN_SIZE
feature. This feature restricts the guest driver from chaining 1024
vring descriptors, which could otherwise cause the device side
implementation to pass more than 1024 iovs to writev.

VIRTIO_F_MAX_CHAIN_SIZE is a common transport feature added for all
virtio devices. However, each device has the flexibility to set the max
chain size to limit its driver to chain vring descriptors. Currently,
the max chain size of the virtio-net device is set to 1023.

If the tx queue size is set to 1024 but the VIRTIO_F_MAX_CHAIN_SIZE
feature is not supported by the guest driver, the tx queue size will be
reconfigured to 512.

Signed-off-by: Wei Wang <wei.w.wang@intel.com>

RFC to v1 changes:
1) change VIRTIO_F_MAX_CHAIN_SIZE to be a common virtio feature (was
virtio-net specific);
2) change the default tx queue size to be 1024 (was 256);
3) For the vhost backend case, setting the tx queue size to 1024 doesn't
require the VIRTIO_F_MAX_CHAIN_SIZE feature support.
---
 hw/net/virtio-net.c                            | 69 ++++++++++++++++++++++++--
 include/hw/virtio/virtio-net.h                 |  1 +
 include/hw/virtio/virtio.h                     |  2 +
 include/standard-headers/linux/virtio_config.h |  3 ++
 include/standard-headers/linux/virtio_net.h    |  2 +
 5 files changed, 73 insertions(+), 4 deletions(-)

Comments

Michael S. Tsirkin June 5, 2017, 3:38 p.m. UTC | #1
On Mon, Jun 05, 2017 at 04:57:29PM +0800, Wei Wang wrote:
> This patch enables the virtio-net tx queue size to be configurable
> between 256 and 1024 by the user. The queue size specified by the
> user should be power of 2. If "tx_queue_size" is not offered by the
> user, the default queue size, 1024, will be used.
> 
> For the traditional QEMU backend, setting the tx queue size to be 1024
> requires the guest virtio driver to support the VIRTIO_F_MAX_CHAIN_SIZE
> feature. This feature restricts the guest driver from chaining 1024
> vring descriptors, which may cause the device side implementation to
> send more than 1024 iov to writev.
> 
> VIRTIO_F_MAX_CHAIN_SIZE is a common transport feature added for all
> virtio devices. However, each device has the flexibility to set the max
> chain size to limit its driver to chain vring descriptors. Currently,
> the max chain size of the virtio-net device is set to 1023.
> 
> In the case that the tx queue size is set to 1024 and the
> VIRTIO_F_MAX_CHAIN_SIZE feature is not supported by the guest driver,
> the tx queue size will be reconfigured to be 512.

I'd like to see the reverse. Start with the current default.
If VIRTIO_F_MAX_CHAIN_SIZE is negotiated, increase the queue size.
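
Roughly, as a sketch of that direction (hypothetical helper name, reusing
is_tx() and virtio_queue_set_num() from this patch; not a request for this
exact code):

static void virtio_net_try_enlarge_tx_queues(VirtIONet *n, uint64_t features)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int i, num_queues = virtio_get_num_queues(vdev);

    /* Stay at the current 256 default unless the feature was negotiated;
     * only then grow the tx queues to the user-requested size. */
    if (!virtio_has_feature(features, VIRTIO_F_MAX_CHAIN_SIZE)) {
        return;
    }
    for (i = 0; i < num_queues; i++) {
        if (is_tx(i)) {
            virtio_queue_set_num(vdev, i, n->net_conf.tx_queue_size);
        }
    }
}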

> Signed-off-by: Wei Wang <wei.w.wang@intel.com>

below should come after ---

> RFC to v1 changes:
> 1) change VIRTIO_F_MAX_CHAIN_SIZE to be a common virtio feature (was
> virtio-net specific);
> 2) change the default tx queue size to be 1024 (was 256);
> 3) For the vhost backend case, setting tx queue size to be 1024 dosen't
> require the VIRTIO_F_MAX_CHAIN_SIZE feature support.
> ---
>  hw/net/virtio-net.c                            | 69 ++++++++++++++++++++++++--
>  include/hw/virtio/virtio-net.h                 |  1 +
>  include/hw/virtio/virtio.h                     |  2 +
>  include/standard-headers/linux/virtio_config.h |  3 ++
>  include/standard-headers/linux/virtio_net.h    |  2 +
>  5 files changed, 73 insertions(+), 4 deletions(-)
> 
> diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
> index 7d091c9..5c82f54 100644
> --- a/hw/net/virtio-net.c
> +++ b/hw/net/virtio-net.c
> @@ -33,8 +33,13 @@
>  
>  /* previously fixed value */
>  #define VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 256
> +#define VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE 1024
> +
>  /* for now, only allow larger queues; with virtio-1, guest can downsize */
>  #define VIRTIO_NET_RX_QUEUE_MIN_SIZE VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE
> +#define VIRTIO_NET_TX_QUEUE_MIN_SIZE 256
> +
> +#define VIRTIO_NET_MAX_CHAIN_SIZE 1023
>  
>  /*
>   * Calculate the number of bytes up to and including the given 'field' of
> @@ -57,6 +62,8 @@ static VirtIOFeature feature_sizes[] = {
>       .end = endof(struct virtio_net_config, max_virtqueue_pairs)},
>      {.flags = 1 << VIRTIO_NET_F_MTU,
>       .end = endof(struct virtio_net_config, mtu)},
> +    {.flags = 1 << VIRTIO_F_MAX_CHAIN_SIZE,
> +     .end = endof(struct virtio_net_config, max_chain_size)},
>      {}
>  };
>

Using a generic VIRTIO_F_MAX_CHAIN_SIZE flag, so this should go into the
generic virtio section, not virtio_net_config.
  
> @@ -84,6 +91,7 @@ static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
>      virtio_stw_p(vdev, &netcfg.status, n->status);
>      virtio_stw_p(vdev, &netcfg.max_virtqueue_pairs, n->max_queues);
>      virtio_stw_p(vdev, &netcfg.mtu, n->net_conf.mtu);
> +    virtio_stw_p(vdev, &netcfg.max_chain_size, VIRTIO_NET_MAX_CHAIN_SIZE);
>      memcpy(netcfg.mac, n->mac, ETH_ALEN);
>      memcpy(config, &netcfg, n->config_size);
>  }
> @@ -635,9 +643,33 @@ static inline uint64_t virtio_net_supported_guest_offloads(VirtIONet *n)
>      return virtio_net_guest_offloads_by_features(vdev->guest_features);
>  }
>  
> +static bool is_tx(int queue_index)
> +{
> +    return queue_index % 2 == 1;
> +}
> +
> +static void virtio_net_reconfig_tx_queue_size(VirtIONet *n)
> +{
> +    VirtIODevice *vdev = VIRTIO_DEVICE(n);
> +    int i, num_queues = virtio_get_num_queues(vdev);
> +
> +    /* Return when the queue size is already less than the 1024 */
> +    if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE) {
> +        return;
> +    }
> +
> +    for (i = 0; i < num_queues; i++) {
> +        if (is_tx(i)) {
> +            n->net_conf.tx_queue_size = VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE / 2;
> +            virtio_queue_set_num(vdev, i, n->net_conf.tx_queue_size);
> +        }
> +    }
> +}
> +
>  static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features)
>  {
>      VirtIONet *n = VIRTIO_NET(vdev);
> +    NetClientState *nc = qemu_get_queue(n->nic);
>      int i;
>  
>      virtio_net_set_multiqueue(n,
> @@ -649,6 +681,16 @@ static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features)
>                                 virtio_has_feature(features,
>                                                    VIRTIO_F_VERSION_1));
>  
> +    /*
> +     * When the traditional QEMU backend is used, using 1024 tx queue size
> +     * requires the driver to support the VIRTIO_F_MAX_CHAIN_SIZE feature. If
> +     * the feature is not supported, reconfigure the tx queue size to 512.
> +     */
> +    if (!get_vhost_net(nc->peer) &&
> +        !virtio_has_feature(features, VIRTIO_F_MAX_CHAIN_SIZE)) {
> +        virtio_net_reconfig_tx_queue_size(n);
> +    }
> +
>      if (n->has_vnet_hdr) {
>          n->curr_guest_offloads =
>              virtio_net_guest_offloads_by_features(features);
> @@ -1297,8 +1339,8 @@ static int32_t virtio_net_flush_tx(VirtIONetQueue *q)
>  
>          out_num = elem->out_num;
>          out_sg = elem->out_sg;
> -        if (out_num < 1) {
> -            virtio_error(vdev, "virtio-net header not in first element");
> +        if (out_num < 1 || out_num > VIRTIO_NET_MAX_CHAIN_SIZE) {
> +            virtio_error(vdev, "no packet or too large vring desc chain");
>              virtqueue_detach_element(q->tx_vq, elem, 0);
>              g_free(elem);
>              return -EINVAL;

what about rx vq? we need to limit that as well, do we not?


> @@ -1496,13 +1538,15 @@ static void virtio_net_add_queue(VirtIONet *n, int index)
>                                             virtio_net_handle_rx);
>      if (n->net_conf.tx && !strcmp(n->net_conf.tx, "timer")) {
>          n->vqs[index].tx_vq =
> -            virtio_add_queue(vdev, 256, virtio_net_handle_tx_timer);
> +            virtio_add_queue(vdev, n->net_conf.tx_queue_size,
> +                             virtio_net_handle_tx_timer);
>          n->vqs[index].tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
>                                                virtio_net_tx_timer,
>                                                &n->vqs[index]);
>      } else {
>          n->vqs[index].tx_vq =
> -            virtio_add_queue(vdev, 256, virtio_net_handle_tx_bh);
> +            virtio_add_queue(vdev, n->net_conf.tx_queue_size,
> +                             virtio_net_handle_tx_bh);
>          n->vqs[index].tx_bh = qemu_bh_new(virtio_net_tx_bh, &n->vqs[index]);
>      }
>  
> @@ -1891,6 +1935,10 @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp)
>          n->host_features |= (0x1 << VIRTIO_NET_F_MTU);
>      }
>  
> +    if (virtio_host_has_feature(vdev, VIRTIO_F_MAX_CHAIN_SIZE)) {
> +        n->host_features |= (0x1 << VIRTIO_F_MAX_CHAIN_SIZE);
> +    }
> +
>      virtio_net_set_config_size(n, n->host_features);
>      virtio_init(vdev, "virtio-net", VIRTIO_ID_NET, n->config_size);
>  
> @@ -1910,6 +1958,17 @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp)
>          return;
>      }
>  
> +    if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_MIN_SIZE ||
> +        n->net_conf.tx_queue_size > VIRTQUEUE_MAX_SIZE ||
> +        (n->net_conf.tx_queue_size & (n->net_conf.tx_queue_size - 1))) {

Pls use is_power_of_2.
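
For reference, with is_power_of_2() from "qemu/host-utils.h" the check
would read roughly (sketch only, not the exact final code):

    if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_MIN_SIZE ||
        n->net_conf.tx_queue_size > VIRTQUEUE_MAX_SIZE ||
        !is_power_of_2(n->net_conf.tx_queue_size)) {
        ...
    }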

> +        error_setg(errp, "Invalid tx_queue_size (= %" PRIu16 "), "
> +                   "must be a power of 2 between %d and %d.",
> +                   n->net_conf.tx_queue_size, VIRTIO_NET_TX_QUEUE_MIN_SIZE,
> +                   VIRTQUEUE_MAX_SIZE);

I think management will need a way to discover the limits for
this value. Not sure how. Cc QAPI maintainers.


> +        virtio_cleanup(vdev);
> +        return;
> +    }
> +
>      n->max_queues = MAX(n->nic_conf.peers.queues, 1);
>      if (n->max_queues * 2 + 1 > VIRTIO_QUEUE_MAX) {
>          error_setg(errp, "Invalid number of queues (= %" PRIu32 "), "
> @@ -2089,6 +2148,8 @@ static Property virtio_net_properties[] = {
>      DEFINE_PROP_STRING("tx", VirtIONet, net_conf.tx),
>      DEFINE_PROP_UINT16("rx_queue_size", VirtIONet, net_conf.rx_queue_size,
>                         VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE),
> +    DEFINE_PROP_UINT16("tx_queue_size", VirtIONet, net_conf.tx_queue_size,
> +                       VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE),
>      DEFINE_PROP_UINT16("host_mtu", VirtIONet, net_conf.mtu, 0),
>      DEFINE_PROP_END_OF_LIST(),
>  };
> diff --git a/include/hw/virtio/virtio-net.h b/include/hw/virtio/virtio-net.h
> index 1eec9a2..fd944ba 100644
> --- a/include/hw/virtio/virtio-net.h
> +++ b/include/hw/virtio/virtio-net.h
> @@ -36,6 +36,7 @@ typedef struct virtio_net_conf
>      int32_t txburst;
>      char *tx;
>      uint16_t rx_queue_size;
> +    uint16_t tx_queue_size;
>      uint16_t mtu;
>  } virtio_net_conf;
>  
> diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
> index 7b6edba..8e85e41 100644
> --- a/include/hw/virtio/virtio.h
> +++ b/include/hw/virtio/virtio.h
> @@ -260,6 +260,8 @@ typedef struct VirtIORNGConf VirtIORNGConf;
>                        VIRTIO_F_NOTIFY_ON_EMPTY, true), \
>      DEFINE_PROP_BIT64("any_layout", _state, _field, \
>                        VIRTIO_F_ANY_LAYOUT, true), \
> +    DEFINE_PROP_BIT64("max_chain_size", _state, _field, \
> +                      VIRTIO_F_MAX_CHAIN_SIZE, true), \
>      DEFINE_PROP_BIT64("iommu_platform", _state, _field, \
>                        VIRTIO_F_IOMMU_PLATFORM, false)
>  
> diff --git a/include/standard-headers/linux/virtio_config.h b/include/standard-headers/linux/virtio_config.h
> index b777069..b70cbfe 100644
> --- a/include/standard-headers/linux/virtio_config.h
> +++ b/include/standard-headers/linux/virtio_config.h
> @@ -60,6 +60,9 @@
>  #define VIRTIO_F_ANY_LAYOUT		27
>  #endif /* VIRTIO_CONFIG_NO_LEGACY */
>  
> +/* Guest chains vring descriptors within a limit */
> +#define VIRTIO_F_MAX_CHAIN_SIZE		31
> +

Pls use a flag in the high 32 bit.

>  /* v1.0 compliant. */
>  #define VIRTIO_F_VERSION_1		32
>  
> diff --git a/include/standard-headers/linux/virtio_net.h b/include/standard-headers/linux/virtio_net.h
> index 30ff249..729aaa8 100644
> --- a/include/standard-headers/linux/virtio_net.h
> +++ b/include/standard-headers/linux/virtio_net.h
> @@ -76,6 +76,8 @@ struct virtio_net_config {
>  	uint16_t max_virtqueue_pairs;
>  	/* Default maximum transmit unit advice */
>  	uint16_t mtu;
> +        /* Maximum number of vring descriptors that can be chained */
> +	uint16_t max_chain_size;
>  } QEMU_PACKED;
>  
>  /*
> -- 
> 2.7.4
Eric Blake June 5, 2017, 3:41 p.m. UTC | #2
On 06/05/2017 10:38 AM, Michael S. Tsirkin wrote:
> On Mon, Jun 05, 2017 at 04:57:29PM +0800, Wei Wang wrote:
>> This patch enables the virtio-net tx queue size to be configurable
>> between 256 and 1024 by the user. The queue size specified by the
>> user should be power of 2. If "tx_queue_size" is not offered by the
>> user, the default queue size, 1024, will be used.
>>

>> +    if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_MIN_SIZE ||
>> +        n->net_conf.tx_queue_size > VIRTQUEUE_MAX_SIZE ||
>> +        (n->net_conf.tx_queue_size & (n->net_conf.tx_queue_size - 1))) {
> 
> Pls use is_power_of_2.
> 
>> +        error_setg(errp, "Invalid tx_queue_size (= %" PRIu16 "), "
>> +                   "must be a power of 2 between %d and %d.",

No trailing '.' in error_setg() messages, please.

>> +                   n->net_conf.tx_queue_size, VIRTIO_NET_TX_QUEUE_MIN_SIZE,
>> +                   VIRTQUEUE_MAX_SIZE);
> 
> I think management will need a way to discover the limits for
> this value. Not sure how. Cc QAPI maintainers.

Is this something that can be documented? Or is it a runtime-variable
limit, where you will need to query the limit on a per-machine basis?
Given that it looks like compile-time documentation, I'm leaning towards
documentation being sufficient.
Michael S. Tsirkin June 5, 2017, 3:45 p.m. UTC | #3
On Mon, Jun 05, 2017 at 10:41:56AM -0500, Eric Blake wrote:
> On 06/05/2017 10:38 AM, Michael S. Tsirkin wrote:
> > On Mon, Jun 05, 2017 at 04:57:29PM +0800, Wei Wang wrote:
> >> This patch enables the virtio-net tx queue size to be configurable
> >> between 256 and 1024 by the user. The queue size specified by the
> >> user should be power of 2. If "tx_queue_size" is not offered by the
> >> user, the default queue size, 1024, will be used.
> >>
> 
> >> +    if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_MIN_SIZE ||
> >> +        n->net_conf.tx_queue_size > VIRTQUEUE_MAX_SIZE ||
> >> +        (n->net_conf.tx_queue_size & (n->net_conf.tx_queue_size - 1))) {
> > 
> > Pls use is_power_of_2.
> > 
> >> +        error_setg(errp, "Invalid tx_queue_size (= %" PRIu16 "), "
> >> +                   "must be a power of 2 between %d and %d.",
> 
> No trailing '.' in error_setg() messages, please.
> 
> >> +                   n->net_conf.tx_queue_size, VIRTIO_NET_TX_QUEUE_MIN_SIZE,
> >> +                   VIRTQUEUE_MAX_SIZE);
> > 
> > I think management will need a way to discover the limits for
> > this value. Not sure how. Cc QAPI maintainers.
> 
> Is this something that can be documented? Or is it a runtime-variable
> limit, where you will need to query the limit on a per-machine basis?
> Given that it looks like compile-time documentation, I'm leaning towards
> documentation being sufficient.

We are likely to change the limits with time. E.g.
VIRTIO_NET_TX_QUEUE_MIN_SIZE is arbitrary.

> -- 
> Eric Blake, Principal Software Engineer
> Red Hat, Inc.           +1-919-301-3266
> Virtualization:  qemu.org | libvirt.org
>
Michael S. Tsirkin June 5, 2017, 3:47 p.m. UTC | #4
On Mon, Jun 05, 2017 at 04:57:29PM +0800, Wei Wang wrote:
> @@ -1910,6 +1958,17 @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp)
>          return;
>      }
>  
> +    if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_MIN_SIZE ||
> +        n->net_conf.tx_queue_size > VIRTQUEUE_MAX_SIZE ||
> +        (n->net_conf.tx_queue_size & (n->net_conf.tx_queue_size - 1))) {
> +        error_setg(errp, "Invalid tx_queue_size (= %" PRIu16 "), "
> +                   "must be a power of 2 between %d and %d.",
> +                   n->net_conf.tx_queue_size, VIRTIO_NET_TX_QUEUE_MIN_SIZE,
> +                   VIRTQUEUE_MAX_SIZE);
> +        virtio_cleanup(vdev);
> +        return;
> +    }
> +
>      n->max_queues = MAX(n->nic_conf.peers.queues, 1);
>      if (n->max_queues * 2 + 1 > VIRTIO_QUEUE_MAX) {
>          error_setg(errp, "Invalid number of queues (= %" PRIu32 "), "

Given that some configurations (e.g. legacy guest) ignore the value,
I'm inclined to say just force the value to be within a reasonable
limit.
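
For illustration, the "force it" alternative could clamp instead of failing
realize, roughly like this (pow2floor(), MIN and MAX are existing QEMU
helpers; sketch only):

    if (!is_power_of_2(n->net_conf.tx_queue_size)) {
        n->net_conf.tx_queue_size = pow2floor(n->net_conf.tx_queue_size);
    }
    n->net_conf.tx_queue_size = MAX(n->net_conf.tx_queue_size,
                                    VIRTIO_NET_TX_QUEUE_MIN_SIZE);
    n->net_conf.tx_queue_size = MIN(n->net_conf.tx_queue_size,
                                    VIRTQUEUE_MAX_SIZE);
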
Wang, Wei W June 6, 2017, 3:32 a.m. UTC | #5
On 06/05/2017 11:38 PM, Michael S. Tsirkin wrote:
> On Mon, Jun 05, 2017 at 04:57:29PM +0800, Wei Wang wrote:
>   /*
>    * Calculate the number of bytes up to and including the given 'field' of
> @@ -57,6 +62,8 @@ static VirtIOFeature feature_sizes[] = {
>        .end = endof(struct virtio_net_config, max_virtqueue_pairs)},
>       {.flags = 1 << VIRTIO_NET_F_MTU,
>        .end = endof(struct virtio_net_config, mtu)},
> +    {.flags = 1 << VIRTIO_F_MAX_CHAIN_SIZE,
> +     .end = endof(struct virtio_net_config, max_chain_size)},
>       {}
>   };
>
> Using a generic VIRTIO_F_MAX_CHAIN_SIZE flag, so this should go into the
> generic virtio section, not virtio_net_config.
>    
Do you mean VIRTIO_PCI_xx under virtio_pci.h?

The reason the config field wasn't added there is that I didn't find space
to put the new 16-bit field into the existing layout. The last register is
"#define VIRTIO_MSI_QUEUE_VECTOR 22", and the per-driver configuration
space directly follows that last register via
"#define VIRTIO_PCI_CONFIG_OFF(msix_enabled) ((msix_enabled) ? 24:20)".

Changing the macro to something like
VIRTIO_PCI_CONFIG_OFF(max_chain_size_enabled) would be difficult.

Do you see a good way to add the new field here?
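
For context, the tail of the legacy register layout in virtio_pci.h looks
like this (quoted from the existing header, for reference):

/* A 16-bit vector for configuration changes. */
#define VIRTIO_MSI_CONFIG_VECTOR        20
/* A 16-bit vector for selected queue notifications. */
#define VIRTIO_MSI_QUEUE_VECTOR         22

/* The remaining space is defined by each driver as the per-driver
 * configuration space */
#define VIRTIO_PCI_CONFIG_OFF(msix_enabled)     ((msix_enabled) ? 24 : 20)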


>> @@ -84,6 +91,7 @@ static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
>>       virtio_stw_p(vdev, &netcfg.status, n->status);
>>       virtio_stw_p(vdev, &netcfg.max_virtqueue_pairs, n->max_queues);
>>       virtio_stw_p(vdev, &netcfg.mtu, n->net_conf.mtu);
>> +    virtio_stw_p(vdev, &netcfg.max_chain_size, VIRTIO_NET_MAX_CHAIN_SIZE);
>>       memcpy(netcfg.mac, n->mac, ETH_ALEN);
>>       memcpy(config, &netcfg, n->config_size);
>>   }
>> @@ -635,9 +643,33 @@ static inline uint64_t virtio_net_supported_guest_offloads(VirtIONet *n)
>>       return virtio_net_guest_offloads_by_features(vdev->guest_features);
>>   }
>>   
>> +static bool is_tx(int queue_index)
>> +{
>> +    return queue_index % 2 == 1;
>> +}
>> +
>> +static void virtio_net_reconfig_tx_queue_size(VirtIONet *n)
>> +{
>> +    VirtIODevice *vdev = VIRTIO_DEVICE(n);
>> +    int i, num_queues = virtio_get_num_queues(vdev);
>> +
>> +    /* Return when the queue size is already less than the 1024 */
>> +    if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE) {
>> +        return;
>> +    }
>> +
>> +    for (i = 0; i < num_queues; i++) {
>> +        if (is_tx(i)) {
>> +            n->net_conf.tx_queue_size = VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE / 2;
>> +            virtio_queue_set_num(vdev, i, n->net_conf.tx_queue_size);
>> +        }
>> +    }
>> +}
>> +
>>   static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features)
>>   {
>>       VirtIONet *n = VIRTIO_NET(vdev);
>> +    NetClientState *nc = qemu_get_queue(n->nic);
>>       int i;
>>   
>>       virtio_net_set_multiqueue(n,
>> @@ -649,6 +681,16 @@ static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features)
>>                                  virtio_has_feature(features,
>>                                                     VIRTIO_F_VERSION_1));
>>   
>> +    /*
>> +     * When the traditional QEMU backend is used, using 1024 tx queue size
>> +     * requires the driver to support the VIRTIO_F_MAX_CHAIN_SIZE feature. If
>> +     * the feature is not supported, reconfigure the tx queue size to 512.
>> +     */
>> +    if (!get_vhost_net(nc->peer) &&
>> +        !virtio_has_feature(features, VIRTIO_F_MAX_CHAIN_SIZE)) {
>> +        virtio_net_reconfig_tx_queue_size(n);
>> +    }
>> +
>>       if (n->has_vnet_hdr) {
>>           n->curr_guest_offloads =
>>               virtio_net_guest_offloads_by_features(features);
>> @@ -1297,8 +1339,8 @@ static int32_t virtio_net_flush_tx(VirtIONetQueue *q)
>>   
>>           out_num = elem->out_num;
>>           out_sg = elem->out_sg;
>> -        if (out_num < 1) {
>> -            virtio_error(vdev, "virtio-net header not in first element");
>> +        if (out_num < 1 || out_num > VIRTIO_NET_MAX_CHAIN_SIZE) {
>> +            virtio_error(vdev, "no packet or too large vring desc chain");
>>               virtqueue_detach_element(q->tx_vq, elem, 0);
>>               g_free(elem);
>>               return -EINVAL;
> what about rx vq? we need to limit that as well, do we not?
I think the rx vq doesn't need the limit, because there is no off-by-one
issue like the one on the tx side with writev().
>   
> diff --git a/include/standard-headers/linux/virtio_config.h b/include/standard-headers/linux/virtio_config.h
> index b777069..b70cbfe 100644
> --- a/include/standard-headers/linux/virtio_config.h
> +++ b/include/standard-headers/linux/virtio_config.h
> @@ -60,6 +60,9 @@
>   #define VIRTIO_F_ANY_LAYOUT		27
>   #endif /* VIRTIO_CONFIG_NO_LEGACY */
>   
> +/* Guest chains vring descriptors within a limit */
> +#define VIRTIO_F_MAX_CHAIN_SIZE		31
> +
> Pls use a flag in the high 32 bit.

I think this should be considered a transport feature. How about changing
VIRTIO_TRANSPORT_F_END to 35 (was 34) and using 34 for
VIRTIO_F_MAX_CHAIN_SIZE?
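
In virtio_config.h terms that would look something like this (illustrative
only, using the bit numbers discussed above):

/* v1.0 compliant. */
#define VIRTIO_F_VERSION_1        32
#define VIRTIO_F_IOMMU_PLATFORM   33
/* Proposed: guest chains vring descriptors within a limit */
#define VIRTIO_F_MAX_CHAIN_SIZE   34
#define VIRTIO_TRANSPORT_F_END    35   /* was 34 */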

Best,
Wei
Wang, Wei W June 7, 2017, 1:04 a.m. UTC | #6
On 06/05/2017 11:38 PM, Michael S. Tsirkin wrote:
> On Mon, Jun 05, 2017 at 04:57:29PM +0800, Wei Wang wrote:
>> This patch enables the virtio-net tx queue size to be configurable
>> between 256 and 1024 by the user. The queue size specified by the
>> user should be power of 2. If "tx_queue_size" is not offered by the
>> user, the default queue size, 1024, will be used.
>>
>> For the traditional QEMU backend, setting the tx queue size to be 1024
>> requires the guest virtio driver to support the VIRTIO_F_MAX_CHAIN_SIZE
>> feature. This feature restricts the guest driver from chaining 1024
>> vring descriptors, which may cause the device side implementation to
>> send more than 1024 iov to writev.
>>
>> VIRTIO_F_MAX_CHAIN_SIZE is a common transport feature added for all
>> virtio devices. However, each device has the flexibility to set the max
>> chain size to limit its driver to chain vring descriptors. Currently,
>> the max chain size of the virtio-net device is set to 1023.
>>
>> In the case that the tx queue size is set to 1024 and the
>> VIRTIO_F_MAX_CHAIN_SIZE feature is not supported by the guest driver,
>> the tx queue size will be reconfigured to be 512.
> I'd like to see the reverse. Start with the current default.
> If VIRTIO_F_MAX_CHAIN_SIZE is negotiated, increase the queue size.
>

OK, we can let the queue size start with 256, and how about
increasing it to 1024 in the following two cases:
1) VIRTIO_F_MAX_CHAIN_SIZE is negotiated; or
2) the backend is vhost.

Best,
Wei
Michael S. Tsirkin June 8, 2017, 7:01 p.m. UTC | #7
On Wed, Jun 07, 2017 at 09:04:29AM +0800, Wei Wang wrote:
> On 06/05/2017 11:38 PM, Michael S. Tsirkin wrote:
> > On Mon, Jun 05, 2017 at 04:57:29PM +0800, Wei Wang wrote:
> > > This patch enables the virtio-net tx queue size to be configurable
> > > between 256 and 1024 by the user. The queue size specified by the
> > > user should be power of 2. If "tx_queue_size" is not offered by the
> > > user, the default queue size, 1024, will be used.
> > > 
> > > For the traditional QEMU backend, setting the tx queue size to be 1024
> > > requires the guest virtio driver to support the VIRTIO_F_MAX_CHAIN_SIZE
> > > feature. This feature restricts the guest driver from chaining 1024
> > > vring descriptors, which may cause the device side implementation to
> > > send more than 1024 iov to writev.
> > > 
> > > VIRTIO_F_MAX_CHAIN_SIZE is a common transport feature added for all
> > > virtio devices. However, each device has the flexibility to set the max
> > > chain size to limit its driver to chain vring descriptors. Currently,
> > > the max chain size of the virtio-net device is set to 1023.
> > > 
> > > In the case that the tx queue size is set to 1024 and the
> > > VIRTIO_F_MAX_CHAIN_SIZE feature is not supported by the guest driver,
> > > the tx queue size will be reconfigured to be 512.
> > I'd like to see the reverse. Start with the current default.
> > If VIRTIO_F_MAX_CHAIN_SIZE is negotiated, increase the queue size.
> > 
> 
> OK, we can let the queue size start with 256, and how about
> increasing it to 1024 in the following two cases:

I think it should be
1) VIRTIO_F_MAX_CHAIN_SIZE is negotiated
and
2) user requested large size

> 1) VIRTIO_F_MAX_CHAIN_SIZE is negotiated; or
> 2) the backend is vhost.

For vhost we also need the vhost backend to support VIRTIO_F_MAX_CHAIN_SIZE.
We also need to send the max chain size to the backend.

> Best,
> Wei
Wang, Wei W June 9, 2017, 3 a.m. UTC | #8
On 06/09/2017 03:01 AM, Michael S. Tsirkin wrote:
> On Wed, Jun 07, 2017 at 09:04:29AM +0800, Wei Wang wrote:
>> On 06/05/2017 11:38 PM, Michael S. Tsirkin wrote:
>>> On Mon, Jun 05, 2017 at 04:57:29PM +0800, Wei Wang wrote:
>>>> This patch enables the virtio-net tx queue size to be configurable
>>>> between 256 and 1024 by the user. The queue size specified by the
>>>> user should be power of 2. If "tx_queue_size" is not offered by the
>>>> user, the default queue size, 1024, will be used.
>>>>
>>>> For the traditional QEMU backend, setting the tx queue size to be 1024
>>>> requires the guest virtio driver to support the VIRTIO_F_MAX_CHAIN_SIZE
>>>> feature. This feature restricts the guest driver from chaining 1024
>>>> vring descriptors, which may cause the device side implementation to
>>>> send more than 1024 iov to writev.
>>>>
>>>> VIRTIO_F_MAX_CHAIN_SIZE is a common transport feature added for all
>>>> virtio devices. However, each device has the flexibility to set the max
>>>> chain size to limit its driver to chain vring descriptors. Currently,
>>>> the max chain size of the virtio-net device is set to 1023.
>>>>
>>>> In the case that the tx queue size is set to 1024 and the
>>>> VIRTIO_F_MAX_CHAIN_SIZE feature is not supported by the guest driver,
>>>> the tx queue size will be reconfigured to be 512.
>>> I'd like to see the reverse. Start with the current default.
>>> If VIRTIO_F_MAX_CHAIN_SIZE is negotiated, increase the queue size.
>>>
>> OK, we can let the queue size start with 256, and how about
>> increasing it to 1024 in the following two cases:
> I think it should be
> 1) VIRTIO_F_MAX_CHAIN_SIZE is negotiated
> and
> 2) user requested large size
>
>> 1) VIRTIO_F_MAX_CHAIN_SIZE is negotiated; or
>> 2) the backend is vhost.
> For vhost we also need vhost backend to support VIRTIO_F_MAX_CHAIN_SIZE.
> We also need to send the max chain size to backend.
>
I think the limitation that we are dealing with is that the virtio-net
backend implementation in QEMU may pass more than 1024 iovs to writev.
In this case, the QEMU backend uses the "max_chain_size" register to tell
the driver the max size of the vring_desc chain. So I think it should be
the device (backend) sending the max size to the driver, rather than the
other way around.

The vhost-user and vhost-net backends don't have such a limitation,
unlike the QEMU backend, right? If there is no such limitation, I think
the device should be safe to use a 1024 tx queue size with a vhost
backend even without negotiating VIRTIO_F_MAX_CHAIN_SIZE.
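
To make that direction concrete, the guest side would roughly do something
like this (Linux virtio driver style; VIRTIO_F_MAX_CHAIN_SIZE and
max_chain_size are the proposed additions, so this is purely a sketch):

    u16 max_chain = 1023;    /* conservative default */

    if (virtio_has_feature(vdev, VIRTIO_F_MAX_CHAIN_SIZE))
        virtio_cread(vdev, struct virtio_net_config, max_chain_size,
                     &max_chain);
    /* the driver then never chains more than max_chain descriptors */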

Best,
Wei
Wang, Wei W June 12, 2017, 9:30 a.m. UTC | #9
Ping for comments, thanks.

On 06/09/2017 11:00 AM, Wei Wang wrote:
> On 06/09/2017 03:01 AM, Michael S. Tsirkin wrote:
>> On Wed, Jun 07, 2017 at 09:04:29AM +0800, Wei Wang wrote:
>>> On 06/05/2017 11:38 PM, Michael S. Tsirkin wrote:
>>>> On Mon, Jun 05, 2017 at 04:57:29PM +0800, Wei Wang wrote:
>>>>> This patch enables the virtio-net tx queue size to be configurable
>>>>> between 256 and 1024 by the user. The queue size specified by the
>>>>> user should be power of 2. If "tx_queue_size" is not offered by the
>>>>> user, the default queue size, 1024, will be used.
>>>>>
>>>>> For the traditional QEMU backend, setting the tx queue size to be 
>>>>> 1024
>>>>> requires the guest virtio driver to support the 
>>>>> VIRTIO_F_MAX_CHAIN_SIZE
>>>>> feature. This feature restricts the guest driver from chaining 1024
>>>>> vring descriptors, which may cause the device side implementation to
>>>>> send more than 1024 iov to writev.
>>>>>
>>>>> VIRTIO_F_MAX_CHAIN_SIZE is a common transport feature added for all
>>>>> virtio devices. However, each device has the flexibility to set 
>>>>> the max
>>>>> chain size to limit its driver to chain vring descriptors. Currently,
>>>>> the max chain size of the virtio-net device is set to 1023.
>>>>>
>>>>> In the case that the tx queue size is set to 1024 and the
>>>>> VIRTIO_F_MAX_CHAIN_SIZE feature is not supported by the guest driver,
>>>>> the tx queue size will be reconfigured to be 512.
>>>> I'd like to see the reverse. Start with the current default.
>>>> If VIRTIO_F_MAX_CHAIN_SIZE is negotiated, increase the queue size.
>>>>
>>> OK, we can let the queue size start with 256, and how about
>>> increasing it to 1024 in the following two cases:
>> I think it should be
>> 1) VIRTIO_F_MAX_CHAIN_SIZE is negotiated
>> and
>> 2) user requested large size
>>
>>> 1) VIRTIO_F_MAX_CHAIN_SIZE is negotiated; or
>>> 2) the backend is vhost.
>> For vhost we also need vhost backend to support VIRTIO_F_MAX_CHAIN_SIZE.
>> We also need to send the max chain size to backend.
>>
> I think the limitation that we are dealing with is that the virtio-net
> backend implementation in QEMU is possible to pass more than
> 1024 iov to writev. In this case, the QEMU backend uses the
> "max_chain_size" register to tell the driver the max size of the
> vring_desc chain. So, I think it should be the device (backend)
> sending the max size to the driver, rather than the other way
> around.
>
> For the vhost-user and vhost-net backend cases, they don't have
> such limitation as the QEMU backend, right?
> If no such limitation, I think without the negotiation of
> VIRTIO_F_MAX_CHAIN_SIZE, the device should be safe to use 1024
> tx queue size if it is the vhost backend.
>
> Best,
> Wei
Michael S. Tsirkin June 12, 2017, 8:43 p.m. UTC | #10
On Mon, Jun 12, 2017 at 05:30:46PM +0800, Wei Wang wrote:
> Ping for comments, thanks.

This was only posted a week ago, might be a bit too short for some
people.  A couple of weeks is more reasonable before you ping.  Also, I
sent a bunch of comments on Thu, 8 Jun 2017.  You should probably
address these.
Wang, Wei W June 13, 2017, 3:10 a.m. UTC | #11
On 06/13/2017 04:43 AM, Michael S. Tsirkin wrote:
> On Mon, Jun 12, 2017 at 05:30:46PM +0800, Wei Wang wrote:
>> Ping for comments, thanks.
> This was only posted a week ago, might be a bit too short for some
> people.
OK, sorry for the push.
> A couple of weeks is more reasonable before you ping.  Also, I
> sent a bunch of comments on Thu, 8 Jun 2017.  You should probably
> address these.
>
I responded to the comments. The main question is that I'm not sure
why we need the vhost backend to support VIRTIO_F_MAX_CHAIN_SIZE.
IMHO, that feature is proposed to solve a possible issue caused by the
QEMU-implemented backend.

Best,
Wei
Jason Wang June 13, 2017, 3:19 a.m. UTC | #12
On 2017年06月13日 11:10, Wei Wang wrote:
> On 06/13/2017 04:43 AM, Michael S. Tsirkin wrote:
>> On Mon, Jun 12, 2017 at 05:30:46PM +0800, Wei Wang wrote:
>>> Ping for comments, thanks.
>> This was only posted a week ago, might be a bit too short for some
>> people.
> OK, sorry for the push.
>> A couple of weeks is more reasonable before you ping.  Also, I
>> sent a bunch of comments on Thu, 8 Jun 2017.  You should probably
>> address these.
>>
> I responded to the comments. The main question is that I'm not sure
> why we need the vhost backend to support VIRTIO_F_MAX_CHAIN_SIZE.
> IMHO, that should be a feature proposed to solve the possible issue
> caused by the QEMU implemented backend.

The issue is what if there's a mismatch of max #sgs between qemu and vhost?

Thanks

>
> Best,
> Wei
Wang, Wei W June 13, 2017, 3:51 a.m. UTC | #13
On 06/13/2017 11:19 AM, Jason Wang wrote:
>
>
> On 2017年06月13日 11:10, Wei Wang wrote:
>> On 06/13/2017 04:43 AM, Michael S. Tsirkin wrote:
>>> On Mon, Jun 12, 2017 at 05:30:46PM +0800, Wei Wang wrote:
>>>> Ping for comments, thanks.
>>> This was only posted a week ago, might be a bit too short for some
>>> people.
>> OK, sorry for the push.
>>> A couple of weeks is more reasonable before you ping.  Also, I
>>> sent a bunch of comments on Thu, 8 Jun 2017.  You should probably
>>> address these.
>>>
>> I responded to the comments. The main question is that I'm not sure
>> why we need the vhost backend to support VIRTIO_F_MAX_CHAIN_SIZE.
>> IMHO, that should be a feature proposed to solve the possible issue
>> caused by the QEMU implemented backend.
>
> The issue is what if there's a mismatch of max #sgs between qemu and 
> vhost?
>

When the vhost backend is used, QEMU is not involved in the data path.
The vhost backend directly gets what is offered by the guest from the vq.
Why would there be a mismatch of max #sgs between QEMU and vhost, and what
is the QEMU-side max #sgs used for? Thanks.

Best,
Wei
Jason Wang June 13, 2017, 3:55 a.m. UTC | #14
On 2017年06月13日 11:51, Wei Wang wrote:
> On 06/13/2017 11:19 AM, Jason Wang wrote:
>>
>>
>> On 2017年06月13日 11:10, Wei Wang wrote:
>>> On 06/13/2017 04:43 AM, Michael S. Tsirkin wrote:
>>>> On Mon, Jun 12, 2017 at 05:30:46PM +0800, Wei Wang wrote:
>>>>> Ping for comments, thanks.
>>>> This was only posted a week ago, might be a bit too short for some
>>>> people.
>>> OK, sorry for the push.
>>>> A couple of weeks is more reasonable before you ping.  Also, I
>>>> sent a bunch of comments on Thu, 8 Jun 2017.  You should probably
>>>> address these.
>>>>
>>> I responded to the comments. The main question is that I'm not sure
>>> why we need the vhost backend to support VIRTIO_F_MAX_CHAIN_SIZE.
>>> IMHO, that should be a feature proposed to solve the possible issue
>>> caused by the QEMU implemented backend.
>>
>> The issue is what if there's a mismatch of max #sgs between qemu and 
>> vhost?
>>
>
> When the vhost backend is used, QEMU is not involved in the data path. 
> The vhost backend
> directly gets what is offered by the guest from the vq. Why would 
> there be a mismatch of
> max #sgs between QEMU and vhost, and what is the QEMU side max #sgs 
> used for? Thanks.

You need to query the backend max #sgs in this case at least, no? If not,
how do you know the value is supported by the backend?

Thanks

>
> Best,
> Wei
>
>
Jason Wang June 13, 2017, 3:59 a.m. UTC | #15
On 2017年06月13日 11:55, Jason Wang wrote:
>
>
> On 2017年06月13日 11:51, Wei Wang wrote:
>> On 06/13/2017 11:19 AM, Jason Wang wrote:
>>>
>>>
>>> On 2017年06月13日 11:10, Wei Wang wrote:
>>>> On 06/13/2017 04:43 AM, Michael S. Tsirkin wrote:
>>>>> On Mon, Jun 12, 2017 at 05:30:46PM +0800, Wei Wang wrote:
>>>>>> Ping for comments, thanks.
>>>>> This was only posted a week ago, might be a bit too short for some
>>>>> people.
>>>> OK, sorry for the push.
>>>>> A couple of weeks is more reasonable before you ping.  Also, I
>>>>> sent a bunch of comments on Thu, 8 Jun 2017.  You should probably
>>>>> address these.
>>>>>
>>>> I responded to the comments. The main question is that I'm not sure
>>>> why we need the vhost backend to support VIRTIO_F_MAX_CHAIN_SIZE.
>>>> IMHO, that should be a feature proposed to solve the possible issue
>>>> caused by the QEMU implemented backend.
>>>
>>> The issue is what if there's a mismatch of max #sgs between qemu and 
>>> vhost?
>>>
>>
>> When the vhost backend is used, QEMU is not involved in the data 
>> path. The vhost backend
>> directly gets what is offered by the guest from the vq.

FYI, qemu will try to fall back to userspace if there's something wrong
with vhost-kernel (e.g. the IOMMU support). This doesn't actually work for
vhost-user, but it works for vhost-kernel.

Thanks

>> Why would there be a mismatch of
>> max #sgs between QEMU and vhost, and what is the QEMU side max #sgs 
>> used for? Thanks.
>
> You need query the backend max #sgs in this case at least. no? If not 
> how do you know the value is supported by the backend?
>
> Thanks
>
>>
>> Best,
>> Wei
>>
>>
>
>
Wang, Wei W June 13, 2017, 6:08 a.m. UTC | #16
On 06/13/2017 11:55 AM, Jason Wang wrote:
>
> On 2017年06月13日 11:51, Wei Wang wrote:
>> On 06/13/2017 11:19 AM, Jason Wang wrote:
>>>
>>> On 2017年06月13日 11:10, Wei Wang wrote:
>>>> On 06/13/2017 04:43 AM, Michael S. Tsirkin wrote:
>>>>> On Mon, Jun 12, 2017 at 05:30:46PM +0800, Wei Wang wrote:
>>>>>> Ping for comments, thanks.
>>>>> This was only posted a week ago, might be a bit too short for some
>>>>> people.
>>>> OK, sorry for the push.
>>>>> A couple of weeks is more reasonable before you ping.  Also, I
>>>>> sent a bunch of comments on Thu, 8 Jun 2017.  You should probably
>>>>> address these.
>>>>>
>>>> I responded to the comments. The main question is that I'm not sure
>>>> why we need the vhost backend to support VIRTIO_F_MAX_CHAIN_SIZE.
>>>> IMHO, that should be a feature proposed to solve the possible issue
>>>> caused by the QEMU implemented backend.
>>> The issue is what if there's a mismatch of max #sgs between qemu and
>>> vhost?
>>>
>> When the vhost backend is used, QEMU is not involved in the data path.
>> The vhost backend
>> directly gets what is offered by the guest from the vq. Why would
>> there be a mismatch of
>> max #sgs between QEMU and vhost, and what is the QEMU side max #sgs
>> used for? Thanks.
> You need query the backend max #sgs in this case at least. no? If not
> how do you know the value is supported by the backend?
>
> Thanks
>
Here is my thought: the vhost backend already supports 1024 sgs, so I
think it might not be necessary to query the max sgs that the vhost
backend supports. In the setup phase, when QEMU detects that the backend
is vhost, it assumes 1024 max sgs is supported, instead of making an
extra call to query.

The advantage is that people who are using the vhost backend can upgrade
to the 1024 tx queue size just by applying the QEMU patches. Adding an
extra call to query the size would require patching their vhost backend
(like vhost-user), which is difficult for them.


Best,
Wei
Wang, Wei W June 13, 2017, 6:13 a.m. UTC | #17
On 06/13/2017 11:59 AM, Jason Wang wrote:
>
> On 2017年06月13日 11:55, Jason Wang wrote:
>> The issue is what if there's a mismatch of max #sgs between qemu and
>> vhost?
>>
>>> When the vhost backend is used, QEMU is not involved in the data
>>> path. The vhost backend
>>> directly gets what is offered by the guest from the vq.
> FYI, qemu will try to fallback to userspace if there's something wrong
> with vhost-kernel (e.g the IOMMU support). This doesn't work for
> vhost-user actually, but it works for vhost-kernel.
>
> Thanks
>

That wouldn't be a problem. When it falls back to the QEMU backend,
"max_chain_size" will be set according to the QEMU backend (e.g. 1023),
and the guest will read the max_chain_size register.

Best,
Wei
Jason Wang June 13, 2017, 6:29 a.m. UTC | #18
On 2017年06月13日 14:08, Wei Wang wrote:
> On 06/13/2017 11:55 AM, Jason Wang wrote:
>>
>> On 2017年06月13日 11:51, Wei Wang wrote:
>>> On 06/13/2017 11:19 AM, Jason Wang wrote:
>>>>
>>>> On 2017年06月13日 11:10, Wei Wang wrote:
>>>>> On 06/13/2017 04:43 AM, Michael S. Tsirkin wrote:
>>>>>> On Mon, Jun 12, 2017 at 05:30:46PM +0800, Wei Wang wrote:
>>>>>>> Ping for comments, thanks.
>>>>>> This was only posted a week ago, might be a bit too short for some
>>>>>> people.
>>>>> OK, sorry for the push.
>>>>>> A couple of weeks is more reasonable before you ping.  Also, I
>>>>>> sent a bunch of comments on Thu, 8 Jun 2017.  You should probably
>>>>>> address these.
>>>>>>
>>>>> I responded to the comments. The main question is that I'm not sure
>>>>> why we need the vhost backend to support VIRTIO_F_MAX_CHAIN_SIZE.
>>>>> IMHO, that should be a feature proposed to solve the possible issue
>>>>> caused by the QEMU implemented backend.
>>>> The issue is what if there's a mismatch of max #sgs between qemu and
>>>> vhost?
>>>>
>>> When the vhost backend is used, QEMU is not involved in the data path.
>>> The vhost backend
>>> directly gets what is offered by the guest from the vq. Why would
>>> there be a mismatch of
>>> max #sgs between QEMU and vhost, and what is the QEMU side max #sgs
>>> used for? Thanks.
>> You need query the backend max #sgs in this case at least. no? If not
>> how do you know the value is supported by the backend?
>>
>> Thanks
>>
> Here is my thought: vhost backend has already been supporting 1024 sgs,
> so I think it might not be necessary to query the max sgs that the vhost
> backend supports. In the setup phase, when QEMU detects the backend is
> vhost, it assumes 1024 max sgs is supported, instead of giving an extra
> call to query.

We can probably assume the vhost kernel supports up to 1024 sgs. But what
about other vhost-user backends?

And what you said here makes me ask one of my questions from the past:

Do we have a plan to extend 1024 to a larger value, or does 1024 look good
for the coming years? If we only care about 1024, there's no need for a
new config field; a feature flag is more than enough. If we want to extend
it to e.g. 2048, we definitely need to query the vhost backend's limit
(even for vhost-kernel).

Thanks

>
> The advantage is that people who is using the vhost backend can upgrade
> to use 1024 tx queue size by only applying the QEMU patches. Adding an 
> extra
> call to query the size would need to patch their vhost backend (like 
> vhost-user),
> which is difficult for them.
>
>
> Best,
> Wei
Jason Wang June 13, 2017, 6:31 a.m. UTC | #19
On 2017年06月13日 14:13, Wei Wang wrote:
> On 06/13/2017 11:59 AM, Jason Wang wrote:
>>
>> On 2017年06月13日 11:55, Jason Wang wrote:
>>> The issue is what if there's a mismatch of max #sgs between qemu and
>>> vhost?
>>>
>>>> When the vhost backend is used, QEMU is not involved in the data
>>>> path. The vhost backend
>>>> directly gets what is offered by the guest from the vq.
>> FYI, qemu will try to fallback to userspace if there's something wrong
>> with vhost-kernel (e.g the IOMMU support). This doesn't work for
>> vhost-user actually, but it works for vhost-kernel.
>>
>> Thanks
>>
>
> That wouldn't be a problem. When it falls back to the QEMU backend,
> the "max_chain_size" will be set according to the QEMU backend
> (e.g. 1023). Guest will read the max_chain_size register.

What if there's a backend that supports less than 1023? Or if, in the
future, we increase the limit to e.g. 2048?

Thanks

>
> Best,
> Wei
>
Wang, Wei W June 13, 2017, 7:17 a.m. UTC | #20
On 06/13/2017 02:29 PM, Jason Wang wrote:
> The issue is what if there's a mismatch of max #sgs between qemu and
>>>> When the vhost backend is used, QEMU is not involved in the data path.
>>>> The vhost backend
>>>> directly gets what is offered by the guest from the vq. Why would
>>>> there be a mismatch of
>>>> max #sgs between QEMU and vhost, and what is the QEMU side max #sgs
>>>> used for? Thanks.
>>> You need query the backend max #sgs in this case at least. no? If not
>>> how do you know the value is supported by the backend?
>>>
>>> Thanks
>>>
>> Here is my thought: vhost backend has already been supporting 1024 sgs,
>> so I think it might not be necessary to query the max sgs that the vhost
>> backend supports. In the setup phase, when QEMU detects the backend is
>> vhost, it assumes 1024 max sgs is supported, instead of giving an extra
>> call to query.
>
> We can probably assume vhost kernel supports up to 1024 sgs. But how 
> about for other vhost-user backends?
>
So far, I haven't seen any vhost backend implementation supporting less 
than 1024 sgs.


> And what you said here makes me ask one of my questions in the past:
>
> Do we have plan to extend 1024 to a larger value or 1024 looks good 
> for the future years? If we only care about 1024, there's even no need 
> for a new config filed, a feature flag is more than enough. If we want 
> to extend it to e.g 2048, we definitely need to query vhost backend's 
> limit (even for vhost-kernel).
>

According to the virtio spec (e.g. 2.4.4), the guest is discouraged from
using unreasonably large descriptor chains. If possible, I would suggest
using 1024 as the largest number of descriptors that the guest can chain,
even when we have a larger queue size in the future. That is:

if (backend == QEMU backend)
    config.max_chain_size = 1023; /* defined by the QEMU backend implementation */
else if (backend == vhost)
    config.max_chain_size = 1024;

It is transparent to the guest. From the guest's point of view, all it
knows is a value given to it via reading config.max_chain_size.
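
On the QEMU side that could be a small helper used when filling the config
space, along these lines (hypothetical name, reusing get_vhost_net() which
the patch already calls; purely illustrative):

static uint16_t virtio_net_max_chain_size(VirtIONet *n)
{
    NetClientState *nc = qemu_get_queue(n->nic);

    if (get_vhost_net(nc->peer)) {
        return 1024;                      /* vhost backends */
    }
    return VIRTIO_NET_MAX_CHAIN_SIZE;     /* QEMU backend: 1023 */
}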


Best,
Wei
Wang, Wei W June 13, 2017, 7:49 a.m. UTC | #21
On 06/13/2017 02:31 PM, Jason Wang wrote:
>
>
> On 2017年06月13日 14:13, Wei Wang wrote:
>> On 06/13/2017 11:59 AM, Jason Wang wrote:
>>>
>>> On 2017年06月13日 11:55, Jason Wang wrote:
>>>> The issue is what if there's a mismatch of max #sgs between qemu and
>>>> vhost?
>>>>
>>>>> When the vhost backend is used, QEMU is not involved in the data
>>>>> path. The vhost backend
>>>>> directly gets what is offered by the guest from the vq.
>>> FYI, qemu will try to fallback to userspace if there's something wrong
>>> with vhost-kernel (e.g the IOMMU support). This doesn't work for
>>> vhost-user actually, but it works for vhost-kernel.
>>>
>>> Thanks
>>>
>>
>> That wouldn't be a problem. When it falls back to the QEMU backend,
>> the "max_chain_size" will be set according to the QEMU backend
>> (e.g. 1023). Guest will read the max_chain_size register.
>
> What if there's backend that supports less than 1023? Or in the 
> future, we increase the limit to e.g 2048?
>

I agree the potential issue is that it's assumed (hardcoded) that the
vhost backend supports a 1024 chain size. This only becomes an issue if a
vhost backend implementation supports less than 1024 in the future, but
that can be solved by introducing another feature at that point.

If that's acceptable, it will be easy for some customers to upgrade their
already-deployed products to use the 1024 tx queue size.

Best,
Wei
Jason Wang June 13, 2017, 9:04 a.m. UTC | #22
On 2017年06月13日 15:17, Wei Wang wrote:
> On 06/13/2017 02:29 PM, Jason Wang wrote:
>> The issue is what if there's a mismatch of max #sgs between qemu and
>>>>> When the vhost backend is used, QEMU is not involved in the data 
>>>>> path.
>>>>> The vhost backend
>>>>> directly gets what is offered by the guest from the vq. Why would
>>>>> there be a mismatch of
>>>>> max #sgs between QEMU and vhost, and what is the QEMU side max #sgs
>>>>> used for? Thanks.
>>>> You need query the backend max #sgs in this case at least. no? If not
>>>> how do you know the value is supported by the backend?
>>>>
>>>> Thanks
>>>>
>>> Here is my thought: vhost backend has already been supporting 1024 sgs,
>>> so I think it might not be necessary to query the max sgs that the 
>>> vhost
>>> backend supports. In the setup phase, when QEMU detects the backend is
>>> vhost, it assumes 1024 max sgs is supported, instead of giving an extra
>>> call to query.
>>
>> We can probably assume vhost kernel supports up to 1024 sgs. But how 
>> about for other vhost-user backends?
>>
> So far, I haven't seen any vhost backend implementation supporting 
> less than 1024 sgs.

Since vhost-user is an open protocol, we cannot check each implementation
(some may even be closed source). For safety, we need an explicit
clarification on this.

>
>
>> And what you said here makes me ask one of my questions in the past:
>>
>> Do we have plan to extend 1024 to a larger value or 1024 looks good 
>> for the future years? If we only care about 1024, there's even no 
>> need for a new config filed, a feature flag is more than enough. If 
>> we want to extend it to e.g 2048, we definitely need to query vhost 
>> backend's limit (even for vhost-kernel).
>>
>
> According to virtio spec (e.g. 2.4.4), unreasonably large descriptors are
> not encouraged to be used by the guest. If possible, I would suggest 
> to use
> 1024 as the largest number of descriptors that the guest can chain, 
> even when
> we have larger queue size in the future. That is,
> if (backend == QEMU backend)
>     config.max_chain_size = 1023 (defined by the qemu backend 
> implementation);
> else if (backend == vhost)
>     config.max_chain_size = 1024;
>
> It is transparent to the guest. From the guest's point of view, all it 
> knows is a value
> given to him via reading config.max_chain_size.

So it's not actually transparent; at least the guest needs to see and
check this. So the question still stands, since you only care about two
cases in fact:

- backend supports 1024
- backend supports <1024 (qemu or whatever other backends)

So it looks like a new feature flag is more than enough. If the device
(backend) supports this feature, it can guarantee that 1024 sgs is
supported?

Thanks

>
>
> Best,
> Wei
Wang, Wei W June 13, 2017, 9:50 a.m. UTC | #23
On 06/13/2017 05:04 PM, Jason Wang wrote:
>
>
> On 2017年06月13日 15:17, Wei Wang wrote:
>> On 06/13/2017 02:29 PM, Jason Wang wrote:
>>> The issue is what if there's a mismatch of max #sgs between qemu and
>>>>>> When the vhost backend is used, QEMU is not involved in the data 
>>>>>> path.
>>>>>> The vhost backend
>>>>>> directly gets what is offered by the guest from the vq. Why would
>>>>>> there be a mismatch of
>>>>>> max #sgs between QEMU and vhost, and what is the QEMU side max #sgs
>>>>>> used for? Thanks.
>>>>> You need query the backend max #sgs in this case at least. no? If not
>>>>> how do you know the value is supported by the backend?
>>>>>
>>>>> Thanks
>>>>>
>>>> Here is my thought: vhost backend has already been supporting 1024 
>>>> sgs,
>>>> so I think it might not be necessary to query the max sgs that the 
>>>> vhost
>>>> backend supports. In the setup phase, when QEMU detects the backend is
>>>> vhost, it assumes 1024 max sgs is supported, instead of giving an 
>>>> extra
>>>> call to query.
>>>
>>> We can probably assume vhost kernel supports up to 1024 sgs. But how 
>>> about for other vhost-user backends?
>>>
>> So far, I haven't seen any vhost backend implementation supporting 
>> less than 1024 sgs.
>
> Since vhost-user is an open protocol we can not check each 
> implementation (some may be even close sourced). For safety, we need 
> an explicit clarification on this.
>
>>
>>
>>> And what you said here makes me ask one of my questions in the past:
>>>
>>> Do we have plan to extend 1024 to a larger value or 1024 looks good 
>>> for the future years? If we only care about 1024, there's even no 
>>> need for a new config filed, a feature flag is more than enough. If 
>>> we want to extend it to e.g 2048, we definitely need to query vhost 
>>> backend's limit (even for vhost-kernel).
>>>
>>
>> According to virtio spec (e.g. 2.4.4), unreasonably large descriptors 
>> are
>> not encouraged to be used by the guest. If possible, I would suggest 
>> to use
>> 1024 as the largest number of descriptors that the guest can chain, 
>> even when
>> we have larger queue size in the future. That is,
>> if (backend == QEMU backend)
>>     config.max_chain_size = 1023 (defined by the qemu backend 
>> implementation);
>> else if (backend == vhost)
>>     config.max_chain_size = 1024;
>>
>> It is transparent to the guest. From the guest's point of view, all 
>> it knows is a value
>> given to him via reading config.max_chain_size.
>
> So not transparent actually, guest at least guest need to see and 
> check for this. So the question still, since you only care about two 
> cases in fact:
>
> - backend supports 1024
> - backend supports <1024 (qemu or whatever other backends)
>
> So it looks like a new feature flag is more than enough. If 
> device(backends) support this feature, it can make sure 1024 sgs is 
> supported?
>

That wouldn't be enough. For example, suppose the QEMU 3.0 backend
supports max_chain_size=1023, while the QEMU 4.0 backend supports
max_chain_size=1021. How would the guest know the max size with the same
feature flag? Would it still chain 1023 descriptors with QEMU 4.0?

Best,
Wei
Jason Wang June 13, 2017, 10:46 a.m. UTC | #24
On 2017年06月13日 17:50, Wei Wang wrote:
> On 06/13/2017 05:04 PM, Jason Wang wrote:
>>
>>
>> On 2017年06月13日 15:17, Wei Wang wrote:
>>> On 06/13/2017 02:29 PM, Jason Wang wrote:
>>>> The issue is what if there's a mismatch of max #sgs between qemu and
>>>>>>> When the vhost backend is used, QEMU is not involved in the data 
>>>>>>> path.
>>>>>>> The vhost backend
>>>>>>> directly gets what is offered by the guest from the vq. Why would
>>>>>>> there be a mismatch of
>>>>>>> max #sgs between QEMU and vhost, and what is the QEMU side max #sgs
>>>>>>> used for? Thanks.
>>>>>> You need query the backend max #sgs in this case at least. no? If 
>>>>>> not
>>>>>> how do you know the value is supported by the backend?
>>>>>>
>>>>>> Thanks
>>>>>>
>>>>> Here is my thought: vhost backend has already been supporting 1024 
>>>>> sgs,
>>>>> so I think it might not be necessary to query the max sgs that the 
>>>>> vhost
>>>>> backend supports. In the setup phase, when QEMU detects the 
>>>>> backend is
>>>>> vhost, it assumes 1024 max sgs is supported, instead of giving an 
>>>>> extra
>>>>> call to query.
>>>>
>>>> We can probably assume vhost kernel supports up to 1024 sgs. But 
>>>> how about for other vhost-user backends?
>>>>
>>> So far, I haven't seen any vhost backend implementation supporting 
>>> less than 1024 sgs.
>>
>> Since vhost-user is an open protocol we can not check each 
>> implementation (some may be even close sourced). For safety, we need 
>> an explicit clarification on this.
>>
>>>
>>>
>>>> And what you said here makes me ask one of my questions in the past:
>>>>
>>>> Do we have plan to extend 1024 to a larger value or 1024 looks good 
>>>> for the future years? If we only care about 1024, there's even no 
>>>> need for a new config filed, a feature flag is more than enough. If 
>>>> we want to extend it to e.g 2048, we definitely need to query vhost 
>>>> backend's limit (even for vhost-kernel).
>>>>
>>>
>>> According to virtio spec (e.g. 2.4.4), unreasonably large 
>>> descriptors are
>>> not encouraged to be used by the guest. If possible, I would suggest 
>>> to use
>>> 1024 as the largest number of descriptors that the guest can chain, 
>>> even when
>>> we have larger queue size in the future. That is,
>>> if (backend == QEMU backend)
>>>     config.max_chain_size = 1023 (defined by the qemu backend 
>>> implementation);
>>> else if (backend == vhost)
>>>     config.max_chain_size = 1024;
>>>
>>> It is transparent to the guest. From the guest's point of view, all 
>>> it knows is a value
>>> given to him via reading config.max_chain_size.
>>
>> So not transparent actually, guest at least guest need to see and 
>> check for this. So the question still, since you only care about two 
>> cases in fact:
>>
>> - backend supports 1024
>> - backend supports <1024 (qemu or whatever other backends)
>>
>> So it looks like a new feature flag is more than enough. If 
>> device(backends) support this feature, it can make sure 1024 sgs is 
>> supported?
>>
>
> That wouldn't be enough. For example, QEMU3.0 backend supports 
> max_chain_size=1023,
> while QEMU4.0 backend supports max_chain_size=1021. How would the 
> guest know
> the max size with the same feature flag? Would it still chain 1023 
> descriptors with QEMU4.0?
>
> Best,
> Wei

I believe we won't go back to less than 1024 in the future. It may be 
worth adding a unit test for this to catch regressions early.

Thanks

>
>
>
>
Jason Wang June 14, 2017, 11:26 a.m. UTC | #25
On 2017年06月13日 18:46, Jason Wang wrote:
>
>
> On 2017年06月13日 17:50, Wei Wang wrote:
>> On 06/13/2017 05:04 PM, Jason Wang wrote:
>>>
>>>
>>> On 2017年06月13日 15:17, Wei Wang wrote:
>>>> On 06/13/2017 02:29 PM, Jason Wang wrote:
>>>>> The issue is what if there's a mismatch of max #sgs between qemu and
>>>>>>>> When the vhost backend is used, QEMU is not involved in the 
>>>>>>>> data path.
>>>>>>>> The vhost backend
>>>>>>>> directly gets what is offered by the guest from the vq. Why would
>>>>>>>> there be a mismatch of
>>>>>>>> max #sgs between QEMU and vhost, and what is the QEMU side max 
>>>>>>>> #sgs
>>>>>>>> used for? Thanks.
>>>>>>> You need query the backend max #sgs in this case at least. no? 
>>>>>>> If not
>>>>>>> how do you know the value is supported by the backend?
>>>>>>>
>>>>>>> Thanks
>>>>>>>
>>>>>> Here is my thought: vhost backend has already been supporting 
>>>>>> 1024 sgs,
>>>>>> so I think it might not be necessary to query the max sgs that 
>>>>>> the vhost
>>>>>> backend supports. In the setup phase, when QEMU detects the 
>>>>>> backend is
>>>>>> vhost, it assumes 1024 max sgs is supported, instead of giving an 
>>>>>> extra
>>>>>> call to query.
>>>>>
>>>>> We can probably assume vhost kernel supports up to 1024 sgs. But 
>>>>> how about for other vhost-user backends?
>>>>>
>>>> So far, I haven't seen any vhost backend implementation supporting 
>>>> less than 1024 sgs.
>>>
>>> Since vhost-user is an open protocol we can not check each 
>>> implementation (some may be even close sourced). For safety, we need 
>>> an explicit clarification on this.
>>>
>>>>
>>>>
>>>>> And what you said here makes me ask one of my questions in the past:
>>>>>
>>>>> Do we have plan to extend 1024 to a larger value or 1024 looks 
>>>>> good for the future years? If we only care about 1024, there's 
>>>>> even no need for a new config filed, a feature flag is more than 
>>>>> enough. If we want to extend it to e.g 2048, we definitely need to 
>>>>> query vhost backend's limit (even for vhost-kernel).
>>>>>
>>>>
>>>> According to virtio spec (e.g. 2.4.4), unreasonably large 
>>>> descriptors are
>>>> not encouraged to be used by the guest. If possible, I would 
>>>> suggest to use
>>>> 1024 as the largest number of descriptors that the guest can chain, 
>>>> even when
>>>> we have larger queue size in the future. That is,
>>>> if (backend == QEMU backend)
>>>>     config.max_chain_size = 1023 (defined by the qemu backend 
>>>> implementation);
>>>> else if (backend == vhost)
>>>>     config.max_chain_size = 1024;
>>>>
>>>> It is transparent to the guest. From the guest's point of view, all 
>>>> it knows is a value
>>>> given to him via reading config.max_chain_size.
>>>
>>> So not transparent actually, guest at least guest need to see and 
>>> check for this. So the question still, since you only care about two 
>>> cases in fact:
>>>
>>> - backend supports 1024
>>> - backend supports <1024 (qemu or whatever other backends)
>>>
>>> So it looks like a new feature flag is more than enough. If 
>>> device(backends) support this feature, it can make sure 1024 sgs is 
>>> supported?
>>>
>>
>> That wouldn't be enough. For example, QEMU3.0 backend supports 
>> max_chain_size=1023,
>> while QEMU4.0 backend supports max_chain_size=1021. How would the 
>> guest know
>> the max size with the same feature flag? Would it still chain 1023 
>> descriptors with QEMU4.0?
>>
>> Best,
>> Wei
>
> I believe we won't go back to less than 1024 in the future. It may be 
> worth to add a unittest for this to catch regression early.
>
> Thanks
>

Considering the queue size is 256 now, I think maybe we can first make 
the tx queue size configurable up to 1024, and then do the #sg stuff on top.

What's your opinion, Michael?

Thanks
Michael S. Tsirkin June 14, 2017, 3:22 p.m. UTC | #26
On Wed, Jun 14, 2017 at 07:26:54PM +0800, Jason Wang wrote:
> 
> 
> On 2017年06月13日 18:46, Jason Wang wrote:
> > 
> > 
> > On 2017年06月13日 17:50, Wei Wang wrote:
> > > On 06/13/2017 05:04 PM, Jason Wang wrote:
> > > > 
> > > > 
> > > > On 2017年06月13日 15:17, Wei Wang wrote:
> > > > > On 06/13/2017 02:29 PM, Jason Wang wrote:
> > > > > > The issue is what if there's a mismatch of max #sgs between qemu and
> > > > > > > > > When the vhost backend is used, QEMU is not
> > > > > > > > > involved in the data path.
> > > > > > > > > The vhost backend
> > > > > > > > > directly gets what is offered by the guest from the vq. Why would
> > > > > > > > > there be a mismatch of
> > > > > > > > > max #sgs between QEMU and vhost, and what is
> > > > > > > > > the QEMU side max #sgs
> > > > > > > > > used for? Thanks.
> > > > > > > > You need query the backend max #sgs in this case
> > > > > > > > at least. no? If not
> > > > > > > > how do you know the value is supported by the backend?
> > > > > > > > 
> > > > > > > > Thanks
> > > > > > > > 
> > > > > > > Here is my thought: vhost backend has already been
> > > > > > > supporting 1024 sgs,
> > > > > > > so I think it might not be necessary to query the
> > > > > > > max sgs that the vhost
> > > > > > > backend supports. In the setup phase, when QEMU
> > > > > > > detects the backend is
> > > > > > > vhost, it assumes 1024 max sgs is supported, instead
> > > > > > > of giving an extra
> > > > > > > call to query.
> > > > > > 
> > > > > > We can probably assume vhost kernel supports up to 1024
> > > > > > sgs. But how about for other vhost-user backends?
> > > > > > 
> > > > > So far, I haven't seen any vhost backend implementation
> > > > > supporting less than 1024 sgs.
> > > > 
> > > > Since vhost-user is an open protocol we can not check each
> > > > implementation (some may be even close sourced). For safety, we
> > > > need an explicit clarification on this.
> > > > 
> > > > > 
> > > > > 
> > > > > > And what you said here makes me ask one of my questions in the past:
> > > > > > 
> > > > > > Do we have plan to extend 1024 to a larger value or 1024
> > > > > > looks good for the future years? If we only care about
> > > > > > 1024, there's even no need for a new config filed, a
> > > > > > feature flag is more than enough. If we want to extend
> > > > > > it to e.g 2048, we definitely need to query vhost
> > > > > > backend's limit (even for vhost-kernel).
> > > > > > 
> > > > > 
> > > > > According to virtio spec (e.g. 2.4.4), unreasonably large
> > > > > descriptors are
> > > > > not encouraged to be used by the guest. If possible, I would
> > > > > suggest to use
> > > > > 1024 as the largest number of descriptors that the guest can
> > > > > chain, even when
> > > > > we have larger queue size in the future. That is,
> > > > > if (backend == QEMU backend)
> > > > >     config.max_chain_size = 1023 (defined by the qemu
> > > > > backend implementation);
> > > > > else if (backend == vhost)
> > > > >     config.max_chain_size = 1024;
> > > > > 
> > > > > It is transparent to the guest. From the guest's point of
> > > > > view, all it knows is a value
> > > > > given to him via reading config.max_chain_size.
> > > > 
> > > > So not transparent actually, guest at least guest need to see
> > > > and check for this. So the question still, since you only care
> > > > about two cases in fact:
> > > > 
> > > > - backend supports 1024
> > > > - backend supports <1024 (qemu or whatever other backends)
> > > > 
> > > > So it looks like a new feature flag is more than enough. If
> > > > device(backends) support this feature, it can make sure 1024 sgs
> > > > is supported?
> > > > 
> > > 
> > > That wouldn't be enough. For example, QEMU3.0 backend supports
> > > max_chain_size=1023,
> > > while QEMU4.0 backend supports max_chain_size=1021. How would the
> > > guest know
> > > the max size with the same feature flag? Would it still chain 1023
> > > descriptors with QEMU4.0?
> > > 
> > > Best,
> > > Wei
> > 
> > I believe we won't go back to less than 1024 in the future. It may be
> > worth to add a unittest for this to catch regression early.
> > 
> > Thanks

I think I disagree with that. Smaller pipes are better (e.g. less cache
pressure), and you only need huge pipes because the host thread gets
scheduled out for too long. With more CPUs there's less of a chance of
an overcommit, so we'll be able to get by with smaller pipes in the
future.

> 
> Consider the queue size is 256 now, I think maybe we can first make tx queue
> size configurable up to 1024, and then do the #sg stuffs on top.
> 
> What's your opinion, Michael?
> 
> Thanks

With a kernel backend, 1024 is problematic since we are then unable
to add any entries or handle cases where an entry crosses an MR region
boundary. We could support up to 512 with a kernel backend but no one
seems to want that :)

With vhost-user the backend might be able to handle that. So an
acceptable option would be to allow 1K with vhost-user backends
only, trim it back with other backends.
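
A rough sketch of the "allow 1K with vhost-user only, trim it back with
other backends" idea on the QEMU side; the peer-type check and the clamp
to 512 are illustrative assumptions, not code from this patch:

#include "net/net.h"
#include "hw/virtio/virtio-net.h"

static void virtio_net_clamp_tx_queue_size(VirtIONet *n)
{
    NetClientState *peer = qemu_get_queue(n->nic)->peer;
    bool is_vhost_user = peer && peer->info &&
                         peer->info->type == NET_CLIENT_DRIVER_VHOST_USER;

    if (!is_vhost_user && n->net_conf.tx_queue_size > 512) {
        /* qemu and vhost-kernel backends keep the smaller size */
        n->net_conf.tx_queue_size = 512;
    }
}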
Jason Wang June 15, 2017, 4:16 a.m. UTC | #27
On 2017年06月14日 23:22, Michael S. Tsirkin wrote:
> On Wed, Jun 14, 2017 at 07:26:54PM +0800, Jason Wang wrote:
>>
>> On 2017年06月13日 18:46, Jason Wang wrote:
>>>
>>> On 2017年06月13日 17:50, Wei Wang wrote:
>>>> On 06/13/2017 05:04 PM, Jason Wang wrote:
>>>>>
>>>>> On 2017年06月13日 15:17, Wei Wang wrote:
>>>>>> On 06/13/2017 02:29 PM, Jason Wang wrote:
>>>>>>> The issue is what if there's a mismatch of max #sgs between qemu and
>>>>>>>>>> When the vhost backend is used, QEMU is not
>>>>>>>>>> involved in the data path.
>>>>>>>>>> The vhost backend
>>>>>>>>>> directly gets what is offered by the guest from the vq. Why would
>>>>>>>>>> there be a mismatch of
>>>>>>>>>> max #sgs between QEMU and vhost, and what is
>>>>>>>>>> the QEMU side max #sgs
>>>>>>>>>> used for? Thanks.
>>>>>>>>> You need query the backend max #sgs in this case
>>>>>>>>> at least. no? If not
>>>>>>>>> how do you know the value is supported by the backend?
>>>>>>>>>
>>>>>>>>> Thanks
>>>>>>>>>
>>>>>>>> Here is my thought: vhost backend has already been
>>>>>>>> supporting 1024 sgs,
>>>>>>>> so I think it might not be necessary to query the
>>>>>>>> max sgs that the vhost
>>>>>>>> backend supports. In the setup phase, when QEMU
>>>>>>>> detects the backend is
>>>>>>>> vhost, it assumes 1024 max sgs is supported, instead
>>>>>>>> of giving an extra
>>>>>>>> call to query.
>>>>>>> We can probably assume vhost kernel supports up to 1024
>>>>>>> sgs. But how about for other vhost-user backends?
>>>>>>>
>>>>>> So far, I haven't seen any vhost backend implementation
>>>>>> supporting less than 1024 sgs.
>>>>> Since vhost-user is an open protocol we can not check each
>>>>> implementation (some may be even close sourced). For safety, we
>>>>> need an explicit clarification on this.
>>>>>
>>>>>>
>>>>>>> And what you said here makes me ask one of my questions in the past:
>>>>>>>
>>>>>>> Do we have plan to extend 1024 to a larger value or 1024
>>>>>>> looks good for the future years? If we only care about
>>>>>>> 1024, there's even no need for a new config filed, a
>>>>>>> feature flag is more than enough. If we want to extend
>>>>>>> it to e.g 2048, we definitely need to query vhost
>>>>>>> backend's limit (even for vhost-kernel).
>>>>>>>
>>>>>> According to virtio spec (e.g. 2.4.4), unreasonably large
>>>>>> descriptors are
>>>>>> not encouraged to be used by the guest. If possible, I would
>>>>>> suggest to use
>>>>>> 1024 as the largest number of descriptors that the guest can
>>>>>> chain, even when
>>>>>> we have larger queue size in the future. That is,
>>>>>> if (backend == QEMU backend)
>>>>>>      config.max_chain_size = 1023 (defined by the qemu
>>>>>> backend implementation);
>>>>>> else if (backend == vhost)
>>>>>>      config.max_chain_size = 1024;
>>>>>>
>>>>>> It is transparent to the guest. From the guest's point of
>>>>>> view, all it knows is a value
>>>>>> given to him via reading config.max_chain_size.
>>>>> So not transparent actually, guest at least guest need to see
>>>>> and check for this. So the question still, since you only care
>>>>> about two cases in fact:
>>>>>
>>>>> - backend supports 1024
>>>>> - backend supports <1024 (qemu or whatever other backends)
>>>>>
>>>>> So it looks like a new feature flag is more than enough. If
>>>>> device(backends) support this feature, it can make sure 1024 sgs
>>>>> is supported?
>>>>>
>>>> That wouldn't be enough. For example, QEMU3.0 backend supports
>>>> max_chain_size=1023,
>>>> while QEMU4.0 backend supports max_chain_size=1021. How would the
>>>> guest know
>>>> the max size with the same feature flag? Would it still chain 1023
>>>> descriptors with QEMU4.0?
>>>>
>>>> Best,
>>>> Wei
>>> I believe we won't go back to less than 1024 in the future. It may be
>>> worth to add a unittest for this to catch regression early.
>>>
>>> Thanks
> I think I disagree with that. Smaller pipes a better (e.g. less cache
> pressure) and you only need huge pipes because host thread gets
> scheduled out for too long. With more CPUs there's less of a chance of
> an overcommit so we'll be able to get by with smaller pipes in the
> future.

Agreed, but we are talking about the upper limit. Even if 1024 is 
supported, a small number of #sgs is still encouraged.

>
>> Consider the queue size is 256 now, I think maybe we can first make tx queue
>> size configurable up to 1024, and then do the #sg stuffs on top.
>>
>> What's your opinion, Michael?
>>
>> Thanks
> With a kernel backend, 1024 is problematic since we are then unable
> to add any entries or handle cases where an entry crosses an MR region
> boundary. We could support up to 512 with a kernel backend but no one
> seems to want that :)

Then I see issues with indirect descriptors.

We have tried to allow up to 1024 chained descriptors implicitly since 
e0e9b406470b ("vhost: max s/g to match qemu"). If the guest can submit 
descriptors that cross MR boundaries, I'm afraid we've already had this 
bug since that commit. And this actually seems to conflict with what the 
spec says in 2.4.5:

"""
The number of descriptors in the table is defined by the queue size for 
this virtqueue: this is the maximum possible descriptor chain length.
"""

Technically, we have had the same issue for rx since we already allow a 
1024 queue size there.

So actually, allowing the tx queue size to be 1024 does not introduce any 
new trouble?
>
> With vhost-user the backend might be able to handle that. So an
> acceptable option would be to allow 1K with vhost-user backends
> only, trim it back with other backends.
>

I believe the idea is to clarify the maximum chain size instead of 
having any assumption.

Thanks
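
Once the limit is explicit, the guest can cap its chains against it no
matter how large the queue is. A hypothetical guest tx-path sketch (the
skb accounting is simplified and this is not part of this series):

#include <linux/skbuff.h>

/* Hypothetical check of an outgoing skb against the advertised limit. */
static int virtnet_cap_chain(struct sk_buff *skb, u16 max_chain_size)
{
	/* one descriptor for the virtio-net header, one for the linear
	 * data, plus one per page fragment */
	unsigned int needed = 2 + skb_shinfo(skb)->nr_frags;

	if (needed > max_chain_size)
		return skb_linearize(skb);	/* collapse to a single buffer */
	return 0;
}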
Wang, Wei W June 15, 2017, 6:52 a.m. UTC | #28
On 06/15/2017 12:16 PM, Jason Wang wrote:
>
>
> On 2017年06月14日 23:22, Michael S. Tsirkin wrote:
>> On Wed, Jun 14, 2017 at 07:26:54PM +0800, Jason Wang wrote:
>>>
>>> On 2017年06月13日 18:46, Jason Wang wrote:
>>>>
>>>> On 2017年06月13日 17:50, Wei Wang wrote:
>>>>> On 06/13/2017 05:04 PM, Jason Wang wrote:
>>>>>>
>>>>>> On 2017年06月13日 15:17, Wei Wang wrote:
>>>>>>> On 06/13/2017 02:29 PM, Jason Wang wrote:
>>>>>>>> The issue is what if there's a mismatch of max #sgs between 
>>>>>>>> qemu and
>>>>>>>>>>> When the vhost backend is used, QEMU is not
>>>>>>>>>>> involved in the data path.
>>>>>>>>>>> The vhost backend
>>>>>>>>>>> directly gets what is offered by the guest from the vq. Why 
>>>>>>>>>>> would
>>>>>>>>>>> there be a mismatch of
>>>>>>>>>>> max #sgs between QEMU and vhost, and what is
>>>>>>>>>>> the QEMU side max #sgs
>>>>>>>>>>> used for? Thanks.
>>>>>>>>>> You need query the backend max #sgs in this case
>>>>>>>>>> at least. no? If not
>>>>>>>>>> how do you know the value is supported by the backend?
>>>>>>>>>>
>>>>>>>>>> Thanks
>>>>>>>>>>
>>>>>>>>> Here is my thought: vhost backend has already been
>>>>>>>>> supporting 1024 sgs,
>>>>>>>>> so I think it might not be necessary to query the
>>>>>>>>> max sgs that the vhost
>>>>>>>>> backend supports. In the setup phase, when QEMU
>>>>>>>>> detects the backend is
>>>>>>>>> vhost, it assumes 1024 max sgs is supported, instead
>>>>>>>>> of giving an extra
>>>>>>>>> call to query.
>>>>>>>> We can probably assume vhost kernel supports up to 1024
>>>>>>>> sgs. But how about for other vhost-user backends?
>>>>>>>>
>>>>>>> So far, I haven't seen any vhost backend implementation
>>>>>>> supporting less than 1024 sgs.
>>>>>> Since vhost-user is an open protocol we can not check each
>>>>>> implementation (some may be even close sourced). For safety, we
>>>>>> need an explicit clarification on this.
>>>>>>
>>>>>>>
>>>>>>>> And what you said here makes me ask one of my questions in the 
>>>>>>>> past:
>>>>>>>>
>>>>>>>> Do we have plan to extend 1024 to a larger value or 1024
>>>>>>>> looks good for the future years? If we only care about
>>>>>>>> 1024, there's even no need for a new config filed, a
>>>>>>>> feature flag is more than enough. If we want to extend
>>>>>>>> it to e.g 2048, we definitely need to query vhost
>>>>>>>> backend's limit (even for vhost-kernel).
>>>>>>>>
>>>>>>> According to virtio spec (e.g. 2.4.4), unreasonably large
>>>>>>> descriptors are
>>>>>>> not encouraged to be used by the guest. If possible, I would
>>>>>>> suggest to use
>>>>>>> 1024 as the largest number of descriptors that the guest can
>>>>>>> chain, even when
>>>>>>> we have larger queue size in the future. That is,
>>>>>>> if (backend == QEMU backend)
>>>>>>>      config.max_chain_size = 1023 (defined by the qemu
>>>>>>> backend implementation);
>>>>>>> else if (backend == vhost)
>>>>>>>      config.max_chain_size = 1024;
>>>>>>>
>>>>>>> It is transparent to the guest. From the guest's point of
>>>>>>> view, all it knows is a value
>>>>>>> given to him via reading config.max_chain_size.
>>>>>> So not transparent actually, guest at least guest need to see
>>>>>> and check for this. So the question still, since you only care
>>>>>> about two cases in fact:
>>>>>>
>>>>>> - backend supports 1024
>>>>>> - backend supports <1024 (qemu or whatever other backends)
>>>>>>
>>>>>> So it looks like a new feature flag is more than enough. If
>>>>>> device(backends) support this feature, it can make sure 1024 sgs
>>>>>> is supported?
>>>>>>
>>>>> That wouldn't be enough. For example, QEMU3.0 backend supports
>>>>> max_chain_size=1023,
>>>>> while QEMU4.0 backend supports max_chain_size=1021. How would the
>>>>> guest know
>>>>> the max size with the same feature flag? Would it still chain 1023
>>>>> descriptors with QEMU4.0?
>>>>>
>>>>> Best,
>>>>> Wei
>>>> I believe we won't go back to less than 1024 in the future. It may be
>>>> worth to add a unittest for this to catch regression early.
>>>>
>>>> Thanks
>> I think I disagree with that. Smaller pipes a better (e.g. less cache
>> pressure) and you only need huge pipes because host thread gets
>> scheduled out for too long. With more CPUs there's less of a chance of
>> an overcommit so we'll be able to get by with smaller pipes in the
>> future.
>
> Agree, but we are talking about the upper limit. Even if 1024 is 
> supported, small number of #sgs is still encouraged.
>
>>
>>> Consider the queue size is 256 now, I think maybe we can first make 
>>> tx queue
>>> size configurable up to 1024, and then do the #sg stuffs on top.
>>>
>>> What's your opinion, Michael?
>>>
>>> Thanks
>> With a kernel backend, 1024 is problematic since we are then unable
>> to add any entries or handle cases where an entry crosses an MR region
>> boundary. We could support up to 512 with a kernel backend but no one
>> seems to want that :)
>
> Then I see issues with indirect descriptors.
>
> We try to allow up 1024 chained descriptors implicitly since 
> e0e9b406470b ("vhost: max s/g to match qemu"). If guest can submit 
> crossing MR descs, I'm afraid we've already had this bug since this 
> commit. And actually this seems conflict to what spec said in 2.4.5:
>
> """
> The number of descriptors in the table is defined by the queue size 
> for this virtqueue: this is the maximum possible descriptor chain length.
> """
>
> Technically, we had the same issue for rx since we allow 1024 queue 
> size now.
>
> So actually, allowing the size to 1024 does not introduce any new 
> trouble?
>>
>> With vhost-user the backend might be able to handle that. So an
>> acceptable option would be to allow 1K with vhost-user backends
>> only, trim it back with other backends.
>>
>
> I believe the idea is to clarify the maximum chain size instead of 
> having any assumption.
>
>

I think the issues can be solved by VIRTIO_F_MAX_CHAIN_SIZE.

For now, how about splitting it into two series of patches:
1) enable a 1024 tx queue size for vhost-user, so that users of 
vhost-user can easily use a 1024 queue size.
2) enable VIRTIO_F_MAX_CHAIN_SIZE, to enhance robustness.

Best,
Wei
Michael S. Tsirkin June 16, 2017, 3:22 a.m. UTC | #29
On Thu, Jun 15, 2017 at 02:52:01PM +0800, Wei Wang wrote:
> On 06/15/2017 12:16 PM, Jason Wang wrote:
> > 
> > 
> > On 2017年06月14日 23:22, Michael S. Tsirkin wrote:
> > > On Wed, Jun 14, 2017 at 07:26:54PM +0800, Jason Wang wrote:
> > > > 
> > > > On 2017年06月13日 18:46, Jason Wang wrote:
> > > > > 
> > > > > On 2017年06月13日 17:50, Wei Wang wrote:
> > > > > > On 06/13/2017 05:04 PM, Jason Wang wrote:
> > > > > > > 
> > > > > > > On 2017年06月13日 15:17, Wei Wang wrote:
> > > > > > > > On 06/13/2017 02:29 PM, Jason Wang wrote:
> > > > > > > > > The issue is what if there's a mismatch of
> > > > > > > > > max #sgs between qemu and
> > > > > > > > > > > > When the vhost backend is used, QEMU is not
> > > > > > > > > > > > involved in the data path.
> > > > > > > > > > > > The vhost backend
> > > > > > > > > > > > directly gets what is offered by
> > > > > > > > > > > > the guest from the vq. Why would
> > > > > > > > > > > > there be a mismatch of
> > > > > > > > > > > > max #sgs between QEMU and vhost, and what is
> > > > > > > > > > > > the QEMU side max #sgs
> > > > > > > > > > > > used for? Thanks.
> > > > > > > > > > > You need query the backend max #sgs in this case
> > > > > > > > > > > at least. no? If not
> > > > > > > > > > > how do you know the value is supported by the backend?
> > > > > > > > > > > 
> > > > > > > > > > > Thanks
> > > > > > > > > > > 
> > > > > > > > > > Here is my thought: vhost backend has already been
> > > > > > > > > > supporting 1024 sgs,
> > > > > > > > > > so I think it might not be necessary to query the
> > > > > > > > > > max sgs that the vhost
> > > > > > > > > > backend supports. In the setup phase, when QEMU
> > > > > > > > > > detects the backend is
> > > > > > > > > > vhost, it assumes 1024 max sgs is supported, instead
> > > > > > > > > > of giving an extra
> > > > > > > > > > call to query.
> > > > > > > > > We can probably assume vhost kernel supports up to 1024
> > > > > > > > > sgs. But how about for other vhost-user backends?
> > > > > > > > > 
> > > > > > > > So far, I haven't seen any vhost backend implementation
> > > > > > > > supporting less than 1024 sgs.
> > > > > > > Since vhost-user is an open protocol we can not check each
> > > > > > > implementation (some may be even close sourced). For safety, we
> > > > > > > need an explicit clarification on this.
> > > > > > > 
> > > > > > > > 
> > > > > > > > > And what you said here makes me ask one of
> > > > > > > > > my questions in the past:
> > > > > > > > > 
> > > > > > > > > Do we have plan to extend 1024 to a larger value or 1024
> > > > > > > > > looks good for the future years? If we only care about
> > > > > > > > > 1024, there's even no need for a new config filed, a
> > > > > > > > > feature flag is more than enough. If we want to extend
> > > > > > > > > it to e.g 2048, we definitely need to query vhost
> > > > > > > > > backend's limit (even for vhost-kernel).
> > > > > > > > > 
> > > > > > > > According to virtio spec (e.g. 2.4.4), unreasonably large
> > > > > > > > descriptors are
> > > > > > > > not encouraged to be used by the guest. If possible, I would
> > > > > > > > suggest to use
> > > > > > > > 1024 as the largest number of descriptors that the guest can
> > > > > > > > chain, even when
> > > > > > > > we have larger queue size in the future. That is,
> > > > > > > > if (backend == QEMU backend)
> > > > > > > >      config.max_chain_size = 1023 (defined by the qemu
> > > > > > > > backend implementation);
> > > > > > > > else if (backend == vhost)
> > > > > > > >      config.max_chain_size = 1024;
> > > > > > > > 
> > > > > > > > It is transparent to the guest. From the guest's point of
> > > > > > > > view, all it knows is a value
> > > > > > > > given to him via reading config.max_chain_size.
> > > > > > > So not transparent actually, guest at least guest need to see
> > > > > > > and check for this. So the question still, since you only care
> > > > > > > about two cases in fact:
> > > > > > > 
> > > > > > > - backend supports 1024
> > > > > > > - backend supports <1024 (qemu or whatever other backends)
> > > > > > > 
> > > > > > > So it looks like a new feature flag is more than enough. If
> > > > > > > device(backends) support this feature, it can make sure 1024 sgs
> > > > > > > is supported?
> > > > > > > 
> > > > > > That wouldn't be enough. For example, QEMU3.0 backend supports
> > > > > > max_chain_size=1023,
> > > > > > while QEMU4.0 backend supports max_chain_size=1021. How would the
> > > > > > guest know
> > > > > > the max size with the same feature flag? Would it still chain 1023
> > > > > > descriptors with QEMU4.0?
> > > > > > 
> > > > > > Best,
> > > > > > Wei
> > > > > I believe we won't go back to less than 1024 in the future. It may be
> > > > > worth to add a unittest for this to catch regression early.
> > > > > 
> > > > > Thanks
> > > I think I disagree with that. Smaller pipes a better (e.g. less cache
> > > pressure) and you only need huge pipes because host thread gets
> > > scheduled out for too long. With more CPUs there's less of a chance of
> > > an overcommit so we'll be able to get by with smaller pipes in the
> > > future.
> > 
> > Agree, but we are talking about the upper limit. Even if 1024 is
> > supported, small number of #sgs is still encouraged.
> > 
> > > 
> > > > Consider the queue size is 256 now, I think maybe we can first
> > > > make tx queue
> > > > size configurable up to 1024, and then do the #sg stuffs on top.
> > > > 
> > > > What's your opinion, Michael?
> > > > 
> > > > Thanks
> > > With a kernel backend, 1024 is problematic since we are then unable
> > > to add any entries or handle cases where an entry crosses an MR region
> > > boundary. We could support up to 512 with a kernel backend but no one
> > > seems to want that :)
> > 
> > Then I see issues with indirect descriptors.
> > 
> > We try to allow up 1024 chained descriptors implicitly since
> > e0e9b406470b ("vhost: max s/g to match qemu"). If guest can submit
> > crossing MR descs, I'm afraid we've already had this bug since this
> > commit. And actually this seems conflict to what spec said in 2.4.5:
> > 
> > """
> > The number of descriptors in the table is defined by the queue size for
> > this virtqueue: this is the maximum possible descriptor chain length.
> > """
> > 
> > Technically, we had the same issue for rx since we allow 1024 queue size
> > now.
> > 
> > So actually, allowing the size to 1024 does not introduce any new
> > trouble?
> > > 
> > > With vhost-user the backend might be able to handle that. So an
> > > acceptable option would be to allow 1K with vhost-user backends
> > > only, trim it back with other backends.
> > > 
> > 
> > I believe the idea is to clarify the maximum chain size instead of
> > having any assumption.
> > 
> > 
> 
> I think the issues can be solved by VIRTIO_F_MAX_CHAIN_SIZE.
> 
> For now, how about splitting it into two series of patches:
> 1) enable 1024 tx queue size for vhost-user, to let the users of vhost-user
> to easily use 1024 queue size.

Fine with me. 1) will get the property from the user but override it on
!vhost-user. Do we need a protocol flag? It seems prudent, but we get
back to the cross-version migration issues that are still pending a
solution. Marc Andre, what's the status of that work?

> 2) enable VIRTIO_F_MAX_CHAIN_SIZE,  to enhance robustness.

Rather, to support it for more backends.

> Best,
> Wei
Jason Wang June 16, 2017, 8:57 a.m. UTC | #30
On 2017年06月16日 11:22, Michael S. Tsirkin wrote:
>> I think the issues can be solved by VIRTIO_F_MAX_CHAIN_SIZE.
>>
>> For now, how about splitting it into two series of patches:
>> 1) enable 1024 tx queue size for vhost-user, to let the users of vhost-user
>> to easily use 1024 queue size.
> Fine with me. 1) will get property from user but override it on
> !vhost-user. Do we need a protocol flag? It seems prudent but we get
> back to cross-version migration issues that are still pending solution.
> Marc Andre, what's the status of that work?
>
>> 2) enable VIRTIO_F_MAX_CHAIN_SIZE,  to enhance robustness.
> Rather, to support it for more backends.

OK, but if we want to support different values of max chain size in the 
future, it would be problematic for cross-backend migration; consider 
the case of migrating from 2048 (vhost-user) to 1024 (qemu/vhost-kernel).

Thanks

>
>> Best,
>> Wei
> --
Wang, Wei W June 16, 2017, 10:10 a.m. UTC | #31
On 06/16/2017 04:57 PM, Jason Wang wrote:
>
>
> On 2017年06月16日 11:22, Michael S. Tsirkin wrote:
>>> I think the issues can be solved by VIRTIO_F_MAX_CHAIN_SIZE.
>>>
>>> For now, how about splitting it into two series of patches:
>>> 1) enable 1024 tx queue size for vhost-user, to let the users of 
>>> vhost-user
>>> to easily use 1024 queue size.
>> Fine with me. 1) will get property from user but override it on
>> !vhost-user. Do we need a protocol flag? It seems prudent but we get
>> back to cross-version migration issues that are still pending solution.
What do you have in mind about the protocol flag?

Btw, I just tested the patch for 1), and it works fine when migrating 
from the patched to the non-patched version of QEMU. I'll send it out; 
please have a look.


>> Marc Andre, what's the status of that work?
>>
>>> 2) enable VIRTIO_F_MAX_CHAIN_SIZE,  to enhance robustness.
>> Rather, to support it for more backends.
>
> Ok, if we want to support different values of max chain size in the 
> future. It would be problematic for migration of cross backends, 
> consider the case when migrating from 2048 (vhost-user) to 1024 
> (qemu/vhost-kernel).
>

I think that wouldn't be a problem. If changing the backend can result in
a change of config.max_chain_size, a configuration change notification
can be injected into the guest; the guest will then read the config and
get the new value.

Best,
Wei
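
A minimal sketch of the notification path described above, assuming a
hypothetical per-device n->max_chain_size field that
virtio_net_get_config() would report (the patch below hardcodes
VIRTIO_NET_MAX_CHAIN_SIZE instead):

static void virtio_net_update_max_chain_size(VirtIONet *n, uint16_t new_size)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);

    if (n->max_chain_size == new_size) {
        return;
    }
    n->max_chain_size = new_size;    /* hypothetical field */
    /* raise a configuration change interrupt; the guest re-reads the
     * config space and picks up the new limit */
    virtio_notify_config(vdev);
}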
Michael S. Tsirkin June 16, 2017, 3:15 p.m. UTC | #32
On Fri, Jun 16, 2017 at 06:10:27PM +0800, Wei Wang wrote:
> On 06/16/2017 04:57 PM, Jason Wang wrote:
> > 
> > 
> > On 2017年06月16日 11:22, Michael S. Tsirkin wrote:
> > > > I think the issues can be solved by VIRTIO_F_MAX_CHAIN_SIZE.
> > > > 
> > > > For now, how about splitting it into two series of patches:
> > > > 1) enable 1024 tx queue size for vhost-user, to let the users of
> > > > vhost-user
> > > > to easily use 1024 queue size.
> > > Fine with me. 1) will get property from user but override it on
> > > !vhost-user. Do we need a protocol flag? It seems prudent but we get
> > > back to cross-version migration issues that are still pending solution.
> What do you have in mind about the protocol flag?

Merely this: older clients might be confused if they get
a s/g with 1024 entries.

> Btw, I just tested the patch of 1), and it works fine with migration from
> the
> patched to non-patched version of QEMU. I'll send it out. Please have a
> check.
> 
> 
> > > Marc Andre, what's the status of that work?
> > > 
> > > > 2) enable VIRTIO_F_MAX_CHAIN_SIZE,  to enhance robustness.
> > > Rather, to support it for more backends.
> > 
> > Ok, if we want to support different values of max chain size in the
> > future. It would be problematic for migration of cross backends,
> > consider the case when migrating from 2048 (vhost-user) to 1024
> > (qemu/vhost-kernel).
> > 
> 
> I think that wouldn't be a problem. If there is a possibility to change the
> backend resulting in a change of config.max_change_size, a configuration
> change notification can be injected to the guest, then guest will read and
> get the new value.
> 
> Best,
> Wei

This might not be supportable by all guests. E.g. some requests might
already be in the queue. I'm not against reconfiguring devices across
migration but I think it's a big project. As a 1st step I would focus on
keeping configuration consistent across migrations.
Michael S. Tsirkin June 16, 2017, 3:19 p.m. UTC | #33
On Fri, Jun 16, 2017 at 04:57:01PM +0800, Jason Wang wrote:
> 
> 
> On 2017年06月16日 11:22, Michael S. Tsirkin wrote:
> > > I think the issues can be solved by VIRTIO_F_MAX_CHAIN_SIZE.
> > > 
> > > For now, how about splitting it into two series of patches:
> > > 1) enable 1024 tx queue size for vhost-user, to let the users of vhost-user
> > > to easily use 1024 queue size.
> > Fine with me. 1) will get property from user but override it on
> > !vhost-user. Do we need a protocol flag? It seems prudent but we get
> > back to cross-version migration issues that are still pending solution.
> > Marc Andre, what's the status of that work?
> > 
> > > 2) enable VIRTIO_F_MAX_CHAIN_SIZE,  to enhance robustness.
> > Rather, to support it for more backends.
> 
> Ok, if we want to support different values of max chain size in the future.
> It would be problematic for migration of cross backends, consider the case
> when migrating from 2048 (vhost-user) to 1024 (qemu/vhost-kernel).
> 
> Thanks

That's already a problem, and it's growing with each new feature.
Maxime looked at supporting cross-version migration for vhost-user 
backends; I think we must merge some solution sooner rather than later, 
preferably by the next release.

Maxime, any update here? Do we need a meeting to reach consensus?

> > 
> > > Best,
> > > Wei
> > --
Maxime Coquelin June 16, 2017, 5:04 p.m. UTC | #34
On 06/16/2017 05:19 PM, Michael S. Tsirkin wrote:
> On Fri, Jun 16, 2017 at 04:57:01PM +0800, Jason Wang wrote:
>>
>>
>> On 2017年06月16日 11:22, Michael S. Tsirkin wrote:
>>>> I think the issues can be solved by VIRTIO_F_MAX_CHAIN_SIZE.
>>>>
>>>> For now, how about splitting it into two series of patches:
>>>> 1) enable 1024 tx queue size for vhost-user, to let the users of vhost-user
>>>> to easily use 1024 queue size.
>>> Fine with me. 1) will get property from user but override it on
>>> !vhost-user. Do we need a protocol flag? It seems prudent but we get
>>> back to cross-version migration issues that are still pending solution.
>>> Marc Andre, what's the status of that work?
>>>
>>>> 2) enable VIRTIO_F_MAX_CHAIN_SIZE,  to enhance robustness.
>>> Rather, to support it for more backends.
>>
>> Ok, if we want to support different values of max chain size in the future.
>> It would be problematic for migration of cross backends, consider the case
>> when migrating from 2048 (vhost-user) to 1024 (qemu/vhost-kernel).
>>
>> Thanks
> 
> That's already a problem, and it's growing with each new feature.
> Maxime looked at supporting vhost-user backends cross-version migration,
> I think we must merge some solution sooner rather than later, preferably
> by the next release.
> 
> Maxime, any update here? Do we need a meeting to reach consensus?

No update, I haven't found time to progress on the topic yet.

For those who aren't aware of my initial proposal, you may find it here:
https://www.spinics.net/linux/fedora/libvir/msg142668.html

If my understanding is correct, you were concerned about the complexity 
of my proposal, which involved too many layers. Your suggestion was to 
have a tool, provided with qemu, that would connect to the vhost-user 
socket and query the backend capabilities.
I'm not 100% clear on how that would work, as the trend is to start the 
backend in client mode, meaning QEMU creates the socket. In this case, 
should the tool create the socket and the management tool request the 
backend to connect to it?

I think it could make sense to have a meeting, but maybe we should first
discuss the solutions on the list for efficiency.

For the delivery, what is QEMU v2.10's planned release date?

Note that my solution doesn't involve QEMU, so it would not be tied to 
the QEMU release date. But that doesn't mean it would be delivered sooner 
than your solution.

Maxime
Michael S. Tsirkin June 16, 2017, 8:33 p.m. UTC | #35
On Fri, Jun 16, 2017 at 07:04:27PM +0200, Maxime Coquelin wrote:
> 
> 
> On 06/16/2017 05:19 PM, Michael S. Tsirkin wrote:
> > On Fri, Jun 16, 2017 at 04:57:01PM +0800, Jason Wang wrote:
> > > 
> > > 
> > > On 2017年06月16日 11:22, Michael S. Tsirkin wrote:
> > > > > I think the issues can be solved by VIRTIO_F_MAX_CHAIN_SIZE.
> > > > > 
> > > > > For now, how about splitting it into two series of patches:
> > > > > 1) enable 1024 tx queue size for vhost-user, to let the users of vhost-user
> > > > > to easily use 1024 queue size.
> > > > Fine with me. 1) will get property from user but override it on
> > > > !vhost-user. Do we need a protocol flag? It seems prudent but we get
> > > > back to cross-version migration issues that are still pending solution.
> > > > Marc Andre, what's the status of that work?
> > > > 
> > > > > 2) enable VIRTIO_F_MAX_CHAIN_SIZE,  to enhance robustness.
> > > > Rather, to support it for more backends.
> > > 
> > > Ok, if we want to support different values of max chain size in the future.
> > > It would be problematic for migration of cross backends, consider the case
> > > when migrating from 2048 (vhost-user) to 1024 (qemu/vhost-kernel).
> > > 
> > > Thanks
> > 
> > That's already a problem, and it's growing with each new feature.
> > Maxime looked at supporting vhost-user backends cross-version migration,
> > I think we must merge some solution sooner rather than later, preferably
> > by the next release.
> > 
> > Maxime, any update here? Do we need a meeting to reach consensus?
> 
> No update, I haven't found time to progress on the topic yet.
> 
> For those who aren't aware of my initial proposal, you may find it here:
> https://www.spinics.net/linux/fedora/libvir/msg142668.html
> 
> If my understanding is correct, you were concerned about the complexity of
> my
> proposal which involved too many layers. Your suggestion was to have a tool
> provided with qemu that would connect to vhost-user socket and query the
> backend capabilities.
> I'm not 100% clear how it would work, as the trend is to start the backend
> in
> client mode, meaning QEMU creates the socket. In this case, should the tool
> create the socket and management tool request the backend to connect to it?
> 
> I think it could make sense to have a meeting, but maybe we should first
> discuss the solutions on the list for efficiency.
> 
> For the delivery, what is QEMU v2.10 planned release date?
> 
> Note that my solution doesn't involve QEMU, so it would not be tight to QEMU
> release date. But, that doesn't mean it would be delivered sooner than
> your solution.
> 
> Maxime

I'd say let's go with your proposal (Solution 3 above).
Wang, Wei W June 17, 2017, 8:37 a.m. UTC | #36
On 06/16/2017 11:15 PM, Michael S. Tsirkin wrote:
> On Fri, Jun 16, 2017 at 06:10:27PM +0800, Wei Wang wrote:
>> On 06/16/2017 04:57 PM, Jason Wang wrote:
>>>
>>> On 2017年06月16日 11:22, Michael S. Tsirkin wrote:
>>>>> I think the issues can be solved by VIRTIO_F_MAX_CHAIN_SIZE.
>>>>>
>>>>> For now, how about splitting it into two series of patches:
>>>>> 1) enable 1024 tx queue size for vhost-user, to let the users of
>>>>> vhost-user
>>>>> to easily use 1024 queue size.
>>>> Fine with me. 1) will get property from user but override it on
>>>> !vhost-user. Do we need a protocol flag? It seems prudent but we get
>>>> back to cross-version migration issues that are still pending solution.
>> What do you have in mind about the protocol flag?
> Merely this: older clients might be confused if they get
> a s/g with 1024 entries.

I don't disagree with adding that. But the client (i.e. the vhost-user
slave) is a host userspace program, and it seems that users can
easily patch their host-side applications if there is any issue, so
maybe we don't need to be too prudent about that, do we?

Also, the usage of the protocol flag looks like a duplicate of what
we plan to add in the next step - the virtio common feature flag,
VIRTIO_F_MAX_CHAIN_SIZE, which is more general and can be used
across different backends.

>> Btw, I just tested the patch of 1), and it works fine with migration from
>> the
>> patched to non-patched version of QEMU. I'll send it out. Please have a
>> check.
>>
>>
>>>> Marc Andre, what's the status of that work?
>>>>
>>>>> 2) enable VIRTIO_F_MAX_CHAIN_SIZE,  to enhance robustness.
>>>> Rather, to support it for more backends.
>>> Ok, if we want to support different values of max chain size in the
>>> future. It would be problematic for migration of cross backends,
>>> consider the case when migrating from 2048 (vhost-user) to 1024
>>> (qemu/vhost-kernel).
>>>
>> I think that wouldn't be a problem. If there is a possibility to change the
>> backend resulting in a change of config.max_change_size, a configuration
>> change notification can be injected to the guest, then guest will read and
>> get the new value.
>>
>> Best,
>> Wei
> This might not be supportable by all guests. E.g. some requests might
> already be in the queue. I'm not against reconfiguring devices across
> migration but I think it's a big project. As a 1st step I would focus on
> keeping configuration consistent across migrations.
>

Would it be common and fair for vendors to migrate from a new QEMU
to an old QEMU, which would downgrade the services that they provide
to their users?

Even if a downgrade happens for whatever reason, I think it is
sensible to sacrifice something (e.g. drop the unsupported
requests from the queue) for the transition, right?

On the other hand, packet drop is normally handled at the packet
protocol layer, e.g. TCP. Also, some amount of packet drop is usually
acceptable during live migration.

Best,
Wei
Michael S. Tsirkin June 18, 2017, 7:46 p.m. UTC | #37
On Sat, Jun 17, 2017 at 04:37:02PM +0800, Wei Wang wrote:
> On 06/16/2017 11:15 PM, Michael S. Tsirkin wrote:
> > On Fri, Jun 16, 2017 at 06:10:27PM +0800, Wei Wang wrote:
> > > On 06/16/2017 04:57 PM, Jason Wang wrote:
> > > > 
> > > > On 2017年06月16日 11:22, Michael S. Tsirkin wrote:
> > > > > > I think the issues can be solved by VIRTIO_F_MAX_CHAIN_SIZE.
> > > > > > 
> > > > > > For now, how about splitting it into two series of patches:
> > > > > > 1) enable 1024 tx queue size for vhost-user, to let the users of
> > > > > > vhost-user
> > > > > > to easily use 1024 queue size.
> > > > > Fine with me. 1) will get property from user but override it on
> > > > > !vhost-user. Do we need a protocol flag? It seems prudent but we get
> > > > > back to cross-version migration issues that are still pending solution.
> > > What do you have in mind about the protocol flag?
> > Merely this: older clients might be confused if they get
> > a s/g with 1024 entries.
> 
> I don't disagree to add that. But the client (i.e. vhost-user
> slave) is a host userspace program, and it seems that users can
> easily patch their host side applications if there is any issue,
> maybe we also don't need to be too prudent about that, do we?

I won't insist on this, but it might not be easy. For example, are there
clients that want to forward the packet to the host kernel as an s/g?

> 
> Also, the usage of the protocol flag looks like a duplicate of what
> we plan to add in the next step - the virtio common feature flag,
> VIRTIO_F_MAX_CHAIN_SIZE, which is more general and can be used
> across different backends.
>
> > > Btw, I just tested the patch of 1), and it works fine with migration from
> > > the
> > > patched to non-patched version of QEMU. I'll send it out. Please have a
> > > check.
> > > 
> > > 
> > > > > Marc Andre, what's the status of that work?
> > > > > 
> > > > > > 2) enable VIRTIO_F_MAX_CHAIN_SIZE,  to enhance robustness.
> > > > > Rather, to support it for more backends.
> > > > Ok, if we want to support different values of max chain size in the
> > > > future. It would be problematic for migration of cross backends,
> > > > consider the case when migrating from 2048 (vhost-user) to 1024
> > > > (qemu/vhost-kernel).
> > > > 
> > > I think that wouldn't be a problem. If there is a possibility to change the
> > > backend resulting in a change of config.max_change_size, a configuration
> > > change notification can be injected to the guest, then guest will read and
> > > get the new value.
> > > 
> > > Best,
> > > Wei
> > This might not be supportable by all guests. E.g. some requests might
> > already be in the queue. I'm not against reconfiguring devices across
> > migration but I think it's a big project. As a 1st step I would focus on
> > keeping configuration consistent across migrations.
> > 
> 
> Would it be common and fair for vendors to migrate from a new QEMU
> to an old QEMU, which would downgrade the services that they provide
> to their users?
> 
> Even for any reason that downgrade happens, I think it is
> sensible to sacrifice something (e.g. drop the unsupported
> requests from the queue) for the transition, right?
> 
> On the other side, packet drop is normally handled at the packet
> protocol layer, e.g. TCP. Also, usually some amount of packet drop
> is acceptable during live migration.
> 
> Best,
> Wei

This isn't how people expect migration to be handled ATM.
For now, I suggest we assume both sides need to be consistent.
Wang, Wei W June 19, 2017, 7:40 a.m. UTC | #38
On 06/19/2017 03:46 AM, Michael S. Tsirkin wrote:
>>>>
>>>> What do you have in mind about the protocol flag?
>>> Merely this: older clients might be confused if they get
>>> a s/g with 1024 entries.
>> I don't disagree to add that. But the client (i.e. vhost-user
>> slave) is a host userspace program, and it seems that users can
>> easily patch their host side applications if there is any issue,
>> maybe we also don't need to be too prudent about that, do we?
> I won't insist on this but it might not be easy. For example, are there
> clients that want to forward the packet to host kernel as a s/g?

Not sure about all the client implementations, but it sounds like a
strange and inefficient use of vhost-user to pass packets to
the host kernel, given that the vhost kernel backend is already there
for that purpose.

Best,
Wei
diff mbox

Patch

diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index 7d091c9..5c82f54 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -33,8 +33,13 @@ 
 
 /* previously fixed value */
 #define VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 256
+#define VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE 1024
+
 /* for now, only allow larger queues; with virtio-1, guest can downsize */
 #define VIRTIO_NET_RX_QUEUE_MIN_SIZE VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE
+#define VIRTIO_NET_TX_QUEUE_MIN_SIZE 256
+
+#define VIRTIO_NET_MAX_CHAIN_SIZE 1023
 
 /*
  * Calculate the number of bytes up to and including the given 'field' of
@@ -57,6 +62,8 @@  static VirtIOFeature feature_sizes[] = {
      .end = endof(struct virtio_net_config, max_virtqueue_pairs)},
     {.flags = 1 << VIRTIO_NET_F_MTU,
      .end = endof(struct virtio_net_config, mtu)},
+    {.flags = 1 << VIRTIO_F_MAX_CHAIN_SIZE,
+     .end = endof(struct virtio_net_config, max_chain_size)},
     {}
 };
 
@@ -84,6 +91,7 @@  static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
     virtio_stw_p(vdev, &netcfg.status, n->status);
     virtio_stw_p(vdev, &netcfg.max_virtqueue_pairs, n->max_queues);
     virtio_stw_p(vdev, &netcfg.mtu, n->net_conf.mtu);
+    virtio_stw_p(vdev, &netcfg.max_chain_size, VIRTIO_NET_MAX_CHAIN_SIZE);
     memcpy(netcfg.mac, n->mac, ETH_ALEN);
     memcpy(config, &netcfg, n->config_size);
 }
@@ -635,9 +643,33 @@  static inline uint64_t virtio_net_supported_guest_offloads(VirtIONet *n)
     return virtio_net_guest_offloads_by_features(vdev->guest_features);
 }
 
+static bool is_tx(int queue_index)
+{
+    return queue_index % 2 == 1;
+}
+
+static void virtio_net_reconfig_tx_queue_size(VirtIONet *n)
+{
+    VirtIODevice *vdev = VIRTIO_DEVICE(n);
+    int i, num_queues = virtio_get_num_queues(vdev);
+
+    /* Return when the queue size is already less than the 1024 */
+    if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE) {
+        return;
+    }
+
+    for (i = 0; i < num_queues; i++) {
+        if (is_tx(i)) {
+            n->net_conf.tx_queue_size = VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE / 2;
+            virtio_queue_set_num(vdev, i, n->net_conf.tx_queue_size);
+        }
+    }
+}
+
 static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features)
 {
     VirtIONet *n = VIRTIO_NET(vdev);
+    NetClientState *nc = qemu_get_queue(n->nic);
     int i;
 
     virtio_net_set_multiqueue(n,
@@ -649,6 +681,16 @@  static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features)
                                virtio_has_feature(features,
                                                   VIRTIO_F_VERSION_1));
 
+    /*
+     * When the traditional QEMU backend is used, using 1024 tx queue size
+     * requires the driver to support the VIRTIO_F_MAX_CHAIN_SIZE feature. If
+     * the feature is not supported, reconfigure the tx queue size to 512.
+     */
+    if (!get_vhost_net(nc->peer) &&
+        !virtio_has_feature(features, VIRTIO_F_MAX_CHAIN_SIZE)) {
+        virtio_net_reconfig_tx_queue_size(n);
+    }
+
     if (n->has_vnet_hdr) {
         n->curr_guest_offloads =
             virtio_net_guest_offloads_by_features(features);
@@ -1297,8 +1339,8 @@  static int32_t virtio_net_flush_tx(VirtIONetQueue *q)
 
         out_num = elem->out_num;
         out_sg = elem->out_sg;
-        if (out_num < 1) {
-            virtio_error(vdev, "virtio-net header not in first element");
+        if (out_num < 1 || out_num > VIRTIO_NET_MAX_CHAIN_SIZE) {
+            virtio_error(vdev, "no packet or too large vring desc chain");
             virtqueue_detach_element(q->tx_vq, elem, 0);
             g_free(elem);
             return -EINVAL;
@@ -1496,13 +1538,15 @@  static void virtio_net_add_queue(VirtIONet *n, int index)
                                            virtio_net_handle_rx);
     if (n->net_conf.tx && !strcmp(n->net_conf.tx, "timer")) {
         n->vqs[index].tx_vq =
-            virtio_add_queue(vdev, 256, virtio_net_handle_tx_timer);
+            virtio_add_queue(vdev, n->net_conf.tx_queue_size,
+                             virtio_net_handle_tx_timer);
         n->vqs[index].tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
                                               virtio_net_tx_timer,
                                               &n->vqs[index]);
     } else {
         n->vqs[index].tx_vq =
-            virtio_add_queue(vdev, 256, virtio_net_handle_tx_bh);
+            virtio_add_queue(vdev, n->net_conf.tx_queue_size,
+                             virtio_net_handle_tx_bh);
         n->vqs[index].tx_bh = qemu_bh_new(virtio_net_tx_bh, &n->vqs[index]);
     }
 
@@ -1891,6 +1935,10 @@  static void virtio_net_device_realize(DeviceState *dev, Error **errp)
         n->host_features |= (0x1 << VIRTIO_NET_F_MTU);
     }
 
+    if (virtio_host_has_feature(vdev, VIRTIO_F_MAX_CHAIN_SIZE)) {
+        n->host_features |= (0x1 << VIRTIO_F_MAX_CHAIN_SIZE);
+    }
+
     virtio_net_set_config_size(n, n->host_features);
     virtio_init(vdev, "virtio-net", VIRTIO_ID_NET, n->config_size);
 
@@ -1910,6 +1958,17 @@  static void virtio_net_device_realize(DeviceState *dev, Error **errp)
         return;
     }
 
+    if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_MIN_SIZE ||
+        n->net_conf.tx_queue_size > VIRTQUEUE_MAX_SIZE ||
+        (n->net_conf.tx_queue_size & (n->net_conf.tx_queue_size - 1))) {
+        error_setg(errp, "Invalid tx_queue_size (= %" PRIu16 "), "
+                   "must be a power of 2 between %d and %d.",
+                   n->net_conf.tx_queue_size, VIRTIO_NET_TX_QUEUE_MIN_SIZE,
+                   VIRTQUEUE_MAX_SIZE);
+        virtio_cleanup(vdev);
+        return;
+    }
+
     n->max_queues = MAX(n->nic_conf.peers.queues, 1);
     if (n->max_queues * 2 + 1 > VIRTIO_QUEUE_MAX) {
         error_setg(errp, "Invalid number of queues (= %" PRIu32 "), "
@@ -2089,6 +2148,8 @@  static Property virtio_net_properties[] = {
     DEFINE_PROP_STRING("tx", VirtIONet, net_conf.tx),
     DEFINE_PROP_UINT16("rx_queue_size", VirtIONet, net_conf.rx_queue_size,
                        VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE),
+    DEFINE_PROP_UINT16("tx_queue_size", VirtIONet, net_conf.tx_queue_size,
+                       VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE),
     DEFINE_PROP_UINT16("host_mtu", VirtIONet, net_conf.mtu, 0),
     DEFINE_PROP_END_OF_LIST(),
 };
diff --git a/include/hw/virtio/virtio-net.h b/include/hw/virtio/virtio-net.h
index 1eec9a2..fd944ba 100644
--- a/include/hw/virtio/virtio-net.h
+++ b/include/hw/virtio/virtio-net.h
@@ -36,6 +36,7 @@  typedef struct virtio_net_conf
     int32_t txburst;
     char *tx;
     uint16_t rx_queue_size;
+    uint16_t tx_queue_size;
     uint16_t mtu;
 } virtio_net_conf;
 
diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
index 7b6edba..8e85e41 100644
--- a/include/hw/virtio/virtio.h
+++ b/include/hw/virtio/virtio.h
@@ -260,6 +260,8 @@  typedef struct VirtIORNGConf VirtIORNGConf;
                       VIRTIO_F_NOTIFY_ON_EMPTY, true), \
     DEFINE_PROP_BIT64("any_layout", _state, _field, \
                       VIRTIO_F_ANY_LAYOUT, true), \
+    DEFINE_PROP_BIT64("max_chain_size", _state, _field, \
+                      VIRTIO_F_MAX_CHAIN_SIZE, true), \
     DEFINE_PROP_BIT64("iommu_platform", _state, _field, \
                       VIRTIO_F_IOMMU_PLATFORM, false)
 
diff --git a/include/standard-headers/linux/virtio_config.h b/include/standard-headers/linux/virtio_config.h
index b777069..b70cbfe 100644
--- a/include/standard-headers/linux/virtio_config.h
+++ b/include/standard-headers/linux/virtio_config.h
@@ -60,6 +60,9 @@ 
 #define VIRTIO_F_ANY_LAYOUT		27
 #endif /* VIRTIO_CONFIG_NO_LEGACY */
 
+/* Guest chains vring descriptors within a limit */
+#define VIRTIO_F_MAX_CHAIN_SIZE		31
+
 /* v1.0 compliant. */
 #define VIRTIO_F_VERSION_1		32
 
diff --git a/include/standard-headers/linux/virtio_net.h b/include/standard-headers/linux/virtio_net.h
index 30ff249..729aaa8 100644
--- a/include/standard-headers/linux/virtio_net.h
+++ b/include/standard-headers/linux/virtio_net.h
@@ -76,6 +76,8 @@  struct virtio_net_config {
 	uint16_t max_virtqueue_pairs;
 	/* Default maximum transmit unit advice */
 	uint16_t mtu;
+        /* Maximum number of vring descriptors that can be chained */
+	uint16_t max_chain_size;
 } QEMU_PACKED;
 
 /*