Message ID | 1496653049-44530-1-git-send-email-wei.w.wang@intel.com (mailing list archive)
---|---
State | New, archived
On Mon, Jun 05, 2017 at 04:57:29PM +0800, Wei Wang wrote: > This patch enables the virtio-net tx queue size to be configurable > between 256 and 1024 by the user. The queue size specified by the > user should be power of 2. If "tx_queue_size" is not offered by the > user, the default queue size, 1024, will be used. > > For the traditional QEMU backend, setting the tx queue size to be 1024 > requires the guest virtio driver to support the VIRTIO_F_MAX_CHAIN_SIZE > feature. This feature restricts the guest driver from chaining 1024 > vring descriptors, which may cause the device side implementation to > send more than 1024 iov to writev. > > VIRTIO_F_MAX_CHAIN_SIZE is a common transport feature added for all > virtio devices. However, each device has the flexibility to set the max > chain size to limit its driver to chain vring descriptors. Currently, > the max chain size of the virtio-net device is set to 1023. > > In the case that the tx queue size is set to 1024 and the > VIRTIO_F_MAX_CHAIN_SIZE feature is not supported by the guest driver, > the tx queue size will be reconfigured to be 512. I'd like to see the reverse. Start with the current default. If VIRTIO_F_MAX_CHAIN_SIZE is negotiated, increase the queue size. > Signed-off-by: Wei Wang <wei.w.wang@intel.com> below should come after --- > RFC to v1 changes: > 1) change VIRTIO_F_MAX_CHAIN_SIZE to be a common virtio feature (was > virtio-net specific); > 2) change the default tx queue size to be 1024 (was 256); > 3) For the vhost backend case, setting tx queue size to be 1024 dosen't > require the VIRTIO_F_MAX_CHAIN_SIZE feature support. > --- > hw/net/virtio-net.c | 69 ++++++++++++++++++++++++-- > include/hw/virtio/virtio-net.h | 1 + > include/hw/virtio/virtio.h | 2 + > include/standard-headers/linux/virtio_config.h | 3 ++ > include/standard-headers/linux/virtio_net.h | 2 + > 5 files changed, 73 insertions(+), 4 deletions(-) > > diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c > index 7d091c9..5c82f54 100644 > --- a/hw/net/virtio-net.c > +++ b/hw/net/virtio-net.c > @@ -33,8 +33,13 @@ > > /* previously fixed value */ > #define VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 256 > +#define VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE 1024 > + > /* for now, only allow larger queues; with virtio-1, guest can downsize */ > #define VIRTIO_NET_RX_QUEUE_MIN_SIZE VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE > +#define VIRTIO_NET_TX_QUEUE_MIN_SIZE 256 > + > +#define VIRTIO_NET_MAX_CHAIN_SIZE 1023 > > /* > * Calculate the number of bytes up to and including the given 'field' of > @@ -57,6 +62,8 @@ static VirtIOFeature feature_sizes[] = { > .end = endof(struct virtio_net_config, max_virtqueue_pairs)}, > {.flags = 1 << VIRTIO_NET_F_MTU, > .end = endof(struct virtio_net_config, mtu)}, > + {.flags = 1 << VIRTIO_F_MAX_CHAIN_SIZE, > + .end = endof(struct virtio_net_config, max_chain_size)}, > {} > }; > Using a generic VIRTIO_F_MAX_CHAIN_SIZE flag, so this should go into the generic virtio section, not virtio_net_config. 
> @@ -84,6 +91,7 @@ static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config) > virtio_stw_p(vdev, &netcfg.status, n->status); > virtio_stw_p(vdev, &netcfg.max_virtqueue_pairs, n->max_queues); > virtio_stw_p(vdev, &netcfg.mtu, n->net_conf.mtu); > + virtio_stw_p(vdev, &netcfg.max_chain_size, VIRTIO_NET_MAX_CHAIN_SIZE); > memcpy(netcfg.mac, n->mac, ETH_ALEN); > memcpy(config, &netcfg, n->config_size); > } > @@ -635,9 +643,33 @@ static inline uint64_t virtio_net_supported_guest_offloads(VirtIONet *n) > return virtio_net_guest_offloads_by_features(vdev->guest_features); > } > > +static bool is_tx(int queue_index) > +{ > + return queue_index % 2 == 1; > +} > + > +static void virtio_net_reconfig_tx_queue_size(VirtIONet *n) > +{ > + VirtIODevice *vdev = VIRTIO_DEVICE(n); > + int i, num_queues = virtio_get_num_queues(vdev); > + > + /* Return when the queue size is already less than the 1024 */ > + if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE) { > + return; > + } > + > + for (i = 0; i < num_queues; i++) { > + if (is_tx(i)) { > + n->net_conf.tx_queue_size = VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE / 2; > + virtio_queue_set_num(vdev, i, n->net_conf.tx_queue_size); > + } > + } > +} > + > static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features) > { > VirtIONet *n = VIRTIO_NET(vdev); > + NetClientState *nc = qemu_get_queue(n->nic); > int i; > > virtio_net_set_multiqueue(n, > @@ -649,6 +681,16 @@ static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features) > virtio_has_feature(features, > VIRTIO_F_VERSION_1)); > > + /* > + * When the traditional QEMU backend is used, using 1024 tx queue size > + * requires the driver to support the VIRTIO_F_MAX_CHAIN_SIZE feature. If > + * the feature is not supported, reconfigure the tx queue size to 512. > + */ > + if (!get_vhost_net(nc->peer) && > + !virtio_has_feature(features, VIRTIO_F_MAX_CHAIN_SIZE)) { > + virtio_net_reconfig_tx_queue_size(n); > + } > + > if (n->has_vnet_hdr) { > n->curr_guest_offloads = > virtio_net_guest_offloads_by_features(features); > @@ -1297,8 +1339,8 @@ static int32_t virtio_net_flush_tx(VirtIONetQueue *q) > > out_num = elem->out_num; > out_sg = elem->out_sg; > - if (out_num < 1) { > - virtio_error(vdev, "virtio-net header not in first element"); > + if (out_num < 1 || out_num > VIRTIO_NET_MAX_CHAIN_SIZE) { > + virtio_error(vdev, "no packet or too large vring desc chain"); > virtqueue_detach_element(q->tx_vq, elem, 0); > g_free(elem); > return -EINVAL; what about rx vq? we need to limit that as well, do we not? 
> @@ -1496,13 +1538,15 @@ static void virtio_net_add_queue(VirtIONet *n, int index) > virtio_net_handle_rx); > if (n->net_conf.tx && !strcmp(n->net_conf.tx, "timer")) { > n->vqs[index].tx_vq = > - virtio_add_queue(vdev, 256, virtio_net_handle_tx_timer); > + virtio_add_queue(vdev, n->net_conf.tx_queue_size, > + virtio_net_handle_tx_timer); > n->vqs[index].tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, > virtio_net_tx_timer, > &n->vqs[index]); > } else { > n->vqs[index].tx_vq = > - virtio_add_queue(vdev, 256, virtio_net_handle_tx_bh); > + virtio_add_queue(vdev, n->net_conf.tx_queue_size, > + virtio_net_handle_tx_bh); > n->vqs[index].tx_bh = qemu_bh_new(virtio_net_tx_bh, &n->vqs[index]); > } > > @@ -1891,6 +1935,10 @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp) > n->host_features |= (0x1 << VIRTIO_NET_F_MTU); > } > > + if (virtio_host_has_feature(vdev, VIRTIO_F_MAX_CHAIN_SIZE)) { > + n->host_features |= (0x1 << VIRTIO_F_MAX_CHAIN_SIZE); > + } > + > virtio_net_set_config_size(n, n->host_features); > virtio_init(vdev, "virtio-net", VIRTIO_ID_NET, n->config_size); > > @@ -1910,6 +1958,17 @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp) > return; > } > > + if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_MIN_SIZE || > + n->net_conf.tx_queue_size > VIRTQUEUE_MAX_SIZE || > + (n->net_conf.tx_queue_size & (n->net_conf.tx_queue_size - 1))) { Pls use is_power_of_2. > + error_setg(errp, "Invalid tx_queue_size (= %" PRIu16 "), " > + "must be a power of 2 between %d and %d.", > + n->net_conf.tx_queue_size, VIRTIO_NET_TX_QUEUE_MIN_SIZE, > + VIRTQUEUE_MAX_SIZE); I think management will need a way to discover the limits for this value. Not sure how. Cc QAPI maintainers. > + virtio_cleanup(vdev); > + return; > + } > + > n->max_queues = MAX(n->nic_conf.peers.queues, 1); > if (n->max_queues * 2 + 1 > VIRTIO_QUEUE_MAX) { > error_setg(errp, "Invalid number of queues (= %" PRIu32 "), " > @@ -2089,6 +2148,8 @@ static Property virtio_net_properties[] = { > DEFINE_PROP_STRING("tx", VirtIONet, net_conf.tx), > DEFINE_PROP_UINT16("rx_queue_size", VirtIONet, net_conf.rx_queue_size, > VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE), > + DEFINE_PROP_UINT16("tx_queue_size", VirtIONet, net_conf.tx_queue_size, > + VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE), > DEFINE_PROP_UINT16("host_mtu", VirtIONet, net_conf.mtu, 0), > DEFINE_PROP_END_OF_LIST(), > }; > diff --git a/include/hw/virtio/virtio-net.h b/include/hw/virtio/virtio-net.h > index 1eec9a2..fd944ba 100644 > --- a/include/hw/virtio/virtio-net.h > +++ b/include/hw/virtio/virtio-net.h > @@ -36,6 +36,7 @@ typedef struct virtio_net_conf > int32_t txburst; > char *tx; > uint16_t rx_queue_size; > + uint16_t tx_queue_size; > uint16_t mtu; > } virtio_net_conf; > > diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h > index 7b6edba..8e85e41 100644 > --- a/include/hw/virtio/virtio.h > +++ b/include/hw/virtio/virtio.h > @@ -260,6 +260,8 @@ typedef struct VirtIORNGConf VirtIORNGConf; > VIRTIO_F_NOTIFY_ON_EMPTY, true), \ > DEFINE_PROP_BIT64("any_layout", _state, _field, \ > VIRTIO_F_ANY_LAYOUT, true), \ > + DEFINE_PROP_BIT64("max_chain_size", _state, _field, \ > + VIRTIO_F_MAX_CHAIN_SIZE, true), \ > DEFINE_PROP_BIT64("iommu_platform", _state, _field, \ > VIRTIO_F_IOMMU_PLATFORM, false) > > diff --git a/include/standard-headers/linux/virtio_config.h b/include/standard-headers/linux/virtio_config.h > index b777069..b70cbfe 100644 > --- a/include/standard-headers/linux/virtio_config.h > +++ b/include/standard-headers/linux/virtio_config.h > @@ 
-60,6 +60,9 @@ > #define VIRTIO_F_ANY_LAYOUT 27 > #endif /* VIRTIO_CONFIG_NO_LEGACY */ > > +/* Guest chains vring descriptors within a limit */ > +#define VIRTIO_F_MAX_CHAIN_SIZE 31 > + Pls use a flag in the high 32 bit. > /* v1.0 compliant. */ > #define VIRTIO_F_VERSION_1 32 > > diff --git a/include/standard-headers/linux/virtio_net.h b/include/standard-headers/linux/virtio_net.h > index 30ff249..729aaa8 100644 > --- a/include/standard-headers/linux/virtio_net.h > +++ b/include/standard-headers/linux/virtio_net.h > @@ -76,6 +76,8 @@ struct virtio_net_config { > uint16_t max_virtqueue_pairs; > /* Default maximum transmit unit advice */ > uint16_t mtu; > + /* Maximum number of vring descriptors that can be chained */ > + uint16_t max_chain_size; > } QEMU_PACKED; > > /* > -- > 2.7.4
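A minimal sketch of the realize-time check with the requested is_power_of_2() helper (which lives in qemu/host-utils.h); only the third condition changes relative to the posted patch:

    if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_MIN_SIZE ||
        n->net_conf.tx_queue_size > VIRTQUEUE_MAX_SIZE ||
        !is_power_of_2(n->net_conf.tx_queue_size)) {
        error_setg(errp, "Invalid tx_queue_size (= %" PRIu16 "), "
                   "must be a power of 2 between %d and %d",
                   n->net_conf.tx_queue_size, VIRTIO_NET_TX_QUEUE_MIN_SIZE,
                   VIRTQUEUE_MAX_SIZE);
        virtio_cleanup(vdev);
        return;
    }

With the new property in place, the size would be picked on the command line along the lines of "-device virtio-net-pci,netdev=net0,tx_queue_size=1024" (illustrative invocation, the netdev id is made up).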
On 06/05/2017 10:38 AM, Michael S. Tsirkin wrote: > On Mon, Jun 05, 2017 at 04:57:29PM +0800, Wei Wang wrote: >> This patch enables the virtio-net tx queue size to be configurable >> between 256 and 1024 by the user. The queue size specified by the >> user should be power of 2. If "tx_queue_size" is not offered by the >> user, the default queue size, 1024, will be used. >> >> + if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_MIN_SIZE || >> + n->net_conf.tx_queue_size > VIRTQUEUE_MAX_SIZE || >> + (n->net_conf.tx_queue_size & (n->net_conf.tx_queue_size - 1))) { > > Pls use is_power_of_2. > >> + error_setg(errp, "Invalid tx_queue_size (= %" PRIu16 "), " >> + "must be a power of 2 between %d and %d.", No trailing '.' in error_setg() messages, please. >> + n->net_conf.tx_queue_size, VIRTIO_NET_TX_QUEUE_MIN_SIZE, >> + VIRTQUEUE_MAX_SIZE); > > I think management will need a way to discover the limits for > this value. Not sure how. Cc QAPI maintainers. Is this something that can be documented? Or is it a runtime-variable limit, where you will need to query the limit on a per-machine basis? Given that it looks like compile-time documentation, I'm leaning towards documentation being sufficient.
On Mon, Jun 05, 2017 at 10:41:56AM -0500, Eric Blake wrote: > On 06/05/2017 10:38 AM, Michael S. Tsirkin wrote: > > On Mon, Jun 05, 2017 at 04:57:29PM +0800, Wei Wang wrote: > >> This patch enables the virtio-net tx queue size to be configurable > >> between 256 and 1024 by the user. The queue size specified by the > >> user should be power of 2. If "tx_queue_size" is not offered by the > >> user, the default queue size, 1024, will be used. > >> > > >> + if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_MIN_SIZE || > >> + n->net_conf.tx_queue_size > VIRTQUEUE_MAX_SIZE || > >> + (n->net_conf.tx_queue_size & (n->net_conf.tx_queue_size - 1))) { > > > > Pls use is_power_of_2. > > > >> + error_setg(errp, "Invalid tx_queue_size (= %" PRIu16 "), " > >> + "must be a power of 2 between %d and %d.", > > No trailing '.' in error_setg() messages, please. > > >> + n->net_conf.tx_queue_size, VIRTIO_NET_TX_QUEUE_MIN_SIZE, > >> + VIRTQUEUE_MAX_SIZE); > > > > I think management will need a way to discover the limits for > > this value. Not sure how. Cc QAPI maintainers. > > Is this something that can be documented? Or is it a runtime-variable > limit, where you will need to query the limit on a per-machine basis? > Given that it looks like compile-time documentation, I'm leaning towards > documentation being sufficient. We are likely to change the limits with time. E.g. VIRTIO_NET_TX_QUEUE_MIN_SIZE is arbitrary. > -- > Eric Blake, Principal Software Engineer > Red Hat, Inc. +1-919-301-3266 > Virtualization: qemu.org | libvirt.org >
On Mon, Jun 05, 2017 at 04:57:29PM +0800, Wei Wang wrote: > @@ -1910,6 +1958,17 @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp) > return; > } > > + if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_MIN_SIZE || > + n->net_conf.tx_queue_size > VIRTQUEUE_MAX_SIZE || > + (n->net_conf.tx_queue_size & (n->net_conf.tx_queue_size - 1))) { > + error_setg(errp, "Invalid tx_queue_size (= %" PRIu16 "), " > + "must be a power of 2 between %d and %d.", > + n->net_conf.tx_queue_size, VIRTIO_NET_TX_QUEUE_MIN_SIZE, > + VIRTQUEUE_MAX_SIZE); > + virtio_cleanup(vdev); > + return; > + } > + > n->max_queues = MAX(n->nic_conf.peers.queues, 1); > if (n->max_queues * 2 + 1 > VIRTIO_QUEUE_MAX) { > error_setg(errp, "Invalid number of queues (= %" PRIu32 "), " Given that some configurations (e.g. legacy guest) ignore the value, I'm inclined to say just force the value to be within a reasonable limit.
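A rough sketch of the "force the value" alternative, clamping at realize time instead of failing (MIN/MAX and pow2floor() are existing QEMU helpers; whether to warn the user when clamping is left open here):

    /* sketch: clamp a bad tx_queue_size instead of rejecting the device */
    n->net_conf.tx_queue_size = MIN(MAX(n->net_conf.tx_queue_size,
                                        VIRTIO_NET_TX_QUEUE_MIN_SIZE),
                                    VIRTQUEUE_MAX_SIZE);
    if (!is_power_of_2(n->net_conf.tx_queue_size)) {
        /* round down to the nearest power of 2 */
        n->net_conf.tx_queue_size = pow2floor(n->net_conf.tx_queue_size);
    }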
On 06/05/2017 11:38 PM, Michael S. Tsirkin wrote: > On Mon, Jun 05, 2017 at 04:57:29PM +0800, Wei Wang wrote: > /* > * Calculate the number of bytes up to and including the given 'field' of > @@ -57,6 +62,8 @@ static VirtIOFeature feature_sizes[] = { > .end = endof(struct virtio_net_config, max_virtqueue_pairs)}, > {.flags = 1 << VIRTIO_NET_F_MTU, > .end = endof(struct virtio_net_config, mtu)}, > + {.flags = 1 << VIRTIO_F_MAX_CHAIN_SIZE, > + .end = endof(struct virtio_net_config, max_chain_size)}, > {} > }; > > Using a generic VIRTIO_F_MAX_CHAIN_SIZE flag, so this should go into the > generic virtio section, not virtio_net_config. > Do you mean VIRTIO_PCI_xx under virtio_pci.h? The reason that the config field wasn't added there is because I didn't find space to put into the new 16-bit field into the existing layout. The last one is "#define VIRTIO_MSI_QUEUE_VECTOR 22". The per-driver configuration space directly follows that last register by "#define VIRTIO_PCI_CONFIG_OFF(msix_enabled) ((msix_enabled) ? 24:20)" Changing the MACRO to something like VIRTIO_PCI_CONFIG_OFF(max_chain_size_enabled) would be difficult. Do you see a good way to add the new field here? >> @@ -84,6 +91,7 @@ static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config) >> virtio_stw_p(vdev, &netcfg.status, n->status); >> virtio_stw_p(vdev, &netcfg.max_virtqueue_pairs, n->max_queues); >> virtio_stw_p(vdev, &netcfg.mtu, n->net_conf.mtu); >> + virtio_stw_p(vdev, &netcfg.max_chain_size, VIRTIO_NET_MAX_CHAIN_SIZE); >> memcpy(netcfg.mac, n->mac, ETH_ALEN); >> memcpy(config, &netcfg, n->config_size); >> } >> @@ -635,9 +643,33 @@ static inline uint64_t virtio_net_supported_guest_offloads(VirtIONet *n) >> return virtio_net_guest_offloads_by_features(vdev->guest_features); >> } >> >> +static bool is_tx(int queue_index) >> +{ >> + return queue_index % 2 == 1; >> +} >> + >> +static void virtio_net_reconfig_tx_queue_size(VirtIONet *n) >> +{ >> + VirtIODevice *vdev = VIRTIO_DEVICE(n); >> + int i, num_queues = virtio_get_num_queues(vdev); >> + >> + /* Return when the queue size is already less than the 1024 */ >> + if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE) { >> + return; >> + } >> + >> + for (i = 0; i < num_queues; i++) { >> + if (is_tx(i)) { >> + n->net_conf.tx_queue_size = VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE / 2; >> + virtio_queue_set_num(vdev, i, n->net_conf.tx_queue_size); >> + } >> + } >> +} >> + >> static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features) >> { >> VirtIONet *n = VIRTIO_NET(vdev); >> + NetClientState *nc = qemu_get_queue(n->nic); >> int i; >> >> virtio_net_set_multiqueue(n, >> @@ -649,6 +681,16 @@ static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features) >> virtio_has_feature(features, >> VIRTIO_F_VERSION_1)); >> >> + /* >> + * When the traditional QEMU backend is used, using 1024 tx queue size >> + * requires the driver to support the VIRTIO_F_MAX_CHAIN_SIZE feature. If >> + * the feature is not supported, reconfigure the tx queue size to 512. 
>> + */ >> + if (!get_vhost_net(nc->peer) && >> + !virtio_has_feature(features, VIRTIO_F_MAX_CHAIN_SIZE)) { >> + virtio_net_reconfig_tx_queue_size(n); >> + } >> + >> if (n->has_vnet_hdr) { >> n->curr_guest_offloads = >> virtio_net_guest_offloads_by_features(features); >> @@ -1297,8 +1339,8 @@ static int32_t virtio_net_flush_tx(VirtIONetQueue *q) >> >> out_num = elem->out_num; >> out_sg = elem->out_sg; >> - if (out_num < 1) { >> - virtio_error(vdev, "virtio-net header not in first element"); >> + if (out_num < 1 || out_num > VIRTIO_NET_MAX_CHAIN_SIZE) { >> + virtio_error(vdev, "no packet or too large vring desc chain"); >> virtqueue_detach_element(q->tx_vq, elem, 0); >> g_free(elem); >> return -EINVAL; > what about rx vq? we need to limit that as well, do we not? I think the rx vq doesn't need the limit, because there is no off-by-one issue like the tx side writev(). > > diff --git a/include/standard-headers/linux/virtio_config.h b/include/standard-headers/linux/virtio_config.h > index b777069..b70cbfe 100644 > --- a/include/standard-headers/linux/virtio_config.h > +++ b/include/standard-headers/linux/virtio_config.h > @@ -60,6 +60,9 @@ > #define VIRTIO_F_ANY_LAYOUT 27 > #endif /* VIRTIO_CONFIG_NO_LEGACY */ > > +/* Guest chains vring descriptors within a limit */ > +#define VIRTIO_F_MAX_CHAIN_SIZE 31 > + > Pls use a flag in the high 32 bit. I think this should be considered as a transport feature. How about changing VIRTIO_TRANSPORT_F_END to 35 (was 34), and use 34 for VIRTIO_F_MAX_CHAIN_SIZE? Best, Wei
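The renumbering proposed above would amount to something like this in include/standard-headers/linux/virtio_config.h (a sketch of the suggestion only, not an agreed spec change):

    /* Guest chains vring descriptors within a device-advertised limit */
    #define VIRTIO_F_MAX_CHAIN_SIZE 34

    /* was 34; raised so the new transport feature fits in the range */
    #define VIRTIO_TRANSPORT_F_END  35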
On 06/05/2017 11:38 PM, Michael S. Tsirkin wrote: > On Mon, Jun 05, 2017 at 04:57:29PM +0800, Wei Wang wrote: >> This patch enables the virtio-net tx queue size to be configurable >> between 256 and 1024 by the user. The queue size specified by the >> user should be power of 2. If "tx_queue_size" is not offered by the >> user, the default queue size, 1024, will be used. >> >> For the traditional QEMU backend, setting the tx queue size to be 1024 >> requires the guest virtio driver to support the VIRTIO_F_MAX_CHAIN_SIZE >> feature. This feature restricts the guest driver from chaining 1024 >> vring descriptors, which may cause the device side implementation to >> send more than 1024 iov to writev. >> >> VIRTIO_F_MAX_CHAIN_SIZE is a common transport feature added for all >> virtio devices. However, each device has the flexibility to set the max >> chain size to limit its driver to chain vring descriptors. Currently, >> the max chain size of the virtio-net device is set to 1023. >> >> In the case that the tx queue size is set to 1024 and the >> VIRTIO_F_MAX_CHAIN_SIZE feature is not supported by the guest driver, >> the tx queue size will be reconfigured to be 512. > I'd like to see the reverse. Start with the current default. > If VIRTIO_F_MAX_CHAIN_SIZE is negotiated, increase the queue size. > OK, we can let the queue size start with 256, and how about increasing it to 1024 in the following two cases: 1) VIRTIO_F_MAX_CHAIN_SIZE is negotiated; or 2) the backend is vhost. Best, Wei
On Wed, Jun 07, 2017 at 09:04:29AM +0800, Wei Wang wrote: > On 06/05/2017 11:38 PM, Michael S. Tsirkin wrote: > > On Mon, Jun 05, 2017 at 04:57:29PM +0800, Wei Wang wrote: > > > This patch enables the virtio-net tx queue size to be configurable > > > between 256 and 1024 by the user. The queue size specified by the > > > user should be power of 2. If "tx_queue_size" is not offered by the > > > user, the default queue size, 1024, will be used. > > > > > > For the traditional QEMU backend, setting the tx queue size to be 1024 > > > requires the guest virtio driver to support the VIRTIO_F_MAX_CHAIN_SIZE > > > feature. This feature restricts the guest driver from chaining 1024 > > > vring descriptors, which may cause the device side implementation to > > > send more than 1024 iov to writev. > > > > > > VIRTIO_F_MAX_CHAIN_SIZE is a common transport feature added for all > > > virtio devices. However, each device has the flexibility to set the max > > > chain size to limit its driver to chain vring descriptors. Currently, > > > the max chain size of the virtio-net device is set to 1023. > > > > > > In the case that the tx queue size is set to 1024 and the > > > VIRTIO_F_MAX_CHAIN_SIZE feature is not supported by the guest driver, > > > the tx queue size will be reconfigured to be 512. > > I'd like to see the reverse. Start with the current default. > > If VIRTIO_F_MAX_CHAIN_SIZE is negotiated, increase the queue size. > > > > OK, we can let the queue size start with 256, and how about > increasing it to 1024 in the following two cases: I think it should be 1) VIRTIO_F_MAX_CHAIN_SIZE is negotiated and 2) user requested large size > 1) VIRTIO_F_MAX_CHAIN_SIZE is negotiated; or > 2) the backend is vhost. For vhost we also need vhost backend to support VIRTIO_F_MAX_CHAIN_SIZE. We also need to send the max chain size to backend. > Best, > Wei
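Folding Michael's two conditions into one device-side helper could look roughly like this (a sketch that assumes the compiled-in default goes back to 256 and reuses the patch's is_tx() helper; it would be called from virtio_net_set_features() where the posted patch currently shrinks the queue):

    /* sketch: grow the tx queues only if the user asked for more than the
     * default and the driver accepted VIRTIO_F_MAX_CHAIN_SIZE */
    static void virtio_net_grow_tx_queue_size(VirtIONet *n, uint64_t features)
    {
        VirtIODevice *vdev = VIRTIO_DEVICE(n);
        int i;

        if (n->net_conf.tx_queue_size <= VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE ||
            !virtio_has_feature(features, VIRTIO_F_MAX_CHAIN_SIZE)) {
            return;
        }

        for (i = 0; i < virtio_get_num_queues(vdev); i++) {
            if (is_tx(i)) {
                virtio_queue_set_num(vdev, i, n->net_conf.tx_queue_size);
            }
        }
    }

Whether resizing at feature-negotiation time is early enough for every driver is one of the points still being discussed below.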
On 06/09/2017 03:01 AM, Michael S. Tsirkin wrote: > On Wed, Jun 07, 2017 at 09:04:29AM +0800, Wei Wang wrote: >> On 06/05/2017 11:38 PM, Michael S. Tsirkin wrote: >>> On Mon, Jun 05, 2017 at 04:57:29PM +0800, Wei Wang wrote: >>>> This patch enables the virtio-net tx queue size to be configurable >>>> between 256 and 1024 by the user. The queue size specified by the >>>> user should be power of 2. If "tx_queue_size" is not offered by the >>>> user, the default queue size, 1024, will be used. >>>> >>>> For the traditional QEMU backend, setting the tx queue size to be 1024 >>>> requires the guest virtio driver to support the VIRTIO_F_MAX_CHAIN_SIZE >>>> feature. This feature restricts the guest driver from chaining 1024 >>>> vring descriptors, which may cause the device side implementation to >>>> send more than 1024 iov to writev. >>>> >>>> VIRTIO_F_MAX_CHAIN_SIZE is a common transport feature added for all >>>> virtio devices. However, each device has the flexibility to set the max >>>> chain size to limit its driver to chain vring descriptors. Currently, >>>> the max chain size of the virtio-net device is set to 1023. >>>> >>>> In the case that the tx queue size is set to 1024 and the >>>> VIRTIO_F_MAX_CHAIN_SIZE feature is not supported by the guest driver, >>>> the tx queue size will be reconfigured to be 512. >>> I'd like to see the reverse. Start with the current default. >>> If VIRTIO_F_MAX_CHAIN_SIZE is negotiated, increase the queue size. >>> >> OK, we can let the queue size start with 256, and how about >> increasing it to 1024 in the following two cases: > I think it should be > 1) VIRTIO_F_MAX_CHAIN_SIZE is negotiated > and > 2) user requested large size > >> 1) VIRTIO_F_MAX_CHAIN_SIZE is negotiated; or >> 2) the backend is vhost. > For vhost we also need vhost backend to support VIRTIO_F_MAX_CHAIN_SIZE. > We also need to send the max chain size to backend. > I think the limitation that we are dealing with is that the virtio-net backend implementation in QEMU is possible to pass more than 1024 iov to writev. In this case, the QEMU backend uses the "max_chain_size" register to tell the driver the max size of the vring_desc chain. So, I think it should be the device (backend) sending the max size to the driver, rather than the other way around. For the vhost-user and vhost-net backend cases, they don't have such limitation as the QEMU backend, right? If no such limitation, I think without the negotiation of VIRTIO_F_MAX_CHAIN_SIZE, the device should be safe to use 1024 tx queue size if it is the vhost backend. Best, Wei
Ping for comments, thanks. On 06/09/2017 11:00 AM, Wei Wang wrote: > On 06/09/2017 03:01 AM, Michael S. Tsirkin wrote: >> On Wed, Jun 07, 2017 at 09:04:29AM +0800, Wei Wang wrote: >>> On 06/05/2017 11:38 PM, Michael S. Tsirkin wrote: >>>> On Mon, Jun 05, 2017 at 04:57:29PM +0800, Wei Wang wrote: >>>>> This patch enables the virtio-net tx queue size to be configurable >>>>> between 256 and 1024 by the user. The queue size specified by the >>>>> user should be power of 2. If "tx_queue_size" is not offered by the >>>>> user, the default queue size, 1024, will be used. >>>>> >>>>> For the traditional QEMU backend, setting the tx queue size to be >>>>> 1024 >>>>> requires the guest virtio driver to support the >>>>> VIRTIO_F_MAX_CHAIN_SIZE >>>>> feature. This feature restricts the guest driver from chaining 1024 >>>>> vring descriptors, which may cause the device side implementation to >>>>> send more than 1024 iov to writev. >>>>> >>>>> VIRTIO_F_MAX_CHAIN_SIZE is a common transport feature added for all >>>>> virtio devices. However, each device has the flexibility to set >>>>> the max >>>>> chain size to limit its driver to chain vring descriptors. Currently, >>>>> the max chain size of the virtio-net device is set to 1023. >>>>> >>>>> In the case that the tx queue size is set to 1024 and the >>>>> VIRTIO_F_MAX_CHAIN_SIZE feature is not supported by the guest driver, >>>>> the tx queue size will be reconfigured to be 512. >>>> I'd like to see the reverse. Start with the current default. >>>> If VIRTIO_F_MAX_CHAIN_SIZE is negotiated, increase the queue size. >>>> >>> OK, we can let the queue size start with 256, and how about >>> increasing it to 1024 in the following two cases: >> I think it should be >> 1) VIRTIO_F_MAX_CHAIN_SIZE is negotiated >> and >> 2) user requested large size >> >>> 1) VIRTIO_F_MAX_CHAIN_SIZE is negotiated; or >>> 2) the backend is vhost. >> For vhost we also need vhost backend to support VIRTIO_F_MAX_CHAIN_SIZE. >> We also need to send the max chain size to backend. >> > I think the limitation that we are dealing with is that the virtio-net > backend implementation in QEMU is possible to pass more than > 1024 iov to writev. In this case, the QEMU backend uses the > "max_chain_size" register to tell the driver the max size of the > vring_desc chain. So, I think it should be the device (backend) > sending the max size to the driver, rather than the other way > around. > > For the vhost-user and vhost-net backend cases, they don't have > such limitation as the QEMU backend, right? > If no such limitation, I think without the negotiation of > VIRTIO_F_MAX_CHAIN_SIZE, the device should be safe to use 1024 > tx queue size if it is the vhost backend. > > Best, > Wei > > > > > > > --------------------------------------------------------------------- > To unsubscribe, e-mail: virtio-dev-unsubscribe@lists.oasis-open.org > For additional commands, e-mail: virtio-dev-help@lists.oasis-open.org >
On Mon, Jun 12, 2017 at 05:30:46PM +0800, Wei Wang wrote:
> Ping for comments, thanks.
This was only posted a week ago, might be a bit too short for some
people. A couple of weeks is more reasonable before you ping. Also, I
sent a bunch of comments on Thu, 8 Jun 2017. You should probably
address these.
On 06/13/2017 04:43 AM, Michael S. Tsirkin wrote: > On Mon, Jun 12, 2017 at 05:30:46PM +0800, Wei Wang wrote: >> Ping for comments, thanks. > This was only posted a week ago, might be a bit too short for some > people. OK, sorry for the push. > A couple of weeks is more reasonable before you ping. Also, I > sent a bunch of comments on Thu, 8 Jun 2017. You should probably > address these. > I responded to the comments. The main question is that I'm not sure why we need the vhost backend to support VIRTIO_F_MAX_CHAIN_SIZE. IMHO, that should be a feature proposed to solve the possible issue caused by the QEMU implemented backend. Best, Wei
On 2017年06月13日 11:10, Wei Wang wrote: > On 06/13/2017 04:43 AM, Michael S. Tsirkin wrote: >> On Mon, Jun 12, 2017 at 05:30:46PM +0800, Wei Wang wrote: >>> Ping for comments, thanks. >> This was only posted a week ago, might be a bit too short for some >> people. > OK, sorry for the push. >> A couple of weeks is more reasonable before you ping. Also, I >> sent a bunch of comments on Thu, 8 Jun 2017. You should probably >> address these. >> > I responded to the comments. The main question is that I'm not sure > why we need the vhost backend to support VIRTIO_F_MAX_CHAIN_SIZE. > IMHO, that should be a feature proposed to solve the possible issue > caused by the QEMU implemented backend. The issue is what if there's a mismatch of max #sgs between qemu and vhost? Thanks > > Best, > Wei > > --------------------------------------------------------------------- > To unsubscribe, e-mail: virtio-dev-unsubscribe@lists.oasis-open.org > For additional commands, e-mail: virtio-dev-help@lists.oasis-open.org >
On 06/13/2017 11:19 AM, Jason Wang wrote: > > > On 2017年06月13日 11:10, Wei Wang wrote: >> On 06/13/2017 04:43 AM, Michael S. Tsirkin wrote: >>> On Mon, Jun 12, 2017 at 05:30:46PM +0800, Wei Wang wrote: >>>> Ping for comments, thanks. >>> This was only posted a week ago, might be a bit too short for some >>> people. >> OK, sorry for the push. >>> A couple of weeks is more reasonable before you ping. Also, I >>> sent a bunch of comments on Thu, 8 Jun 2017. You should probably >>> address these. >>> >> I responded to the comments. The main question is that I'm not sure >> why we need the vhost backend to support VIRTIO_F_MAX_CHAIN_SIZE. >> IMHO, that should be a feature proposed to solve the possible issue >> caused by the QEMU implemented backend. > > The issue is what if there's a mismatch of max #sgs between qemu and > vhost? > When the vhost backend is used, QEMU is not involved in the data path. The vhost backend directly gets what is offered by the guest from the vq. Why would there be a mismatch of max #sgs between QEMU and vhost, and what is the QEMU side max #sgs used for? Thanks. Best, Wei
On 2017年06月13日 11:51, Wei Wang wrote: > On 06/13/2017 11:19 AM, Jason Wang wrote: >> >> >> On 2017年06月13日 11:10, Wei Wang wrote: >>> On 06/13/2017 04:43 AM, Michael S. Tsirkin wrote: >>>> On Mon, Jun 12, 2017 at 05:30:46PM +0800, Wei Wang wrote: >>>>> Ping for comments, thanks. >>>> This was only posted a week ago, might be a bit too short for some >>>> people. >>> OK, sorry for the push. >>>> A couple of weeks is more reasonable before you ping. Also, I >>>> sent a bunch of comments on Thu, 8 Jun 2017. You should probably >>>> address these. >>>> >>> I responded to the comments. The main question is that I'm not sure >>> why we need the vhost backend to support VIRTIO_F_MAX_CHAIN_SIZE. >>> IMHO, that should be a feature proposed to solve the possible issue >>> caused by the QEMU implemented backend. >> >> The issue is what if there's a mismatch of max #sgs between qemu and >> vhost? >> > > When the vhost backend is used, QEMU is not involved in the data path. > The vhost backend > directly gets what is offered by the guest from the vq. Why would > there be a mismatch of > max #sgs between QEMU and vhost, and what is the QEMU side max #sgs > used for? Thanks. You need query the backend max #sgs in this case at least. no? If not how do you know the value is supported by the backend? Thanks > > Best, > Wei > >
On 2017年06月13日 11:55, Jason Wang wrote: > > > On 2017年06月13日 11:51, Wei Wang wrote: >> On 06/13/2017 11:19 AM, Jason Wang wrote: >>> >>> >>> On 2017年06月13日 11:10, Wei Wang wrote: >>>> On 06/13/2017 04:43 AM, Michael S. Tsirkin wrote: >>>>> On Mon, Jun 12, 2017 at 05:30:46PM +0800, Wei Wang wrote: >>>>>> Ping for comments, thanks. >>>>> This was only posted a week ago, might be a bit too short for some >>>>> people. >>>> OK, sorry for the push. >>>>> A couple of weeks is more reasonable before you ping. Also, I >>>>> sent a bunch of comments on Thu, 8 Jun 2017. You should probably >>>>> address these. >>>>> >>>> I responded to the comments. The main question is that I'm not sure >>>> why we need the vhost backend to support VIRTIO_F_MAX_CHAIN_SIZE. >>>> IMHO, that should be a feature proposed to solve the possible issue >>>> caused by the QEMU implemented backend. >>> >>> The issue is what if there's a mismatch of max #sgs between qemu and >>> vhost? >>> >> >> When the vhost backend is used, QEMU is not involved in the data >> path. The vhost backend >> directly gets what is offered by the guest from the vq. FYI, qemu will try to fallback to userspace if there's something wrong with vhost-kernel (e.g the IOMMU support). This doesn't work for vhost-user actually, but it works for vhost-kernel. Thanks >> Why would there be a mismatch of >> max #sgs between QEMU and vhost, and what is the QEMU side max #sgs >> used for? Thanks. > > You need query the backend max #sgs in this case at least. no? If not > how do you know the value is supported by the backend? > > Thanks > >> >> Best, >> Wei >> >> > >
On 06/13/2017 11:55 AM, Jason Wang wrote: > > On 2017年06月13日 11:51, Wei Wang wrote: >> On 06/13/2017 11:19 AM, Jason Wang wrote: >>> >>> On 2017年06月13日 11:10, Wei Wang wrote: >>>> On 06/13/2017 04:43 AM, Michael S. Tsirkin wrote: >>>>> On Mon, Jun 12, 2017 at 05:30:46PM +0800, Wei Wang wrote: >>>>>> Ping for comments, thanks. >>>>> This was only posted a week ago, might be a bit too short for some >>>>> people. >>>> OK, sorry for the push. >>>>> A couple of weeks is more reasonable before you ping. Also, I >>>>> sent a bunch of comments on Thu, 8 Jun 2017. You should probably >>>>> address these. >>>>> >>>> I responded to the comments. The main question is that I'm not sure >>>> why we need the vhost backend to support VIRTIO_F_MAX_CHAIN_SIZE. >>>> IMHO, that should be a feature proposed to solve the possible issue >>>> caused by the QEMU implemented backend. >>> The issue is what if there's a mismatch of max #sgs between qemu and >>> vhost? >>> >> When the vhost backend is used, QEMU is not involved in the data path. >> The vhost backend >> directly gets what is offered by the guest from the vq. Why would >> there be a mismatch of >> max #sgs between QEMU and vhost, and what is the QEMU side max #sgs >> used for? Thanks. > You need query the backend max #sgs in this case at least. no? If not > how do you know the value is supported by the backend? > > Thanks > Here is my thought: vhost backend has already been supporting 1024 sgs, so I think it might not be necessary to query the max sgs that the vhost backend supports. In the setup phase, when QEMU detects the backend is vhost, it assumes 1024 max sgs is supported, instead of giving an extra call to query. The advantage is that people who is using the vhost backend can upgrade to use 1024 tx queue size by only applying the QEMU patches. Adding an extra call to query the size would need to patch their vhost backend (like vhost-user), which is difficult for them. Best, Wei
On 06/13/2017 11:59 AM, Jason Wang wrote: > > On 2017年06月13日 11:55, Jason Wang wrote: >> The issue is what if there's a mismatch of max #sgs between qemu and >> vhost? >> >>> When the vhost backend is used, QEMU is not involved in the data >>> path. The vhost backend >>> directly gets what is offered by the guest from the vq. > FYI, qemu will try to fallback to userspace if there's something wrong > with vhost-kernel (e.g the IOMMU support). This doesn't work for > vhost-user actually, but it works for vhost-kernel. > > Thanks > That wouldn't be a problem. When it falls back to the QEMU backend, the "max_chain_size" will be set according to the QEMU backend (e.g. 1023). Guest will read the max_chain_size register. Best, Wei
On 2017年06月13日 14:08, Wei Wang wrote: > On 06/13/2017 11:55 AM, Jason Wang wrote: >> >> On 2017年06月13日 11:51, Wei Wang wrote: >>> On 06/13/2017 11:19 AM, Jason Wang wrote: >>>> >>>> On 2017年06月13日 11:10, Wei Wang wrote: >>>>> On 06/13/2017 04:43 AM, Michael S. Tsirkin wrote: >>>>>> On Mon, Jun 12, 2017 at 05:30:46PM +0800, Wei Wang wrote: >>>>>>> Ping for comments, thanks. >>>>>> This was only posted a week ago, might be a bit too short for some >>>>>> people. >>>>> OK, sorry for the push. >>>>>> A couple of weeks is more reasonable before you ping. Also, I >>>>>> sent a bunch of comments on Thu, 8 Jun 2017. You should probably >>>>>> address these. >>>>>> >>>>> I responded to the comments. The main question is that I'm not sure >>>>> why we need the vhost backend to support VIRTIO_F_MAX_CHAIN_SIZE. >>>>> IMHO, that should be a feature proposed to solve the possible issue >>>>> caused by the QEMU implemented backend. >>>> The issue is what if there's a mismatch of max #sgs between qemu and >>>> vhost? >>>> >>> When the vhost backend is used, QEMU is not involved in the data path. >>> The vhost backend >>> directly gets what is offered by the guest from the vq. Why would >>> there be a mismatch of >>> max #sgs between QEMU and vhost, and what is the QEMU side max #sgs >>> used for? Thanks. >> You need query the backend max #sgs in this case at least. no? If not >> how do you know the value is supported by the backend? >> >> Thanks >> > Here is my thought: vhost backend has already been supporting 1024 sgs, > so I think it might not be necessary to query the max sgs that the vhost > backend supports. In the setup phase, when QEMU detects the backend is > vhost, it assumes 1024 max sgs is supported, instead of giving an extra > call to query. We can probably assume vhost kernel supports up to 1024 sgs. But how about for other vhost-user backends? And what you said here makes me ask one of my questions in the past: Do we have plan to extend 1024 to a larger value or 1024 looks good for the future years? If we only care about 1024, there's even no need for a new config filed, a feature flag is more than enough. If we want to extend it to e.g 2048, we definitely need to query vhost backend's limit (even for vhost-kernel). Thanks > > The advantage is that people who is using the vhost backend can upgrade > to use 1024 tx queue size by only applying the QEMU patches. Adding an > extra > call to query the size would need to patch their vhost backend (like > vhost-user), > which is difficult for them. > > > Best, > Wei > > > > > > > > > --------------------------------------------------------------------- > To unsubscribe, e-mail: virtio-dev-unsubscribe@lists.oasis-open.org > For additional commands, e-mail: virtio-dev-help@lists.oasis-open.org >
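Querying the backend, as suggested here, would need a new call; the sketch below uses a hypothetical vhost_net_get_max_chain_size() helper, modelled on the existing vhost_net_get_max_queues(), purely for illustration (no such request exists in the vhost or vhost-user interface today):

    /* hypothetical helper; would require a new vhost/vhost-user request */
    uint16_t vhost_net_get_max_chain_size(VHostNetState *net);

    static uint16_t virtio_net_peer_max_chain_size(VirtIONet *n)
    {
        NetClientState *nc = qemu_get_queue(n->nic);
        VHostNetState *net = get_vhost_net(nc->peer);

        /* fall back to QEMU's own writev()-bound limit */
        return net ? vhost_net_get_max_chain_size(net)
                   : VIRTIO_NET_MAX_CHAIN_SIZE;
    }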
On 2017年06月13日 14:13, Wei Wang wrote: > On 06/13/2017 11:59 AM, Jason Wang wrote: >> >> On 2017年06月13日 11:55, Jason Wang wrote: >>> The issue is what if there's a mismatch of max #sgs between qemu and >>> vhost? >>> >>>> When the vhost backend is used, QEMU is not involved in the data >>>> path. The vhost backend >>>> directly gets what is offered by the guest from the vq. >> FYI, qemu will try to fallback to userspace if there's something wrong >> with vhost-kernel (e.g the IOMMU support). This doesn't work for >> vhost-user actually, but it works for vhost-kernel. >> >> Thanks >> > > That wouldn't be a problem. When it falls back to the QEMU backend, > the "max_chain_size" will be set according to the QEMU backend > (e.g. 1023). Guest will read the max_chain_size register. What if there's backend that supports less than 1023? Or in the future, we increase the limit to e.g 2048? Thanks > > Best, > Wei >
On 06/13/2017 02:29 PM, Jason Wang wrote: > The issue is what if there's a mismatch of max #sgs between qemu and >>>> When the vhost backend is used, QEMU is not involved in the data path. >>>> The vhost backend >>>> directly gets what is offered by the guest from the vq. Why would >>>> there be a mismatch of >>>> max #sgs between QEMU and vhost, and what is the QEMU side max #sgs >>>> used for? Thanks. >>> You need query the backend max #sgs in this case at least. no? If not >>> how do you know the value is supported by the backend? >>> >>> Thanks >>> >> Here is my thought: vhost backend has already been supporting 1024 sgs, >> so I think it might not be necessary to query the max sgs that the vhost >> backend supports. In the setup phase, when QEMU detects the backend is >> vhost, it assumes 1024 max sgs is supported, instead of giving an extra >> call to query. > > We can probably assume vhost kernel supports up to 1024 sgs. But how > about for other vhost-user backends? > So far, I haven't seen any vhost backend implementation supporting less than 1024 sgs. > And what you said here makes me ask one of my questions in the past: > > Do we have plan to extend 1024 to a larger value or 1024 looks good > for the future years? If we only care about 1024, there's even no need > for a new config filed, a feature flag is more than enough. If we want > to extend it to e.g 2048, we definitely need to query vhost backend's > limit (even for vhost-kernel). > According to virtio spec (e.g. 2.4.4), unreasonably large descriptors are not encouraged to be used by the guest. If possible, I would suggest to use 1024 as the largest number of descriptors that the guest can chain, even when we have larger queue size in the future. That is, if (backend == QEMU backend) config.max_chain_size = 1023 (defined by the qemu backend implementation); else if (backend == vhost) config.max_chain_size = 1024; It is transparent to the guest. From the guest's point of view, all it knows is a value given to him via reading config.max_chain_size. Best, Wei
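In virtio_net_get_config() the policy Wei describes reduces to a sketch like the following; note it hard-codes the assumption that every vhost backend accepts 1024-descriptor chains, which is exactly what Jason is questioning:

    NetClientState *nc = qemu_get_queue(n->nic);
    /* 1023 is the limit of QEMU's own backend, 1024 is assumed for vhost */
    uint16_t max_chain_size = get_vhost_net(nc->peer) ?
                              1024 : VIRTIO_NET_MAX_CHAIN_SIZE;

    virtio_stw_p(vdev, &netcfg.max_chain_size, max_chain_size);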
On 06/13/2017 02:31 PM, Jason Wang wrote: > > > On 2017年06月13日 14:13, Wei Wang wrote: >> On 06/13/2017 11:59 AM, Jason Wang wrote: >>> >>> On 2017年06月13日 11:55, Jason Wang wrote: >>>> The issue is what if there's a mismatch of max #sgs between qemu and >>>> vhost? >>>> >>>>> When the vhost backend is used, QEMU is not involved in the data >>>>> path. The vhost backend >>>>> directly gets what is offered by the guest from the vq. >>> FYI, qemu will try to fallback to userspace if there's something wrong >>> with vhost-kernel (e.g the IOMMU support). This doesn't work for >>> vhost-user actually, but it works for vhost-kernel. >>> >>> Thanks >>> >> >> That wouldn't be a problem. When it falls back to the QEMU backend, >> the "max_chain_size" will be set according to the QEMU backend >> (e.g. 1023). Guest will read the max_chain_size register. > > What if there's backend that supports less than 1023? Or in the > future, we increase the limit to e.g 2048? > I agree the potential issue is that it's assumed (hardcoded) that the vhost backend supports 1024 chain size. This only comes to be an issue if the vhost backend implementation supports less than 1024 in the future, but that can be solved by introducing another feature to support that. If that's acceptable, some customers will be easy to upgrade their already deployed products to use 1024 tx queue size. Best, Wei
On 2017年06月13日 15:17, Wei Wang wrote: > On 06/13/2017 02:29 PM, Jason Wang wrote: >> The issue is what if there's a mismatch of max #sgs between qemu and >>>>> When the vhost backend is used, QEMU is not involved in the data >>>>> path. >>>>> The vhost backend >>>>> directly gets what is offered by the guest from the vq. Why would >>>>> there be a mismatch of >>>>> max #sgs between QEMU and vhost, and what is the QEMU side max #sgs >>>>> used for? Thanks. >>>> You need query the backend max #sgs in this case at least. no? If not >>>> how do you know the value is supported by the backend? >>>> >>>> Thanks >>>> >>> Here is my thought: vhost backend has already been supporting 1024 sgs, >>> so I think it might not be necessary to query the max sgs that the >>> vhost >>> backend supports. In the setup phase, when QEMU detects the backend is >>> vhost, it assumes 1024 max sgs is supported, instead of giving an extra >>> call to query. >> >> We can probably assume vhost kernel supports up to 1024 sgs. But how >> about for other vhost-user backends? >> > So far, I haven't seen any vhost backend implementation supporting > less than 1024 sgs. Since vhost-user is an open protocol we can not check each implementation (some may be even close sourced). For safety, we need an explicit clarification on this. > > >> And what you said here makes me ask one of my questions in the past: >> >> Do we have plan to extend 1024 to a larger value or 1024 looks good >> for the future years? If we only care about 1024, there's even no >> need for a new config filed, a feature flag is more than enough. If >> we want to extend it to e.g 2048, we definitely need to query vhost >> backend's limit (even for vhost-kernel). >> > > According to virtio spec (e.g. 2.4.4), unreasonably large descriptors are > not encouraged to be used by the guest. If possible, I would suggest > to use > 1024 as the largest number of descriptors that the guest can chain, > even when > we have larger queue size in the future. That is, > if (backend == QEMU backend) > config.max_chain_size = 1023 (defined by the qemu backend > implementation); > else if (backend == vhost) > config.max_chain_size = 1024; > > It is transparent to the guest. From the guest's point of view, all it > knows is a value > given to him via reading config.max_chain_size. So not transparent actually, guest at least guest need to see and check for this. So the question still, since you only care about two cases in fact: - backend supports 1024 - backend supports <1024 (qemu or whatever other backends) So it looks like a new feature flag is more than enough. If device(backends) support this feature, it can make sure 1024 sgs is supported? Thanks > > > Best, > Wei > > > > > > > > > > > > > > > > > >
On 06/13/2017 05:04 PM, Jason Wang wrote: > > > On 2017年06月13日 15:17, Wei Wang wrote: >> On 06/13/2017 02:29 PM, Jason Wang wrote: >>> The issue is what if there's a mismatch of max #sgs between qemu and >>>>>> When the vhost backend is used, QEMU is not involved in the data >>>>>> path. >>>>>> The vhost backend >>>>>> directly gets what is offered by the guest from the vq. Why would >>>>>> there be a mismatch of >>>>>> max #sgs between QEMU and vhost, and what is the QEMU side max #sgs >>>>>> used for? Thanks. >>>>> You need query the backend max #sgs in this case at least. no? If not >>>>> how do you know the value is supported by the backend? >>>>> >>>>> Thanks >>>>> >>>> Here is my thought: vhost backend has already been supporting 1024 >>>> sgs, >>>> so I think it might not be necessary to query the max sgs that the >>>> vhost >>>> backend supports. In the setup phase, when QEMU detects the backend is >>>> vhost, it assumes 1024 max sgs is supported, instead of giving an >>>> extra >>>> call to query. >>> >>> We can probably assume vhost kernel supports up to 1024 sgs. But how >>> about for other vhost-user backends? >>> >> So far, I haven't seen any vhost backend implementation supporting >> less than 1024 sgs. > > Since vhost-user is an open protocol we can not check each > implementation (some may be even close sourced). For safety, we need > an explicit clarification on this. > >> >> >>> And what you said here makes me ask one of my questions in the past: >>> >>> Do we have plan to extend 1024 to a larger value or 1024 looks good >>> for the future years? If we only care about 1024, there's even no >>> need for a new config filed, a feature flag is more than enough. If >>> we want to extend it to e.g 2048, we definitely need to query vhost >>> backend's limit (even for vhost-kernel). >>> >> >> According to virtio spec (e.g. 2.4.4), unreasonably large descriptors >> are >> not encouraged to be used by the guest. If possible, I would suggest >> to use >> 1024 as the largest number of descriptors that the guest can chain, >> even when >> we have larger queue size in the future. That is, >> if (backend == QEMU backend) >> config.max_chain_size = 1023 (defined by the qemu backend >> implementation); >> else if (backend == vhost) >> config.max_chain_size = 1024; >> >> It is transparent to the guest. From the guest's point of view, all >> it knows is a value >> given to him via reading config.max_chain_size. > > So not transparent actually, guest at least guest need to see and > check for this. So the question still, since you only care about two > cases in fact: > > - backend supports 1024 > - backend supports <1024 (qemu or whatever other backends) > > So it looks like a new feature flag is more than enough. If > device(backends) support this feature, it can make sure 1024 sgs is > supported? > That wouldn't be enough. For example, QEMU3.0 backend supports max_chain_size=1023, while QEMU4.0 backend supports max_chain_size=1021. How would the guest know the max size with the same feature flag? Would it still chain 1023 descriptors with QEMU4.0? Best, Wei
On 2017年06月13日 17:50, Wei Wang wrote: > On 06/13/2017 05:04 PM, Jason Wang wrote: >> >> >> On 2017年06月13日 15:17, Wei Wang wrote: >>> On 06/13/2017 02:29 PM, Jason Wang wrote: >>>> The issue is what if there's a mismatch of max #sgs between qemu and >>>>>>> When the vhost backend is used, QEMU is not involved in the data >>>>>>> path. >>>>>>> The vhost backend >>>>>>> directly gets what is offered by the guest from the vq. Why would >>>>>>> there be a mismatch of >>>>>>> max #sgs between QEMU and vhost, and what is the QEMU side max #sgs >>>>>>> used for? Thanks. >>>>>> You need query the backend max #sgs in this case at least. no? If >>>>>> not >>>>>> how do you know the value is supported by the backend? >>>>>> >>>>>> Thanks >>>>>> >>>>> Here is my thought: vhost backend has already been supporting 1024 >>>>> sgs, >>>>> so I think it might not be necessary to query the max sgs that the >>>>> vhost >>>>> backend supports. In the setup phase, when QEMU detects the >>>>> backend is >>>>> vhost, it assumes 1024 max sgs is supported, instead of giving an >>>>> extra >>>>> call to query. >>>> >>>> We can probably assume vhost kernel supports up to 1024 sgs. But >>>> how about for other vhost-user backends? >>>> >>> So far, I haven't seen any vhost backend implementation supporting >>> less than 1024 sgs. >> >> Since vhost-user is an open protocol we can not check each >> implementation (some may be even close sourced). For safety, we need >> an explicit clarification on this. >> >>> >>> >>>> And what you said here makes me ask one of my questions in the past: >>>> >>>> Do we have plan to extend 1024 to a larger value or 1024 looks good >>>> for the future years? If we only care about 1024, there's even no >>>> need for a new config filed, a feature flag is more than enough. If >>>> we want to extend it to e.g 2048, we definitely need to query vhost >>>> backend's limit (even for vhost-kernel). >>>> >>> >>> According to virtio spec (e.g. 2.4.4), unreasonably large >>> descriptors are >>> not encouraged to be used by the guest. If possible, I would suggest >>> to use >>> 1024 as the largest number of descriptors that the guest can chain, >>> even when >>> we have larger queue size in the future. That is, >>> if (backend == QEMU backend) >>> config.max_chain_size = 1023 (defined by the qemu backend >>> implementation); >>> else if (backend == vhost) >>> config.max_chain_size = 1024; >>> >>> It is transparent to the guest. From the guest's point of view, all >>> it knows is a value >>> given to him via reading config.max_chain_size. >> >> So not transparent actually, guest at least guest need to see and >> check for this. So the question still, since you only care about two >> cases in fact: >> >> - backend supports 1024 >> - backend supports <1024 (qemu or whatever other backends) >> >> So it looks like a new feature flag is more than enough. If >> device(backends) support this feature, it can make sure 1024 sgs is >> supported? >> > > That wouldn't be enough. For example, QEMU3.0 backend supports > max_chain_size=1023, > while QEMU4.0 backend supports max_chain_size=1021. How would the > guest know > the max size with the same feature flag? Would it still chain 1023 > descriptors with QEMU4.0? > > Best, > Wei I believe we won't go back to less than 1024 in the future. It may be worth to add a unittest for this to catch regression early. Thanks > > > >
On 2017年06月13日 18:46, Jason Wang wrote: > > > On 2017年06月13日 17:50, Wei Wang wrote: >> On 06/13/2017 05:04 PM, Jason Wang wrote: >>> >>> >>> On 2017年06月13日 15:17, Wei Wang wrote: >>>> On 06/13/2017 02:29 PM, Jason Wang wrote: >>>>> The issue is what if there's a mismatch of max #sgs between qemu and >>>>>>>> When the vhost backend is used, QEMU is not involved in the >>>>>>>> data path. >>>>>>>> The vhost backend >>>>>>>> directly gets what is offered by the guest from the vq. Why would >>>>>>>> there be a mismatch of >>>>>>>> max #sgs between QEMU and vhost, and what is the QEMU side max >>>>>>>> #sgs >>>>>>>> used for? Thanks. >>>>>>> You need query the backend max #sgs in this case at least. no? >>>>>>> If not >>>>>>> how do you know the value is supported by the backend? >>>>>>> >>>>>>> Thanks >>>>>>> >>>>>> Here is my thought: vhost backend has already been supporting >>>>>> 1024 sgs, >>>>>> so I think it might not be necessary to query the max sgs that >>>>>> the vhost >>>>>> backend supports. In the setup phase, when QEMU detects the >>>>>> backend is >>>>>> vhost, it assumes 1024 max sgs is supported, instead of giving an >>>>>> extra >>>>>> call to query. >>>>> >>>>> We can probably assume vhost kernel supports up to 1024 sgs. But >>>>> how about for other vhost-user backends? >>>>> >>>> So far, I haven't seen any vhost backend implementation supporting >>>> less than 1024 sgs. >>> >>> Since vhost-user is an open protocol we can not check each >>> implementation (some may be even close sourced). For safety, we need >>> an explicit clarification on this. >>> >>>> >>>> >>>>> And what you said here makes me ask one of my questions in the past: >>>>> >>>>> Do we have plan to extend 1024 to a larger value or 1024 looks >>>>> good for the future years? If we only care about 1024, there's >>>>> even no need for a new config filed, a feature flag is more than >>>>> enough. If we want to extend it to e.g 2048, we definitely need to >>>>> query vhost backend's limit (even for vhost-kernel). >>>>> >>>> >>>> According to virtio spec (e.g. 2.4.4), unreasonably large >>>> descriptors are >>>> not encouraged to be used by the guest. If possible, I would >>>> suggest to use >>>> 1024 as the largest number of descriptors that the guest can chain, >>>> even when >>>> we have larger queue size in the future. That is, >>>> if (backend == QEMU backend) >>>> config.max_chain_size = 1023 (defined by the qemu backend >>>> implementation); >>>> else if (backend == vhost) >>>> config.max_chain_size = 1024; >>>> >>>> It is transparent to the guest. From the guest's point of view, all >>>> it knows is a value >>>> given to him via reading config.max_chain_size. >>> >>> So not transparent actually, guest at least guest need to see and >>> check for this. So the question still, since you only care about two >>> cases in fact: >>> >>> - backend supports 1024 >>> - backend supports <1024 (qemu or whatever other backends) >>> >>> So it looks like a new feature flag is more than enough. If >>> device(backends) support this feature, it can make sure 1024 sgs is >>> supported? >>> >> >> That wouldn't be enough. For example, QEMU3.0 backend supports >> max_chain_size=1023, >> while QEMU4.0 backend supports max_chain_size=1021. How would the >> guest know >> the max size with the same feature flag? Would it still chain 1023 >> descriptors with QEMU4.0? >> >> Best, >> Wei > > I believe we won't go back to less than 1024 in the future. It may be > worth to add a unittest for this to catch regression early. 
> > Thanks > Consider the queue size is 256 now, I think maybe we can first make tx queue size configurable up to 1024, and then do the #sg stuffs on top. What's your opinion, Michael? Thanks
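The cheapest guard against a silent regression of these limits is a build-time assertion next to the constants (a qtest that actually submits a near-limit descriptor chain would cover the backend path as well); for example:

    /* catch an accidental reduction of the limits at compile time; this only
     * covers QEMU's own constants, not a vhost backend's behaviour */
    QEMU_BUILD_BUG_ON(VIRTQUEUE_MAX_SIZE < 1024);
    QEMU_BUILD_BUG_ON(VIRTIO_NET_MAX_CHAIN_SIZE < 1023);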
On Wed, Jun 14, 2017 at 07:26:54PM +0800, Jason Wang wrote: > > > On 2017年06月13日 18:46, Jason Wang wrote: > > > > > > On 2017年06月13日 17:50, Wei Wang wrote: > > > On 06/13/2017 05:04 PM, Jason Wang wrote: > > > > > > > > > > > > On 2017年06月13日 15:17, Wei Wang wrote: > > > > > On 06/13/2017 02:29 PM, Jason Wang wrote: > > > > > > The issue is what if there's a mismatch of max #sgs between qemu and > > > > > > > > > When the vhost backend is used, QEMU is not > > > > > > > > > involved in the data path. > > > > > > > > > The vhost backend > > > > > > > > > directly gets what is offered by the guest from the vq. Why would > > > > > > > > > there be a mismatch of > > > > > > > > > max #sgs between QEMU and vhost, and what is > > > > > > > > > the QEMU side max #sgs > > > > > > > > > used for? Thanks. > > > > > > > > You need query the backend max #sgs in this case > > > > > > > > at least. no? If not > > > > > > > > how do you know the value is supported by the backend? > > > > > > > > > > > > > > > > Thanks > > > > > > > > > > > > > > > Here is my thought: vhost backend has already been > > > > > > > supporting 1024 sgs, > > > > > > > so I think it might not be necessary to query the > > > > > > > max sgs that the vhost > > > > > > > backend supports. In the setup phase, when QEMU > > > > > > > detects the backend is > > > > > > > vhost, it assumes 1024 max sgs is supported, instead > > > > > > > of giving an extra > > > > > > > call to query. > > > > > > > > > > > > We can probably assume vhost kernel supports up to 1024 > > > > > > sgs. But how about for other vhost-user backends? > > > > > > > > > > > So far, I haven't seen any vhost backend implementation > > > > > supporting less than 1024 sgs. > > > > > > > > Since vhost-user is an open protocol we can not check each > > > > implementation (some may be even close sourced). For safety, we > > > > need an explicit clarification on this. > > > > > > > > > > > > > > > > > > > > And what you said here makes me ask one of my questions in the past: > > > > > > > > > > > > Do we have plan to extend 1024 to a larger value or 1024 > > > > > > looks good for the future years? If we only care about > > > > > > 1024, there's even no need for a new config filed, a > > > > > > feature flag is more than enough. If we want to extend > > > > > > it to e.g 2048, we definitely need to query vhost > > > > > > backend's limit (even for vhost-kernel). > > > > > > > > > > > > > > > > According to virtio spec (e.g. 2.4.4), unreasonably large > > > > > descriptors are > > > > > not encouraged to be used by the guest. If possible, I would > > > > > suggest to use > > > > > 1024 as the largest number of descriptors that the guest can > > > > > chain, even when > > > > > we have larger queue size in the future. That is, > > > > > if (backend == QEMU backend) > > > > > config.max_chain_size = 1023 (defined by the qemu > > > > > backend implementation); > > > > > else if (backend == vhost) > > > > > config.max_chain_size = 1024; > > > > > > > > > > It is transparent to the guest. From the guest's point of > > > > > view, all it knows is a value > > > > > given to him via reading config.max_chain_size. > > > > > > > > So not transparent actually, guest at least guest need to see > > > > and check for this. So the question still, since you only care > > > > about two cases in fact: > > > > > > > > - backend supports 1024 > > > > - backend supports <1024 (qemu or whatever other backends) > > > > > > > > So it looks like a new feature flag is more than enough. 
If > > > > device(backends) support this feature, it can make sure 1024 sgs > > > > is supported? > > > > > > > > > > That wouldn't be enough. For example, QEMU3.0 backend supports > > > max_chain_size=1023, > > > while QEMU4.0 backend supports max_chain_size=1021. How would the > > > guest know > > > the max size with the same feature flag? Would it still chain 1023 > > > descriptors with QEMU4.0? > > > > > > Best, > > > Wei > > > > I believe we won't go back to less than 1024 in the future. It may be > > worth to add a unittest for this to catch regression early. > > > > Thanks I think I disagree with that. Smaller pipes a better (e.g. less cache pressure) and you only need huge pipes because host thread gets scheduled out for too long. With more CPUs there's less of a chance of an overcommit so we'll be able to get by with smaller pipes in the future. > > Consider the queue size is 256 now, I think maybe we can first make tx queue > size configurable up to 1024, and then do the #sg stuffs on top. > > What's your opinion, Michael? > > Thanks With a kernel backend, 1024 is problematic since we are then unable to add any entries or handle cases where an entry crosses an MR region boundary. We could support up to 512 with a kernel backend but no one seems to want that :) With vhost-user the backend might be able to handle that. So an acceptable option would be to allow 1K with vhost-user backends only, trim it back with other backends.
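As a rough, back-of-the-envelope illustration of the headroom problem described above (UIO_MAXIOV is the Linux iovec limit the kernel vhost backend works against; the single-split assumption is purely illustrative), a 1024-entry chain leaves a kernel backend no slack for descriptors that get split at guest memory-region boundaries:

    #include <stdio.h>

    #define UIO_MAXIOV    1024   /* Linux limit on iovec entries per call */
    #define TX_QUEUE_SIZE 1024   /* proposed virtio-net tx queue size */

    int main(void)
    {
        /* Worst case the spec allows: one chain as long as the whole queue. */
        int max_chain = TX_QUEUE_SIZE;
        /* Assume one descriptor is split in two because it crosses a guest
         * memory-region boundary during vhost address translation. */
        int iovs_needed = max_chain + 1;

        printf("iovecs needed: %d, kernel limit: %d -> %s\n",
               iovs_needed, UIO_MAXIOV,
               iovs_needed > UIO_MAXIOV ? "no headroom" : "fits");
        return 0;
    }

This is why capping the chain at 1023, or trimming the queue back to 512, keeps a kernel backend within its limits.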
On 2017年06月14日 23:22, Michael S. Tsirkin wrote: > On Wed, Jun 14, 2017 at 07:26:54PM +0800, Jason Wang wrote: >> >> On 2017年06月13日 18:46, Jason Wang wrote: >>> >>> On 2017年06月13日 17:50, Wei Wang wrote: >>>> On 06/13/2017 05:04 PM, Jason Wang wrote: >>>>> >>>>> On 2017年06月13日 15:17, Wei Wang wrote: >>>>>> On 06/13/2017 02:29 PM, Jason Wang wrote: >>>>>>> The issue is what if there's a mismatch of max #sgs between qemu and >>>>>>>>>> When the vhost backend is used, QEMU is not >>>>>>>>>> involved in the data path. >>>>>>>>>> The vhost backend >>>>>>>>>> directly gets what is offered by the guest from the vq. Why would >>>>>>>>>> there be a mismatch of >>>>>>>>>> max #sgs between QEMU and vhost, and what is >>>>>>>>>> the QEMU side max #sgs >>>>>>>>>> used for? Thanks. >>>>>>>>> You need query the backend max #sgs in this case >>>>>>>>> at least. no? If not >>>>>>>>> how do you know the value is supported by the backend? >>>>>>>>> >>>>>>>>> Thanks >>>>>>>>> >>>>>>>> Here is my thought: vhost backend has already been >>>>>>>> supporting 1024 sgs, >>>>>>>> so I think it might not be necessary to query the >>>>>>>> max sgs that the vhost >>>>>>>> backend supports. In the setup phase, when QEMU >>>>>>>> detects the backend is >>>>>>>> vhost, it assumes 1024 max sgs is supported, instead >>>>>>>> of giving an extra >>>>>>>> call to query. >>>>>>> We can probably assume vhost kernel supports up to 1024 >>>>>>> sgs. But how about for other vhost-user backends? >>>>>>> >>>>>> So far, I haven't seen any vhost backend implementation >>>>>> supporting less than 1024 sgs. >>>>> Since vhost-user is an open protocol we can not check each >>>>> implementation (some may be even close sourced). For safety, we >>>>> need an explicit clarification on this. >>>>> >>>>>> >>>>>>> And what you said here makes me ask one of my questions in the past: >>>>>>> >>>>>>> Do we have plan to extend 1024 to a larger value or 1024 >>>>>>> looks good for the future years? If we only care about >>>>>>> 1024, there's even no need for a new config filed, a >>>>>>> feature flag is more than enough. If we want to extend >>>>>>> it to e.g 2048, we definitely need to query vhost >>>>>>> backend's limit (even for vhost-kernel). >>>>>>> >>>>>> According to virtio spec (e.g. 2.4.4), unreasonably large >>>>>> descriptors are >>>>>> not encouraged to be used by the guest. If possible, I would >>>>>> suggest to use >>>>>> 1024 as the largest number of descriptors that the guest can >>>>>> chain, even when >>>>>> we have larger queue size in the future. That is, >>>>>> if (backend == QEMU backend) >>>>>> config.max_chain_size = 1023 (defined by the qemu >>>>>> backend implementation); >>>>>> else if (backend == vhost) >>>>>> config.max_chain_size = 1024; >>>>>> >>>>>> It is transparent to the guest. From the guest's point of >>>>>> view, all it knows is a value >>>>>> given to him via reading config.max_chain_size. >>>>> So not transparent actually, guest at least guest need to see >>>>> and check for this. So the question still, since you only care >>>>> about two cases in fact: >>>>> >>>>> - backend supports 1024 >>>>> - backend supports <1024 (qemu or whatever other backends) >>>>> >>>>> So it looks like a new feature flag is more than enough. If >>>>> device(backends) support this feature, it can make sure 1024 sgs >>>>> is supported? >>>>> >>>> That wouldn't be enough. For example, QEMU3.0 backend supports >>>> max_chain_size=1023, >>>> while QEMU4.0 backend supports max_chain_size=1021. 
How would the >>>> guest know >>>> the max size with the same feature flag? Would it still chain 1023 >>>> descriptors with QEMU4.0? >>>> >>>> Best, >>>> Wei >>> I believe we won't go back to less than 1024 in the future. It may be >>> worth to add a unittest for this to catch regression early. >>> >>> Thanks > I think I disagree with that. Smaller pipes a better (e.g. less cache > pressure) and you only need huge pipes because host thread gets > scheduled out for too long. With more CPUs there's less of a chance of > an overcommit so we'll be able to get by with smaller pipes in the > future. Agree, but we are talking about the upper limit. Even if 1024 is supported, small number of #sgs is still encouraged. > >> Consider the queue size is 256 now, I think maybe we can first make tx queue >> size configurable up to 1024, and then do the #sg stuffs on top. >> >> What's your opinion, Michael? >> >> Thanks > With a kernel backend, 1024 is problematic since we are then unable > to add any entries or handle cases where an entry crosses an MR region > boundary. We could support up to 512 with a kernel backend but no one > seems to want that :) Then I see issues with indirect descriptors. We try to allow up 1024 chained descriptors implicitly since e0e9b406470b ("vhost: max s/g to match qemu"). If guest can submit crossing MR descs, I'm afraid we've already had this bug since this commit. And actually this seems conflict to what spec said in 2.4.5: """ The number of descriptors in the table is defined by the queue size for this virtqueue: this is the maximum possible descriptor chain length. """ Technically, we had the same issue for rx since we allow 1024 queue size now. So actually, allowing the size to 1024 does not introduce any new trouble? > > With vhost-user the backend might be able to handle that. So an > acceptable option would be to allow 1K with vhost-user backends > only, trim it back with other backends. > I believe the idea is to clarify the maximum chain size instead of having any assumption. Thanks
On 06/15/2017 12:16 PM, Jason Wang wrote: > > > On 2017年06月14日 23:22, Michael S. Tsirkin wrote: >> On Wed, Jun 14, 2017 at 07:26:54PM +0800, Jason Wang wrote: >>> >>> On 2017年06月13日 18:46, Jason Wang wrote: >>>> >>>> On 2017年06月13日 17:50, Wei Wang wrote: >>>>> On 06/13/2017 05:04 PM, Jason Wang wrote: >>>>>> >>>>>> On 2017年06月13日 15:17, Wei Wang wrote: >>>>>>> On 06/13/2017 02:29 PM, Jason Wang wrote: >>>>>>>> The issue is what if there's a mismatch of max #sgs between >>>>>>>> qemu and >>>>>>>>>>> When the vhost backend is used, QEMU is not >>>>>>>>>>> involved in the data path. >>>>>>>>>>> The vhost backend >>>>>>>>>>> directly gets what is offered by the guest from the vq. Why >>>>>>>>>>> would >>>>>>>>>>> there be a mismatch of >>>>>>>>>>> max #sgs between QEMU and vhost, and what is >>>>>>>>>>> the QEMU side max #sgs >>>>>>>>>>> used for? Thanks. >>>>>>>>>> You need query the backend max #sgs in this case >>>>>>>>>> at least. no? If not >>>>>>>>>> how do you know the value is supported by the backend? >>>>>>>>>> >>>>>>>>>> Thanks >>>>>>>>>> >>>>>>>>> Here is my thought: vhost backend has already been >>>>>>>>> supporting 1024 sgs, >>>>>>>>> so I think it might not be necessary to query the >>>>>>>>> max sgs that the vhost >>>>>>>>> backend supports. In the setup phase, when QEMU >>>>>>>>> detects the backend is >>>>>>>>> vhost, it assumes 1024 max sgs is supported, instead >>>>>>>>> of giving an extra >>>>>>>>> call to query. >>>>>>>> We can probably assume vhost kernel supports up to 1024 >>>>>>>> sgs. But how about for other vhost-user backends? >>>>>>>> >>>>>>> So far, I haven't seen any vhost backend implementation >>>>>>> supporting less than 1024 sgs. >>>>>> Since vhost-user is an open protocol we can not check each >>>>>> implementation (some may be even close sourced). For safety, we >>>>>> need an explicit clarification on this. >>>>>> >>>>>>> >>>>>>>> And what you said here makes me ask one of my questions in the >>>>>>>> past: >>>>>>>> >>>>>>>> Do we have plan to extend 1024 to a larger value or 1024 >>>>>>>> looks good for the future years? If we only care about >>>>>>>> 1024, there's even no need for a new config filed, a >>>>>>>> feature flag is more than enough. If we want to extend >>>>>>>> it to e.g 2048, we definitely need to query vhost >>>>>>>> backend's limit (even for vhost-kernel). >>>>>>>> >>>>>>> According to virtio spec (e.g. 2.4.4), unreasonably large >>>>>>> descriptors are >>>>>>> not encouraged to be used by the guest. If possible, I would >>>>>>> suggest to use >>>>>>> 1024 as the largest number of descriptors that the guest can >>>>>>> chain, even when >>>>>>> we have larger queue size in the future. That is, >>>>>>> if (backend == QEMU backend) >>>>>>> config.max_chain_size = 1023 (defined by the qemu >>>>>>> backend implementation); >>>>>>> else if (backend == vhost) >>>>>>> config.max_chain_size = 1024; >>>>>>> >>>>>>> It is transparent to the guest. From the guest's point of >>>>>>> view, all it knows is a value >>>>>>> given to him via reading config.max_chain_size. >>>>>> So not transparent actually, guest at least guest need to see >>>>>> and check for this. So the question still, since you only care >>>>>> about two cases in fact: >>>>>> >>>>>> - backend supports 1024 >>>>>> - backend supports <1024 (qemu or whatever other backends) >>>>>> >>>>>> So it looks like a new feature flag is more than enough. If >>>>>> device(backends) support this feature, it can make sure 1024 sgs >>>>>> is supported? >>>>>> >>>>> That wouldn't be enough. 
For example, QEMU3.0 backend supports >>>>> max_chain_size=1023, >>>>> while QEMU4.0 backend supports max_chain_size=1021. How would the >>>>> guest know >>>>> the max size with the same feature flag? Would it still chain 1023 >>>>> descriptors with QEMU4.0? >>>>> >>>>> Best, >>>>> Wei >>>> I believe we won't go back to less than 1024 in the future. It may be >>>> worth to add a unittest for this to catch regression early. >>>> >>>> Thanks >> I think I disagree with that. Smaller pipes a better (e.g. less cache >> pressure) and you only need huge pipes because host thread gets >> scheduled out for too long. With more CPUs there's less of a chance of >> an overcommit so we'll be able to get by with smaller pipes in the >> future. > > Agree, but we are talking about the upper limit. Even if 1024 is > supported, small number of #sgs is still encouraged. > >> >>> Consider the queue size is 256 now, I think maybe we can first make >>> tx queue >>> size configurable up to 1024, and then do the #sg stuffs on top. >>> >>> What's your opinion, Michael? >>> >>> Thanks >> With a kernel backend, 1024 is problematic since we are then unable >> to add any entries or handle cases where an entry crosses an MR region >> boundary. We could support up to 512 with a kernel backend but no one >> seems to want that :) > > Then I see issues with indirect descriptors. > > We try to allow up 1024 chained descriptors implicitly since > e0e9b406470b ("vhost: max s/g to match qemu"). If guest can submit > crossing MR descs, I'm afraid we've already had this bug since this > commit. And actually this seems conflict to what spec said in 2.4.5: > > """ > The number of descriptors in the table is defined by the queue size > for this virtqueue: this is the maximum possible descriptor chain length. > """ > > Technically, we had the same issue for rx since we allow 1024 queue > size now. > > So actually, allowing the size to 1024 does not introduce any new > trouble? >> >> With vhost-user the backend might be able to handle that. So an >> acceptable option would be to allow 1K with vhost-user backends >> only, trim it back with other backends. >> > > I believe the idea is to clarify the maximum chain size instead of > having any assumption. > > I think the issues can be solved by VIRTIO_F_MAX_CHAIN_SIZE. For now, how about splitting it into two series of patches: 1) enable 1024 tx queue size for vhost-user, to let the users of vhost-user to easily use 1024 queue size. 2) enable VIRTIO_F_MAX_CHAIN_SIZE, to enhance robustness. Best, Wei
On Thu, Jun 15, 2017 at 02:52:01PM +0800, Wei Wang wrote: > On 06/15/2017 12:16 PM, Jason Wang wrote: > > > > > > On 2017年06月14日 23:22, Michael S. Tsirkin wrote: > > > On Wed, Jun 14, 2017 at 07:26:54PM +0800, Jason Wang wrote: > > > > > > > > On 2017年06月13日 18:46, Jason Wang wrote: > > > > > > > > > > On 2017年06月13日 17:50, Wei Wang wrote: > > > > > > On 06/13/2017 05:04 PM, Jason Wang wrote: > > > > > > > > > > > > > > On 2017年06月13日 15:17, Wei Wang wrote: > > > > > > > > On 06/13/2017 02:29 PM, Jason Wang wrote: > > > > > > > > > The issue is what if there's a mismatch of > > > > > > > > > max #sgs between qemu and > > > > > > > > > > > > When the vhost backend is used, QEMU is not > > > > > > > > > > > > involved in the data path. > > > > > > > > > > > > The vhost backend > > > > > > > > > > > > directly gets what is offered by > > > > > > > > > > > > the guest from the vq. Why would > > > > > > > > > > > > there be a mismatch of > > > > > > > > > > > > max #sgs between QEMU and vhost, and what is > > > > > > > > > > > > the QEMU side max #sgs > > > > > > > > > > > > used for? Thanks. > > > > > > > > > > > You need query the backend max #sgs in this case > > > > > > > > > > > at least. no? If not > > > > > > > > > > > how do you know the value is supported by the backend? > > > > > > > > > > > > > > > > > > > > > > Thanks > > > > > > > > > > > > > > > > > > > > > Here is my thought: vhost backend has already been > > > > > > > > > > supporting 1024 sgs, > > > > > > > > > > so I think it might not be necessary to query the > > > > > > > > > > max sgs that the vhost > > > > > > > > > > backend supports. In the setup phase, when QEMU > > > > > > > > > > detects the backend is > > > > > > > > > > vhost, it assumes 1024 max sgs is supported, instead > > > > > > > > > > of giving an extra > > > > > > > > > > call to query. > > > > > > > > > We can probably assume vhost kernel supports up to 1024 > > > > > > > > > sgs. But how about for other vhost-user backends? > > > > > > > > > > > > > > > > > So far, I haven't seen any vhost backend implementation > > > > > > > > supporting less than 1024 sgs. > > > > > > > Since vhost-user is an open protocol we can not check each > > > > > > > implementation (some may be even close sourced). For safety, we > > > > > > > need an explicit clarification on this. > > > > > > > > > > > > > > > > > > > > > > > > And what you said here makes me ask one of > > > > > > > > > my questions in the past: > > > > > > > > > > > > > > > > > > Do we have plan to extend 1024 to a larger value or 1024 > > > > > > > > > looks good for the future years? If we only care about > > > > > > > > > 1024, there's even no need for a new config filed, a > > > > > > > > > feature flag is more than enough. If we want to extend > > > > > > > > > it to e.g 2048, we definitely need to query vhost > > > > > > > > > backend's limit (even for vhost-kernel). > > > > > > > > > > > > > > > > > According to virtio spec (e.g. 2.4.4), unreasonably large > > > > > > > > descriptors are > > > > > > > > not encouraged to be used by the guest. If possible, I would > > > > > > > > suggest to use > > > > > > > > 1024 as the largest number of descriptors that the guest can > > > > > > > > chain, even when > > > > > > > > we have larger queue size in the future. 
That is, > > > > > > > > if (backend == QEMU backend) > > > > > > > > config.max_chain_size = 1023 (defined by the qemu > > > > > > > > backend implementation); > > > > > > > > else if (backend == vhost) > > > > > > > > config.max_chain_size = 1024; > > > > > > > > > > > > > > > > It is transparent to the guest. From the guest's point of > > > > > > > > view, all it knows is a value > > > > > > > > given to him via reading config.max_chain_size. > > > > > > > So not transparent actually, guest at least guest need to see > > > > > > > and check for this. So the question still, since you only care > > > > > > > about two cases in fact: > > > > > > > > > > > > > > - backend supports 1024 > > > > > > > - backend supports <1024 (qemu or whatever other backends) > > > > > > > > > > > > > > So it looks like a new feature flag is more than enough. If > > > > > > > device(backends) support this feature, it can make sure 1024 sgs > > > > > > > is supported? > > > > > > > > > > > > > That wouldn't be enough. For example, QEMU3.0 backend supports > > > > > > max_chain_size=1023, > > > > > > while QEMU4.0 backend supports max_chain_size=1021. How would the > > > > > > guest know > > > > > > the max size with the same feature flag? Would it still chain 1023 > > > > > > descriptors with QEMU4.0? > > > > > > > > > > > > Best, > > > > > > Wei > > > > > I believe we won't go back to less than 1024 in the future. It may be > > > > > worth to add a unittest for this to catch regression early. > > > > > > > > > > Thanks > > > I think I disagree with that. Smaller pipes a better (e.g. less cache > > > pressure) and you only need huge pipes because host thread gets > > > scheduled out for too long. With more CPUs there's less of a chance of > > > an overcommit so we'll be able to get by with smaller pipes in the > > > future. > > > > Agree, but we are talking about the upper limit. Even if 1024 is > > supported, small number of #sgs is still encouraged. > > > > > > > > > Consider the queue size is 256 now, I think maybe we can first > > > > make tx queue > > > > size configurable up to 1024, and then do the #sg stuffs on top. > > > > > > > > What's your opinion, Michael? > > > > > > > > Thanks > > > With a kernel backend, 1024 is problematic since we are then unable > > > to add any entries or handle cases where an entry crosses an MR region > > > boundary. We could support up to 512 with a kernel backend but no one > > > seems to want that :) > > > > Then I see issues with indirect descriptors. > > > > We try to allow up 1024 chained descriptors implicitly since > > e0e9b406470b ("vhost: max s/g to match qemu"). If guest can submit > > crossing MR descs, I'm afraid we've already had this bug since this > > commit. And actually this seems conflict to what spec said in 2.4.5: > > > > """ > > The number of descriptors in the table is defined by the queue size for > > this virtqueue: this is the maximum possible descriptor chain length. > > """ > > > > Technically, we had the same issue for rx since we allow 1024 queue size > > now. > > > > So actually, allowing the size to 1024 does not introduce any new > > trouble? > > > > > > With vhost-user the backend might be able to handle that. So an > > > acceptable option would be to allow 1K with vhost-user backends > > > only, trim it back with other backends. > > > > > > > I believe the idea is to clarify the maximum chain size instead of > > having any assumption. > > > > > > I think the issues can be solved by VIRTIO_F_MAX_CHAIN_SIZE. 
> > For now, how about splitting it into two series of patches: > 1) enable 1024 tx queue size for vhost-user, to let the users of vhost-user > to easily use 1024 queue size. Fine with me. 1) will get property from user but override it on !vhost-user. Do we need a protocol flag? It seems prudent but we get back to cross-version migration issues that are still pending solution. Marc Andre, what's the status of that work? > 2) enable VIRTIO_F_MAX_CHAIN_SIZE, to enhance robustness. Rather, to support it for more backends. > Best, > Wei
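A minimal sketch of the direction agreed here — take the property from the user but override it when the peer is not vhost-user — assuming QEMU's virtio-net internals (the helper name is invented; the peer-type check and MIN() are existing QEMU idioms):

    /* Sketch only: clamp the user-requested tx queue size unless the peer is
     * a vhost-user backend, which is assumed to cope with 1024-entry queues. */
    static uint16_t virtio_net_clamped_tx_queue_size(VirtIONet *n)
    {
        NetClientState *nc = qemu_get_queue(n->nic);
        uint16_t size = n->net_conf.tx_queue_size;   /* "tx_queue_size" property */

        if (!nc->peer || nc->peer->info->type != NET_CLIENT_DRIVER_VHOST_USER) {
            size = MIN(size, 512);   /* trim back for QEMU/vhost-kernel backends */
        }
        return size;
    }

The returned value would then be what virtio_net_add_queue() passes to virtio_add_queue() instead of the raw property.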
On 2017年06月16日 11:22, Michael S. Tsirkin wrote: >> I think the issues can be solved by VIRTIO_F_MAX_CHAIN_SIZE. >> >> For now, how about splitting it into two series of patches: >> 1) enable 1024 tx queue size for vhost-user, to let the users of vhost-user >> to easily use 1024 queue size. > Fine with me. 1) will get property from user but override it on > !vhost-user. Do we need a protocol flag? It seems prudent but we get > back to cross-version migration issues that are still pending solution. > Marc Andre, what's the status of that work? > >> 2) enable VIRTIO_F_MAX_CHAIN_SIZE, to enhance robustness. > Rather, to support it for more backends. Ok, if we want to support different values of max chain size in the future. It would be problematic for migration of cross backends, consider the case when migrating from 2048 (vhost-user) to 1024 (qemu/vhost-kernel). Thanks > >> Best, >> Wei > --
On 06/16/2017 04:57 PM, Jason Wang wrote: > > > On 2017年06月16日 11:22, Michael S. Tsirkin wrote: >>> I think the issues can be solved by VIRTIO_F_MAX_CHAIN_SIZE. >>> >>> For now, how about splitting it into two series of patches: >>> 1) enable 1024 tx queue size for vhost-user, to let the users of >>> vhost-user >>> to easily use 1024 queue size. >> Fine with me. 1) will get property from user but override it on >> !vhost-user. Do we need a protocol flag? It seems prudent but we get >> back to cross-version migration issues that are still pending solution. What do you have in mind about the protocol flag? Btw, I just tested the patch of 1), and it works fine with migration from the patched to non-patched version of QEMU. I'll send it out. Please have a check. >> Marc Andre, what's the status of that work? >> >>> 2) enable VIRTIO_F_MAX_CHAIN_SIZE, to enhance robustness. >> Rather, to support it for more backends. > > Ok, if we want to support different values of max chain size in the > future. It would be problematic for migration of cross backends, > consider the case when migrating from 2048 (vhost-user) to 1024 > (qemu/vhost-kernel). > I think that wouldn't be a problem. If there is a possibility to change the backend resulting in a change of config.max_chain_size, a configuration change notification can be injected to the guest, then the guest will read and get the new value. Best, Wei
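For reference, QEMU already has a primitive for the notification Wei describes; a sketch of how a device model might use it when the advertised limit changes (the max_chain_size field and the call site are assumptions here; virtio_notify_config() is an existing QEMU API):

    /* Sketch: surface a new max chain size to the guest after a backend
     * change. virtio_notify_config() raises the config-change interrupt so
     * the guest re-reads the device config space. */
    static void virtio_net_update_max_chain_size(VirtIONet *n, uint16_t new_max)
    {
        VirtIODevice *vdev = VIRTIO_DEVICE(n);

        n->max_chain_size = new_max;   /* hypothetical field mirrored into
                                          virtio_net_config by get_config() */
        virtio_notify_config(vdev);
    }

Whether in-flight requests that already exceed the new limit can be handled gracefully is exactly the concern raised in the follow-up below.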
On Fri, Jun 16, 2017 at 06:10:27PM +0800, Wei Wang wrote: > On 06/16/2017 04:57 PM, Jason Wang wrote: > > > > > > On 2017年06月16日 11:22, Michael S. Tsirkin wrote: > > > > I think the issues can be solved by VIRTIO_F_MAX_CHAIN_SIZE. > > > > > > > > For now, how about splitting it into two series of patches: > > > > 1) enable 1024 tx queue size for vhost-user, to let the users of > > > > vhost-user > > > > to easily use 1024 queue size. > > > Fine with me. 1) will get property from user but override it on > > > !vhost-user. Do we need a protocol flag? It seems prudent but we get > > > back to cross-version migration issues that are still pending solution. > What do you have in mind about the protocol flag? Merely this: older clients might be confused if they get a s/g with 1024 entries. > Btw, I just tested the patch of 1), and it works fine with migration from > the > patched to non-patched version of QEMU. I'll send it out. Please have a > check. > > > > > Marc Andre, what's the status of that work? > > > > > > > 2) enable VIRTIO_F_MAX_CHAIN_SIZE, to enhance robustness. > > > Rather, to support it for more backends. > > > > Ok, if we want to support different values of max chain size in the > > future. It would be problematic for migration of cross backends, > > consider the case when migrating from 2048 (vhost-user) to 1024 > > (qemu/vhost-kernel). > > > > I think that wouldn't be a problem. If there is a possibility to change the > backend resulting in a change of config.max_change_size, a configuration > change notification can be injected to the guest, then guest will read and > get the new value. > > Best, > Wei This might not be supportable by all guests. E.g. some requests might already be in the queue. I'm not against reconfiguring devices across migration but I think it's a big project. As a 1st step I would focus on keeping configuration consistent across migrations.
On Fri, Jun 16, 2017 at 04:57:01PM +0800, Jason Wang wrote: > > > On 2017年06月16日 11:22, Michael S. Tsirkin wrote: > > > I think the issues can be solved by VIRTIO_F_MAX_CHAIN_SIZE. > > > > > > For now, how about splitting it into two series of patches: > > > 1) enable 1024 tx queue size for vhost-user, to let the users of vhost-user > > > to easily use 1024 queue size. > > Fine with me. 1) will get property from user but override it on > > !vhost-user. Do we need a protocol flag? It seems prudent but we get > > back to cross-version migration issues that a04re still pending solution. > > Marc Andre, what's the status of that work? > > > > > 2) enable VIRTIO_F_MAX_CHAIN_SIZE, to enhance robustness. > > Rather, to support it for more backends. > > Ok, if we want to support different values of max chain size in the future. > It would be problematic for migration of cross backends, consider the case > when migrating from 2048 (vhost-user) to 1024 (qemu/vhost-kernel). > > Thanks That's already a problem, and it's growing with each new feature. Maxime looked at supporting vhost-user backends cross-version migration, I think we must merge some solution sooner rather than later, preferably by the next release. Maxime, any update here? Do we need a meeting to reach consensus? > > > > > Best, > > > Wei > > --
On 06/16/2017 05:19 PM, Michael S. Tsirkin wrote: > On Fri, Jun 16, 2017 at 04:57:01PM +0800, Jason Wang wrote: >> >> >> On 2017年06月16日 11:22, Michael S. Tsirkin wrote: >>>> I think the issues can be solved by VIRTIO_F_MAX_CHAIN_SIZE. >>>> >>>> For now, how about splitting it into two series of patches: >>>> 1) enable 1024 tx queue size for vhost-user, to let the users of vhost-user >>>> to easily use 1024 queue size. >>> Fine with me. 1) will get property from user but override it on >>> !vhost-user. Do we need a protocol flag? It seems prudent but we get >>> back to cross-version migration issues that a04re still pending solution. >>> Marc Andre, what's the status of that work? >>> >>>> 2) enable VIRTIO_F_MAX_CHAIN_SIZE, to enhance robustness. >>> Rather, to support it for more backends. >> >> Ok, if we want to support different values of max chain size in the future. >> It would be problematic for migration of cross backends, consider the case >> when migrating from 2048 (vhost-user) to 1024 (qemu/vhost-kernel). >> >> Thanks > > That's already a problem, and it's growing with each new feature. > Maxime looked at supporting vhost-user backends cross-version migration, > I think we must merge some solution sooner rather than later, preferably > by the next release. > > Maxime, any update here? Do we need a meeting to reach consensus? No update, I haven't found time to progress on the topic yet. For those who aren't aware of my initial proposal, you may find it here: https://www.spinics.net/linux/fedora/libvir/msg142668.html If my understanding is correct, you were concerned about the complexity of my proposal which involved too many layers. Your suggestion was to have a tool provided with qemu that would connect to vhost-user socket and query the backend capabilities. I'm not 100% clear how it would work, as the trend is to start the backend in client mode, meaning QEMU creates the socket. In this case, should the tool create the socket and management tool request the backend to connect to it? I think it could make sense to have a meeting, but maybe we should first discuss the solutions on the list for efficiency. For the delivery, what is QEMU v2.10 planned release date? Note that my solution doesn't involve QEMU, so it would not be tight to QEMU release date. But, that doesn't mean it would be delivered sooner than your solution. Maxime
On Fri, Jun 16, 2017 at 07:04:27PM +0200, Maxime Coquelin wrote: > > > On 06/16/2017 05:19 PM, Michael S. Tsirkin wrote: > > On Fri, Jun 16, 2017 at 04:57:01PM +0800, Jason Wang wrote: > > > > > > > > > On 2017年06月16日 11:22, Michael S. Tsirkin wrote: > > > > > I think the issues can be solved by VIRTIO_F_MAX_CHAIN_SIZE. > > > > > > > > > > For now, how about splitting it into two series of patches: > > > > > 1) enable 1024 tx queue size for vhost-user, to let the users of vhost-user > > > > > to easily use 1024 queue size. > > > > Fine with me. 1) will get property from user but override it on > > > > !vhost-user. Do we need a protocol flag? It seems prudent but we get > > > > back to cross-version migration issues that a04re still pending solution. > > > > Marc Andre, what's the status of that work? > > > > > > > > > 2) enable VIRTIO_F_MAX_CHAIN_SIZE, to enhance robustness. > > > > Rather, to support it for more backends. > > > > > > Ok, if we want to support different values of max chain size in the future. > > > It would be problematic for migration of cross backends, consider the case > > > when migrating from 2048 (vhost-user) to 1024 (qemu/vhost-kernel). > > > > > > Thanks > > > > That's already a problem, and it's growing with each new feature. > > Maxime looked at supporting vhost-user backends cross-version migration, > > I think we must merge some solution sooner rather than later, preferably > > by the next release. > > > > Maxime, any update here? Do we need a meeting to reach consensus? > > No update, I haven't found time to progress on the topic yet. > > For those who aren't aware of my initial proposal, you may find it here: > https://www.spinics.net/linux/fedora/libvir/msg142668.html > > If my understanding is correct, you were concerned about the complexity of > my > proposal which involved too many layers. Your suggestion was to have a tool > provided with qemu that would connect to vhost-user socket and query the > backend capabilities. > I'm not 100% clear how it would work, as the trend is to start the backend > in > client mode, meaning QEMU creates the socket. In this case, should the tool > create the socket and management tool request the backend to connect to it? > > I think it could make sense to have a meeting, but maybe we should first > discuss the solutions on the list for efficiency. > > For the delivery, what is QEMU v2.10 planned release date? > > Note that my solution doesn't involve QEMU, so it would not be tight to QEMU > release date. But, that doesn't mean it would be delivered sooner than > your solution. > > Maxime I'd say let's go with your proposal (Solution 3 above).
On 06/16/2017 11:15 PM, Michael S. Tsirkin wrote: > On Fri, Jun 16, 2017 at 06:10:27PM +0800, Wei Wang wrote: >> On 06/16/2017 04:57 PM, Jason Wang wrote: >>> >>> On 2017年06月16日 11:22, Michael S. Tsirkin wrote: >>>>> I think the issues can be solved by VIRTIO_F_MAX_CHAIN_SIZE. >>>>> >>>>> For now, how about splitting it into two series of patches: >>>>> 1) enable 1024 tx queue size for vhost-user, to let the users of >>>>> vhost-user >>>>> to easily use 1024 queue size. >>>> Fine with me. 1) will get property from user but override it on >>>> !vhost-user. Do we need a protocol flag? It seems prudent but we get >>>> back to cross-version migration issues that are still pending solution. >> What do you have in mind about the protocol flag? > Merely this: older clients might be confused if they get > a s/g with 1024 entries. I don't disagree to add that. But the client (i.e. vhost-user slave) is a host userspace program, and it seems that users can easily patch their host side applications if there is any issue, maybe we also don't need to be too prudent about that, do we? Also, the usage of the protocol flag looks like a duplicate of what we plan to add in the next step - the virtio common feature flag, VIRTIO_F_MAX_CHAIN_SIZE, which is more general and can be used across different backends. >> Btw, I just tested the patch of 1), and it works fine with migration from >> the >> patched to non-patched version of QEMU. I'll send it out. Please have a >> check. >> >> >>>> Marc Andre, what's the status of that work? >>>> >>>>> 2) enable VIRTIO_F_MAX_CHAIN_SIZE, to enhance robustness. >>>> Rather, to support it for more backends. >>> Ok, if we want to support different values of max chain size in the >>> future. It would be problematic for migration of cross backends, >>> consider the case when migrating from 2048 (vhost-user) to 1024 >>> (qemu/vhost-kernel). >>> >> I think that wouldn't be a problem. If there is a possibility to change the >> backend resulting in a change of config.max_change_size, a configuration >> change notification can be injected to the guest, then guest will read and >> get the new value. >> >> Best, >> Wei > This might not be supportable by all guests. E.g. some requests might > already be in the queue. I'm not against reconfiguring devices across > migration but I think it's a big project. As a 1st step I would focus on > keeping configuration consistent across migrations. > Would it be common and fair for vendors to migrate from a new QEMU to an old QEMU, which would downgrade the services that they provide to their users? Even for any reason that downgrade happens, I think it is sensible to sacrifice something (e.g. drop the unsupported requests from the queue) for the transition, right? On the other side, packet drop is normally handled at the packet protocol layer, e.g. TCP. Also, usually some amount of packet drop is acceptable during live migration. Best, Wei
On Sat, Jun 17, 2017 at 04:37:02PM +0800, Wei Wang wrote: > On 06/16/2017 11:15 PM, Michael S. Tsirkin wrote: > > On Fri, Jun 16, 2017 at 06:10:27PM +0800, Wei Wang wrote: > > > On 06/16/2017 04:57 PM, Jason Wang wrote: > > > > > > > > On 2017年06月16日 11:22, Michael S. Tsirkin wrote: > > > > > > I think the issues can be solved by VIRTIO_F_MAX_CHAIN_SIZE. > > > > > > > > > > > > For now, how about splitting it into two series of patches: > > > > > > 1) enable 1024 tx queue size for vhost-user, to let the users of > > > > > > vhost-user > > > > > > to easily use 1024 queue size. > > > > > Fine with me. 1) will get property from user but override it on > > > > > !vhost-user. Do we need a protocol flag? It seems prudent but we get > > > > > back to cross-version migration issues that are still pending solution. > > > What do you have in mind about the protocol flag? > > Merely this: older clients might be confused if they get > > a s/g with 1024 entries. > > I don't disagree to add that. But the client (i.e. vhost-user > slave) is a host userspace program, and it seems that users can > easily patch their host side applications if there is any issue, > maybe we also don't need to be too prudent about that, do we? I won't insist on this but it might not be easy. For example, are there clients that want to forward the packet to host kernel as a s/g? > > Also, the usage of the protocol flag looks like a duplicate of what > we plan to add in the next step - the virtio common feature flag, > VIRTIO_F_MAX_CHAIN_SIZE, which is more general and can be used > across different backends. > > > > Btw, I just tested the patch of 1), and it works fine with migration from > > > the > > > patched to non-patched version of QEMU. I'll send it out. Please have a > > > check. > > > > > > > > > > > Marc Andre, what's the status of that work? > > > > > > > > > > > 2) enable VIRTIO_F_MAX_CHAIN_SIZE, to enhance robustness. > > > > > Rather, to support it for more backends. > > > > Ok, if we want to support different values of max chain size in the > > > > future. It would be problematic for migration of cross backends, > > > > consider the case when migrating from 2048 (vhost-user) to 1024 > > > > (qemu/vhost-kernel). > > > > > > > I think that wouldn't be a problem. If there is a possibility to change the > > > backend resulting in a change of config.max_change_size, a configuration > > > change notification can be injected to the guest, then guest will read and > > > get the new value. > > > > > > Best, > > > Wei > > This might not be supportable by all guests. E.g. some requests might > > already be in the queue. I'm not against reconfiguring devices across > > migration but I think it's a big project. As a 1st step I would focus on > > keeping configuration consistent across migrations. > > > > Would it be common and fair for vendors to migrate from a new QEMU > to an old QEMU, which would downgrade the services that they provide > to their users? > > Even for any reason that downgrade happens, I think it is > sensible to sacrifice something (e.g. drop the unsupported > requests from the queue) for the transition, right? > > On the other side, packet drop is normally handled at the packet > protocol layer, e.g. TCP. Also, usually some amount of packet drop > is acceptable during live migration. > > Best, > Wei This isn't how people expect migration to be handled ATM. For now, I suggest we assume both sides need to be consistent.
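One existing mechanism for the "keep configuration consistent across migrations" approach is QEMU's machine-type compat properties; a sketch of what pinning the old default for older machine types could look like (whether such an entry is needed, and the exact value, are assumptions; GlobalProperty is the existing QEMU type used for compat entries):

    #include "hw/qdev-core.h"   /* GlobalProperty; header location may differ */

    /* Sketch: keep older machine types at the old tx queue size so both
     * ends of a migration agree on the device configuration. */
    static GlobalProperty virtio_net_tx_queue_size_compat[] = {
        {
            .driver   = "virtio-net-device",
            .property = "tx_queue_size",
            .value    = "256",
        },
    };

In practice such entries are added to the HW_COMPAT_* macro for the last released machine type.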
On 06/19/2017 03:46 AM, Michael S. Tsirkin wrote: >>>> >>>> What do you have in mind about the protocol flag? >>> Merely this: older clients might be confused if they get >>> a s/g with 1024 entries. >> I don't disagree to add that. But the client (i.e. vhost-user >> slave) is a host userspace program, and it seems that users can >> easily patch their host side applications if there is any issue, >> maybe we also don't need to be too prudent about that, do we? > I won't insist on this but it might not be easy. For example, are there > clients that want to forward the packet to host kernel as a s/g? Not sure about all the client implementation, but it sounds like a strange and inefficient usage of vhost-user to pass packets to the host kernel, given vhost kernel backend is already there for the usage. Best, Wei
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index 7d091c9..5c82f54 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -33,8 +33,13 @@
 
 /* previously fixed value */
 #define VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 256
+#define VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE 1024
+
 /* for now, only allow larger queues; with virtio-1, guest can downsize */
 #define VIRTIO_NET_RX_QUEUE_MIN_SIZE VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE
+#define VIRTIO_NET_TX_QUEUE_MIN_SIZE 256
+
+#define VIRTIO_NET_MAX_CHAIN_SIZE 1023
 
 /*
  * Calculate the number of bytes up to and including the given 'field' of
@@ -57,6 +62,8 @@ static VirtIOFeature feature_sizes[] = {
      .end = endof(struct virtio_net_config, max_virtqueue_pairs)},
     {.flags = 1 << VIRTIO_NET_F_MTU,
      .end = endof(struct virtio_net_config, mtu)},
+    {.flags = 1 << VIRTIO_F_MAX_CHAIN_SIZE,
+     .end = endof(struct virtio_net_config, max_chain_size)},
     {}
 };
 
@@ -84,6 +91,7 @@ static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
         virtio_stw_p(vdev, &netcfg.status, n->status);
         virtio_stw_p(vdev, &netcfg.max_virtqueue_pairs, n->max_queues);
         virtio_stw_p(vdev, &netcfg.mtu, n->net_conf.mtu);
+        virtio_stw_p(vdev, &netcfg.max_chain_size, VIRTIO_NET_MAX_CHAIN_SIZE);
         memcpy(netcfg.mac, n->mac, ETH_ALEN);
         memcpy(config, &netcfg, n->config_size);
     }
@@ -635,9 +643,33 @@ static inline uint64_t virtio_net_supported_guest_offloads(VirtIONet *n)
     return virtio_net_guest_offloads_by_features(vdev->guest_features);
 }
 
+static bool is_tx(int queue_index)
+{
+    return queue_index % 2 == 1;
+}
+
+static void virtio_net_reconfig_tx_queue_size(VirtIONet *n)
+{
+    VirtIODevice *vdev = VIRTIO_DEVICE(n);
+    int i, num_queues = virtio_get_num_queues(vdev);
+
+    /* Return when the queue size is already less than the 1024 */
+    if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE) {
+        return;
+    }
+
+    for (i = 0; i < num_queues; i++) {
+        if (is_tx(i)) {
+            n->net_conf.tx_queue_size = VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE / 2;
+            virtio_queue_set_num(vdev, i, n->net_conf.tx_queue_size);
+        }
+    }
+}
+
 static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features)
 {
     VirtIONet *n = VIRTIO_NET(vdev);
+    NetClientState *nc = qemu_get_queue(n->nic);
     int i;
 
     virtio_net_set_multiqueue(n,
@@ -649,6 +681,16 @@ static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features)
                               virtio_has_feature(features,
                                                  VIRTIO_F_VERSION_1));
 
+    /*
+     * When the traditional QEMU backend is used, using 1024 tx queue size
+     * requires the driver to support the VIRTIO_F_MAX_CHAIN_SIZE feature. If
+     * the feature is not supported, reconfigure the tx queue size to 512.
+     */
+    if (!get_vhost_net(nc->peer) &&
+        !virtio_has_feature(features, VIRTIO_F_MAX_CHAIN_SIZE)) {
+        virtio_net_reconfig_tx_queue_size(n);
+    }
+
     if (n->has_vnet_hdr) {
         n->curr_guest_offloads =
             virtio_net_guest_offloads_by_features(features);
@@ -1297,8 +1339,8 @@ static int32_t virtio_net_flush_tx(VirtIONetQueue *q)
 
         out_num = elem->out_num;
         out_sg = elem->out_sg;
-        if (out_num < 1) {
-            virtio_error(vdev, "virtio-net header not in first element");
+        if (out_num < 1 || out_num > VIRTIO_NET_MAX_CHAIN_SIZE) {
+            virtio_error(vdev, "no packet or too large vring desc chain");
             virtqueue_detach_element(q->tx_vq, elem, 0);
             g_free(elem);
             return -EINVAL;
@@ -1496,13 +1538,15 @@ static void virtio_net_add_queue(VirtIONet *n, int index)
                                            virtio_net_handle_rx);
     if (n->net_conf.tx && !strcmp(n->net_conf.tx, "timer")) {
         n->vqs[index].tx_vq =
-            virtio_add_queue(vdev, 256, virtio_net_handle_tx_timer);
+            virtio_add_queue(vdev, n->net_conf.tx_queue_size,
+                             virtio_net_handle_tx_timer);
         n->vqs[index].tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
                                               virtio_net_tx_timer,
                                               &n->vqs[index]);
     } else {
         n->vqs[index].tx_vq =
-            virtio_add_queue(vdev, 256, virtio_net_handle_tx_bh);
+            virtio_add_queue(vdev, n->net_conf.tx_queue_size,
+                             virtio_net_handle_tx_bh);
         n->vqs[index].tx_bh = qemu_bh_new(virtio_net_tx_bh, &n->vqs[index]);
     }
 
@@ -1891,6 +1935,10 @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp)
         n->host_features |= (0x1 << VIRTIO_NET_F_MTU);
     }
 
+    if (virtio_host_has_feature(vdev, VIRTIO_F_MAX_CHAIN_SIZE)) {
+        n->host_features |= (0x1 << VIRTIO_F_MAX_CHAIN_SIZE);
+    }
+
     virtio_net_set_config_size(n, n->host_features);
     virtio_init(vdev, "virtio-net", VIRTIO_ID_NET, n->config_size);
 
@@ -1910,6 +1958,17 @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp)
         return;
     }
 
+    if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_MIN_SIZE ||
+        n->net_conf.tx_queue_size > VIRTQUEUE_MAX_SIZE ||
+        (n->net_conf.tx_queue_size & (n->net_conf.tx_queue_size - 1))) {
+        error_setg(errp, "Invalid tx_queue_size (= %" PRIu16 "), "
+                   "must be a power of 2 between %d and %d.",
+                   n->net_conf.tx_queue_size, VIRTIO_NET_TX_QUEUE_MIN_SIZE,
+                   VIRTQUEUE_MAX_SIZE);
+        virtio_cleanup(vdev);
+        return;
+    }
+
     n->max_queues = MAX(n->nic_conf.peers.queues, 1);
     if (n->max_queues * 2 + 1 > VIRTIO_QUEUE_MAX) {
         error_setg(errp, "Invalid number of queues (= %" PRIu32 "), "
@@ -2089,6 +2148,8 @@ static Property virtio_net_properties[] = {
     DEFINE_PROP_STRING("tx", VirtIONet, net_conf.tx),
     DEFINE_PROP_UINT16("rx_queue_size", VirtIONet, net_conf.rx_queue_size,
                        VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE),
+    DEFINE_PROP_UINT16("tx_queue_size", VirtIONet, net_conf.tx_queue_size,
+                       VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE),
     DEFINE_PROP_UINT16("host_mtu", VirtIONet, net_conf.mtu, 0),
     DEFINE_PROP_END_OF_LIST(),
 };
diff --git a/include/hw/virtio/virtio-net.h b/include/hw/virtio/virtio-net.h
index 1eec9a2..fd944ba 100644
--- a/include/hw/virtio/virtio-net.h
+++ b/include/hw/virtio/virtio-net.h
@@ -36,6 +36,7 @@ typedef struct virtio_net_conf
     int32_t txburst;
     char *tx;
     uint16_t rx_queue_size;
+    uint16_t tx_queue_size;
     uint16_t mtu;
 } virtio_net_conf;
 
diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
index 7b6edba..8e85e41 100644
--- a/include/hw/virtio/virtio.h
+++ b/include/hw/virtio/virtio.h
@@ -260,6 +260,8 @@ typedef struct VirtIORNGConf VirtIORNGConf;
                       VIRTIO_F_NOTIFY_ON_EMPTY, true), \
     DEFINE_PROP_BIT64("any_layout", _state, _field, \
                       VIRTIO_F_ANY_LAYOUT, true), \
+    DEFINE_PROP_BIT64("max_chain_size", _state, _field, \
+                      VIRTIO_F_MAX_CHAIN_SIZE, true), \
     DEFINE_PROP_BIT64("iommu_platform", _state, _field, \
                       VIRTIO_F_IOMMU_PLATFORM, false)
 
diff --git a/include/standard-headers/linux/virtio_config.h b/include/standard-headers/linux/virtio_config.h
index b777069..b70cbfe 100644
--- a/include/standard-headers/linux/virtio_config.h
+++ b/include/standard-headers/linux/virtio_config.h
@@ -60,6 +60,9 @@
 #define VIRTIO_F_ANY_LAYOUT 27
 #endif /* VIRTIO_CONFIG_NO_LEGACY */
 
+/* Guest chains vring descriptors within a limit */
+#define VIRTIO_F_MAX_CHAIN_SIZE 31
+
 /* v1.0 compliant. */
 #define VIRTIO_F_VERSION_1 32
 
diff --git a/include/standard-headers/linux/virtio_net.h b/include/standard-headers/linux/virtio_net.h
index 30ff249..729aaa8 100644
--- a/include/standard-headers/linux/virtio_net.h
+++ b/include/standard-headers/linux/virtio_net.h
@@ -76,6 +76,8 @@ struct virtio_net_config {
     uint16_t max_virtqueue_pairs;
     /* Default maximum transmit unit advice */
     uint16_t mtu;
+    /* Maximum number of vring descriptors that can be chained */
+    uint16_t max_chain_size;
 } QEMU_PACKED;
 
 /*
This patch enables the virtio-net tx queue size to be configurable between 256 and 1024 by the user. The queue size specified by the user should be a power of 2. If "tx_queue_size" is not offered by the user, the default queue size, 1024, will be used.

For the traditional QEMU backend, setting the tx queue size to be 1024 requires the guest virtio driver to support the VIRTIO_F_MAX_CHAIN_SIZE feature. This feature restricts the guest driver from chaining 1024 vring descriptors, which may cause the device side implementation to send more than 1024 iov to writev.

VIRTIO_F_MAX_CHAIN_SIZE is a common transport feature added for all virtio devices. However, each device has the flexibility to set its own max chain size to limit how many vring descriptors its driver may chain. Currently, the max chain size of the virtio-net device is set to 1023.

In the case that the tx queue size is set to 1024 and the VIRTIO_F_MAX_CHAIN_SIZE feature is not supported by the guest driver, the tx queue size will be reconfigured to be 512.

Signed-off-by: Wei Wang <wei.w.wang@intel.com>

RFC to v1 changes:
1) change VIRTIO_F_MAX_CHAIN_SIZE to be a common virtio feature (was virtio-net specific);
2) change the default tx queue size to be 1024 (was 256);
3) For the vhost backend case, setting tx queue size to be 1024 doesn't require the VIRTIO_F_MAX_CHAIN_SIZE feature support.
---
 hw/net/virtio-net.c                            | 69 ++++++++++++++++++++++++--
 include/hw/virtio/virtio-net.h                 |  1 +
 include/hw/virtio/virtio.h                     |  2 +
 include/standard-headers/linux/virtio_config.h |  3 ++
 include/standard-headers/linux/virtio_net.h    |  2 +
 5 files changed, 73 insertions(+), 4 deletions(-)
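For completeness, once the "tx_queue_size" property exists, a vhost-user setup would request the larger queue roughly as follows (the ids and socket path are examples; the shared-memory options vhost-user additionally requires are omitted):

    qemu-system-x86_64 ... \
        -chardev socket,id=chr0,path=/tmp/vhost-user.sock \
        -netdev vhost-user,id=net0,chardev=chr0 \
        -device virtio-net-pci,netdev=net0,tx_queue_size=1024

With a non-vhost-user backend the device is expected to trim the value back, as discussed in the thread above.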