Message ID | 3cb082f1c88f3f2ef1fc250dbc0745fb79c745c7.1660362668.git.bobby.eshleman@bytedance.com (mailing list archive) |
---|---|
State | Changes Requested |
Delegated to: | Netdev Maintainers |
Headers | show |
Series | virtio/vsock: introduce dgrams, sk_buff, and qdisc | expand |
Hi Bobby,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on mst-vhost/linux-next]
[also build test WARNING on linus/master v6.0-rc1 next-20220815]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url: https://github.com/intel-lab-lkp/linux/commits/Bobby-Eshleman/virtio-vsock-introduce-dgrams-sk_buff-and-qdisc/20220816-015812
base: https://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost.git linux-next
config: m68k-allyesconfig (https://download.01.org/0day-ci/archive/20220816/202208160405.cG02E3MZ-lkp@intel.com/config)
compiler: m68k-linux-gcc (GCC) 12.1.0

reproduce (this is a W=1 build):

```
wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# https://github.com/intel-lab-lkp/linux/commit/cbb332da78c86ac574688831ed6f404d04d506db
git remote add linux-review https://github.com/intel-lab-lkp/linux
git fetch --no-tags linux-review Bobby-Eshleman/virtio-vsock-introduce-dgrams-sk_buff-and-qdisc/20220816-015812
git checkout cbb332da78c86ac574688831ed6f404d04d506db
# save the config file
mkdir build_dir && cp config build_dir/.config
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 O=build_dir ARCH=m68k SHELL=/bin/bash net/vmw_vsock/
```

If you fix the issue, kindly add following tag where applicable
Reported-by: kernel test robot <lkp@intel.com>

All warnings (new ones prefixed by >>):

```
   net/vmw_vsock/virtio_transport_common.c: In function 'virtio_transport_dgram_do_dequeue':
>> net/vmw_vsock/virtio_transport_common.c:605:13: warning: variable 'free_space' set but not used [-Wunused-but-set-variable]
     605 |         u32 free_space;
         |             ^~~~~~~~~~
```

vim +/free_space +605 net/vmw_vsock/virtio_transport_common.c

```
   597
   598  static ssize_t
   599  virtio_transport_dgram_do_dequeue(struct vsock_sock *vsk,
   600                                    struct msghdr *msg, size_t len)
   601  {
   602          struct virtio_vsock_sock *vvs = vsk->trans;
   603          struct sk_buff *skb;
   604          size_t total = 0;
 > 605          u32 free_space;
   606          int err = -EFAULT;
   607
   608          spin_lock_bh(&vvs->rx_lock);
   609          if (total < len && !skb_queue_empty_lockless(&vvs->rx_queue)) {
   610                  skb = __skb_dequeue(&vvs->rx_queue);
   611
   612                  total = len;
   613                  if (total > skb->len - vsock_metadata(skb)->off)
   614                          total = skb->len - vsock_metadata(skb)->off;
   615                  else if (total < skb->len - vsock_metadata(skb)->off)
   616                          msg->msg_flags |= MSG_TRUNC;
   617
   618                  /* sk_lock is held by caller so no one else can dequeue.
   619                   * Unlock rx_lock since memcpy_to_msg() may sleep.
   620                   */
   621                  spin_unlock_bh(&vvs->rx_lock);
   622
   623                  err = memcpy_to_msg(msg, skb->data + vsock_metadata(skb)->off, total);
   624                  if (err)
   625                          return err;
   626
   627                  spin_lock_bh(&vvs->rx_lock);
   628
   629                  virtio_transport_dec_rx_pkt(vvs, skb);
   630                  consume_skb(skb);
   631          }
   632
   633          free_space = vvs->buf_alloc - (vvs->fwd_cnt - vvs->last_fwd_cnt);
   634
   635          spin_unlock_bh(&vvs->rx_lock);
   636
   637          if (total > 0 && msg->msg_name) {
   638                  /* Provide the address of the sender. */
   639                  DECLARE_SOCKADDR(struct sockaddr_vm *, vm_addr, msg->msg_name);
   640
   641                  vsock_addr_init(vm_addr, le64_to_cpu(vsock_hdr(skb)->src_cid),
   642                                  le32_to_cpu(vsock_hdr(skb)->src_port));
   643                  msg->msg_namelen = sizeof(*vm_addr);
   644          }
   645          return total;
   646  }
   647
```
CC'ing virtio-dev@lists.oasis-open.org On Mon, Aug 15, 2022 at 10:56:08AM -0700, Bobby Eshleman wrote: > This patch supports dgram in virtio and on the vhost side. > > Signed-off-by: Jiang Wang <jiang.wang@bytedance.com> > Signed-off-by: Bobby Eshleman <bobby.eshleman@bytedance.com> > --- > drivers/vhost/vsock.c | 2 +- > include/net/af_vsock.h | 2 + > include/uapi/linux/virtio_vsock.h | 1 + > net/vmw_vsock/af_vsock.c | 26 +++- > net/vmw_vsock/virtio_transport.c | 2 +- > net/vmw_vsock/virtio_transport_common.c | 173 ++++++++++++++++++++++-- > 6 files changed, 186 insertions(+), 20 deletions(-) > > diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c > index a5d1bdb786fe..3dc72a5647ca 100644 > --- a/drivers/vhost/vsock.c > +++ b/drivers/vhost/vsock.c > @@ -925,7 +925,7 @@ static int __init vhost_vsock_init(void) > int ret; > > ret = vsock_core_register(&vhost_transport.transport, > - VSOCK_TRANSPORT_F_H2G); > + VSOCK_TRANSPORT_F_H2G | VSOCK_TRANSPORT_F_DGRAM); > if (ret < 0) > return ret; > > diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h > index 1c53c4c4d88f..37e55c81e4df 100644 > --- a/include/net/af_vsock.h > +++ b/include/net/af_vsock.h > @@ -78,6 +78,8 @@ struct vsock_sock { > s64 vsock_stream_has_data(struct vsock_sock *vsk); > s64 vsock_stream_has_space(struct vsock_sock *vsk); > struct sock *vsock_create_connected(struct sock *parent); > +int vsock_bind_stream(struct vsock_sock *vsk, > + struct sockaddr_vm *addr); > > /**** TRANSPORT ****/ > > diff --git a/include/uapi/linux/virtio_vsock.h b/include/uapi/linux/virtio_vsock.h > index 857df3a3a70d..0975b9c88292 100644 > --- a/include/uapi/linux/virtio_vsock.h > +++ b/include/uapi/linux/virtio_vsock.h > @@ -70,6 +70,7 @@ struct virtio_vsock_hdr { > enum virtio_vsock_type { > VIRTIO_VSOCK_TYPE_STREAM = 1, > VIRTIO_VSOCK_TYPE_SEQPACKET = 2, > + VIRTIO_VSOCK_TYPE_DGRAM = 3, > }; > > enum virtio_vsock_op { > diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c > index 
1893f8aafa48..87e4ae1866d3 100644 > --- a/net/vmw_vsock/af_vsock.c > +++ b/net/vmw_vsock/af_vsock.c > @@ -675,6 +675,19 @@ static int __vsock_bind_connectible(struct vsock_sock *vsk, > return 0; > } > > +int vsock_bind_stream(struct vsock_sock *vsk, > + struct sockaddr_vm *addr) > +{ > + int retval; > + > + spin_lock_bh(&vsock_table_lock); > + retval = __vsock_bind_connectible(vsk, addr); > + spin_unlock_bh(&vsock_table_lock); > + > + return retval; > +} > +EXPORT_SYMBOL(vsock_bind_stream); > + > static int __vsock_bind_dgram(struct vsock_sock *vsk, > struct sockaddr_vm *addr) > { > @@ -2363,11 +2376,16 @@ int vsock_core_register(const struct vsock_transport *t, int features) > } > > if (features & VSOCK_TRANSPORT_F_DGRAM) { > - if (t_dgram) { > - err = -EBUSY; > - goto err_busy; > + /* TODO: always chose the G2H variant over others, support nesting later */ > + if (features & VSOCK_TRANSPORT_F_G2H) { > + if (t_dgram) > + pr_warn("virtio_vsock: t_dgram already set\n"); > + t_dgram = t; > + } > + > + if (!t_dgram) { > + t_dgram = t; > } > - t_dgram = t; > } > > if (features & VSOCK_TRANSPORT_F_LOCAL) { > diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c > index 073314312683..d4526ca462d2 100644 > --- a/net/vmw_vsock/virtio_transport.c > +++ b/net/vmw_vsock/virtio_transport.c > @@ -850,7 +850,7 @@ static int __init virtio_vsock_init(void) > return -ENOMEM; > > ret = vsock_core_register(&virtio_transport.transport, > - VSOCK_TRANSPORT_F_G2H); > + VSOCK_TRANSPORT_F_G2H | VSOCK_TRANSPORT_F_DGRAM); > if (ret) > goto out_wq; > > diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c > index bdf16fff054f..aedb48728677 100644 > --- a/net/vmw_vsock/virtio_transport_common.c > +++ b/net/vmw_vsock/virtio_transport_common.c > @@ -229,7 +229,9 @@ EXPORT_SYMBOL_GPL(virtio_transport_deliver_tap_pkt); > > static u16 virtio_transport_get_type(struct sock *sk) > { > - if (sk->sk_type == SOCK_STREAM) > + if 
(sk->sk_type == SOCK_DGRAM) > + return VIRTIO_VSOCK_TYPE_DGRAM; > + else if (sk->sk_type == SOCK_STREAM) > return VIRTIO_VSOCK_TYPE_STREAM; > else > return VIRTIO_VSOCK_TYPE_SEQPACKET; > @@ -287,22 +289,29 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk, > vvs = vsk->trans; > > /* we can send less than pkt_len bytes */ > - if (pkt_len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE) > - pkt_len = VIRTIO_VSOCK_MAX_PKT_BUF_SIZE; > + if (pkt_len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE) { > + if (info->type != VIRTIO_VSOCK_TYPE_DGRAM) > + pkt_len = VIRTIO_VSOCK_MAX_PKT_BUF_SIZE; > + else > + return 0; > + } > > - /* virtio_transport_get_credit might return less than pkt_len credit */ > - pkt_len = virtio_transport_get_credit(vvs, pkt_len); > + if (info->type != VIRTIO_VSOCK_TYPE_DGRAM) { > + /* virtio_transport_get_credit might return less than pkt_len credit */ > + pkt_len = virtio_transport_get_credit(vvs, pkt_len); > > - /* Do not send zero length OP_RW pkt */ > - if (pkt_len == 0 && info->op == VIRTIO_VSOCK_OP_RW) > - return pkt_len; > + /* Do not send zero length OP_RW pkt */ > + if (pkt_len == 0 && info->op == VIRTIO_VSOCK_OP_RW) > + return pkt_len; > + } > > skb = virtio_transport_alloc_skb(info, pkt_len, > src_cid, src_port, > dst_cid, dst_port, > &err); > if (!skb) { > - virtio_transport_put_credit(vvs, pkt_len); > + if (info->type != VIRTIO_VSOCK_TYPE_DGRAM) > + virtio_transport_put_credit(vvs, pkt_len); > return err; > } > > @@ -586,6 +595,61 @@ virtio_transport_seqpacket_dequeue(struct vsock_sock *vsk, > } > EXPORT_SYMBOL_GPL(virtio_transport_seqpacket_dequeue); > > +static ssize_t > +virtio_transport_dgram_do_dequeue(struct vsock_sock *vsk, > + struct msghdr *msg, size_t len) > +{ > + struct virtio_vsock_sock *vvs = vsk->trans; > + struct sk_buff *skb; > + size_t total = 0; > + u32 free_space; > + int err = -EFAULT; > + > + spin_lock_bh(&vvs->rx_lock); > + if (total < len && !skb_queue_empty_lockless(&vvs->rx_queue)) { > + skb = __skb_dequeue(&vvs->rx_queue); 
> + > + total = len; > + if (total > skb->len - vsock_metadata(skb)->off) > + total = skb->len - vsock_metadata(skb)->off; > + else if (total < skb->len - vsock_metadata(skb)->off) > + msg->msg_flags |= MSG_TRUNC; > + > + /* sk_lock is held by caller so no one else can dequeue. > + * Unlock rx_lock since memcpy_to_msg() may sleep. > + */ > + spin_unlock_bh(&vvs->rx_lock); > + > + err = memcpy_to_msg(msg, skb->data + vsock_metadata(skb)->off, total); > + if (err) > + return err; > + > + spin_lock_bh(&vvs->rx_lock); > + > + virtio_transport_dec_rx_pkt(vvs, skb); > + consume_skb(skb); > + } > + > + free_space = vvs->buf_alloc - (vvs->fwd_cnt - vvs->last_fwd_cnt); > + > + spin_unlock_bh(&vvs->rx_lock); > + > + if (total > 0 && msg->msg_name) { > + /* Provide the address of the sender. */ > + DECLARE_SOCKADDR(struct sockaddr_vm *, vm_addr, msg->msg_name); > + > + vsock_addr_init(vm_addr, le64_to_cpu(vsock_hdr(skb)->src_cid), > + le32_to_cpu(vsock_hdr(skb)->src_port)); > + msg->msg_namelen = sizeof(*vm_addr); > + } > + return total; > +} > + > +static s64 virtio_transport_dgram_has_data(struct vsock_sock *vsk) > +{ > + return virtio_transport_stream_has_data(vsk); > +} > + > int > virtio_transport_seqpacket_enqueue(struct vsock_sock *vsk, > struct msghdr *msg, > @@ -611,7 +675,66 @@ virtio_transport_dgram_dequeue(struct vsock_sock *vsk, > struct msghdr *msg, > size_t len, int flags) > { > - return -EOPNOTSUPP; > + struct sock *sk; > + size_t err = 0; > + long timeout; > + > + DEFINE_WAIT(wait); > + > + sk = &vsk->sk; > + err = 0; > + > + if (flags & MSG_OOB || flags & MSG_ERRQUEUE || flags & MSG_PEEK) > + return -EOPNOTSUPP; > + > + lock_sock(sk); > + > + if (!len) > + goto out; > + > + timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); > + > + while (1) { > + s64 ready; > + > + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); > + ready = virtio_transport_dgram_has_data(vsk); > + > + if (ready == 0) { > + if (timeout == 0) { > + err = -EAGAIN; > + 
finish_wait(sk_sleep(sk), &wait); > + break; > + } > + > + release_sock(sk); > + timeout = schedule_timeout(timeout); > + lock_sock(sk); > + > + if (signal_pending(current)) { > + err = sock_intr_errno(timeout); > + finish_wait(sk_sleep(sk), &wait); > + break; > + } else if (timeout == 0) { > + err = -EAGAIN; > + finish_wait(sk_sleep(sk), &wait); > + break; > + } > + } else { > + finish_wait(sk_sleep(sk), &wait); > + > + if (ready < 0) { > + err = -ENOMEM; > + goto out; > + } > + > + err = virtio_transport_dgram_do_dequeue(vsk, msg, len); > + break; > + } > + } > +out: > + release_sock(sk); > + return err; > } > EXPORT_SYMBOL_GPL(virtio_transport_dgram_dequeue); > > @@ -819,13 +942,13 @@ EXPORT_SYMBOL_GPL(virtio_transport_stream_allow); > int virtio_transport_dgram_bind(struct vsock_sock *vsk, > struct sockaddr_vm *addr) > { > - return -EOPNOTSUPP; > + return vsock_bind_stream(vsk, addr); > } > EXPORT_SYMBOL_GPL(virtio_transport_dgram_bind); > > bool virtio_transport_dgram_allow(u32 cid, u32 port) > { > - return false; > + return true; > } > EXPORT_SYMBOL_GPL(virtio_transport_dgram_allow); > > @@ -861,7 +984,16 @@ virtio_transport_dgram_enqueue(struct vsock_sock *vsk, > struct msghdr *msg, > size_t dgram_len) > { > - return -EOPNOTSUPP; > + struct virtio_vsock_pkt_info info = { > + .op = VIRTIO_VSOCK_OP_RW, > + .msg = msg, > + .pkt_len = dgram_len, > + .vsk = vsk, > + .remote_cid = remote_addr->svm_cid, > + .remote_port = remote_addr->svm_port, > + }; > + > + return virtio_transport_send_pkt_info(vsk, &info); > } > EXPORT_SYMBOL_GPL(virtio_transport_dgram_enqueue); > > @@ -1165,6 +1297,12 @@ virtio_transport_recv_connected(struct sock *sk, > struct virtio_vsock_hdr *hdr = vsock_hdr(skb); > int err = 0; > > + if (le16_to_cpu(vsock_hdr(skb)->type) == VIRTIO_VSOCK_TYPE_DGRAM) { > + virtio_transport_recv_enqueue(vsk, skb); > + sk->sk_data_ready(sk); > + return err; > + } > + > switch (le16_to_cpu(hdr->op)) { > case VIRTIO_VSOCK_OP_RW: > 
virtio_transport_recv_enqueue(vsk, skb); > @@ -1320,7 +1458,8 @@ virtio_transport_recv_listen(struct sock *sk, struct sk_buff *skb, > static bool virtio_transport_valid_type(u16 type) > { > return (type == VIRTIO_VSOCK_TYPE_STREAM) || > - (type == VIRTIO_VSOCK_TYPE_SEQPACKET); > + (type == VIRTIO_VSOCK_TYPE_SEQPACKET) || > + (type == VIRTIO_VSOCK_TYPE_DGRAM); > } > > /* We are under the virtio-vsock's vsock->rx_lock or vhost-vsock's vq->mutex > @@ -1384,6 +1523,11 @@ void virtio_transport_recv_pkt(struct virtio_transport *t, > goto free_pkt; > } > > + if (sk->sk_type == SOCK_DGRAM) { > + virtio_transport_recv_connected(sk, skb); > + goto out; > + } > + > space_available = virtio_transport_space_update(sk, skb); > > /* Update CID in case it has changed after a transport reset event */ > @@ -1415,6 +1559,7 @@ void virtio_transport_recv_pkt(struct virtio_transport *t, > break; > } > > +out: > release_sock(sk); > > /* Release refcnt obtained when we fetched this socket out of the > -- > 2.35.1 >
On Wed, Aug 17, 2022 at 05:01:00AM +0000, Arseniy Krasnov wrote: > On 16.08.2022 05:32, Bobby Eshleman wrote: > > CC'ing virtio-dev@lists.oasis-open.org > > > > On Mon, Aug 15, 2022 at 10:56:08AM -0700, Bobby Eshleman wrote: > >> This patch supports dgram in virtio and on the vhost side. > Hello, > > sorry, i don't understand, how this maintains message boundaries? Or it > is unnecessary for SOCK_DGRAM? > > Thanks If I understand your question, the length is included in the header, so receivers always know that header start + header length + payload length marks the message boundary. > >> > >> Signed-off-by: Jiang Wang <jiang.wang@bytedance.com> > >> Signed-off-by: Bobby Eshleman <bobby.eshleman@bytedance.com> > >> --- > >> drivers/vhost/vsock.c | 2 +- > >> include/net/af_vsock.h | 2 + > >> include/uapi/linux/virtio_vsock.h | 1 + > >> net/vmw_vsock/af_vsock.c | 26 +++- > >> net/vmw_vsock/virtio_transport.c | 2 +- > >> net/vmw_vsock/virtio_transport_common.c | 173 ++++++++++++++++++++++-- > >> 6 files changed, 186 insertions(+), 20 deletions(-) > >> > >> diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c > >> index a5d1bdb786fe..3dc72a5647ca 100644 > >> --- a/drivers/vhost/vsock.c > >> +++ b/drivers/vhost/vsock.c > >> @@ -925,7 +925,7 @@ static int __init vhost_vsock_init(void) > >> int ret; > >> > >> ret = vsock_core_register(&vhost_transport.transport, > >> - VSOCK_TRANSPORT_F_H2G); > >> + VSOCK_TRANSPORT_F_H2G | VSOCK_TRANSPORT_F_DGRAM); > >> if (ret < 0) > >> return ret; > >> > >> diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h > >> index 1c53c4c4d88f..37e55c81e4df 100644 > >> --- a/include/net/af_vsock.h > >> +++ b/include/net/af_vsock.h > >> @@ -78,6 +78,8 @@ struct vsock_sock { > >> s64 vsock_stream_has_data(struct vsock_sock *vsk); > >> s64 vsock_stream_has_space(struct vsock_sock *vsk); > >> struct sock *vsock_create_connected(struct sock *parent); > >> +int vsock_bind_stream(struct vsock_sock *vsk, > >> + struct sockaddr_vm *addr); > 
>> > >> /**** TRANSPORT ****/ > >> > >> diff --git a/include/uapi/linux/virtio_vsock.h b/include/uapi/linux/virtio_vsock.h > >> index 857df3a3a70d..0975b9c88292 100644 > >> --- a/include/uapi/linux/virtio_vsock.h > >> +++ b/include/uapi/linux/virtio_vsock.h > >> @@ -70,6 +70,7 @@ struct virtio_vsock_hdr { > >> enum virtio_vsock_type { > >> VIRTIO_VSOCK_TYPE_STREAM = 1, > >> VIRTIO_VSOCK_TYPE_SEQPACKET = 2, > >> + VIRTIO_VSOCK_TYPE_DGRAM = 3, > >> }; > >> > >> enum virtio_vsock_op { > >> diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c > >> index 1893f8aafa48..87e4ae1866d3 100644 > >> --- a/net/vmw_vsock/af_vsock.c > >> +++ b/net/vmw_vsock/af_vsock.c > >> @@ -675,6 +675,19 @@ static int __vsock_bind_connectible(struct vsock_sock *vsk, > >> return 0; > >> } > >> > >> +int vsock_bind_stream(struct vsock_sock *vsk, > >> + struct sockaddr_vm *addr) > >> +{ > >> + int retval; > >> + > >> + spin_lock_bh(&vsock_table_lock); > >> + retval = __vsock_bind_connectible(vsk, addr); > >> + spin_unlock_bh(&vsock_table_lock); > >> + > >> + return retval; > >> +} > >> +EXPORT_SYMBOL(vsock_bind_stream); > >> + > >> static int __vsock_bind_dgram(struct vsock_sock *vsk, > >> struct sockaddr_vm *addr) > >> { > >> @@ -2363,11 +2376,16 @@ int vsock_core_register(const struct vsock_transport *t, int features) > >> } > >> > >> if (features & VSOCK_TRANSPORT_F_DGRAM) { > >> - if (t_dgram) { > >> - err = -EBUSY; > >> - goto err_busy; > >> + /* TODO: always chose the G2H variant over others, support nesting later */ > >> + if (features & VSOCK_TRANSPORT_F_G2H) { > >> + if (t_dgram) > >> + pr_warn("virtio_vsock: t_dgram already set\n"); > >> + t_dgram = t; > >> + } > >> + > >> + if (!t_dgram) { > >> + t_dgram = t; > >> } > >> - t_dgram = t; > >> } > >> > >> if (features & VSOCK_TRANSPORT_F_LOCAL) { > >> diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c > >> index 073314312683..d4526ca462d2 100644 > >> --- a/net/vmw_vsock/virtio_transport.c > >> +++ 
b/net/vmw_vsock/virtio_transport.c > >> @@ -850,7 +850,7 @@ static int __init virtio_vsock_init(void) > >> return -ENOMEM; > >> > >> ret = vsock_core_register(&virtio_transport.transport, > >> - VSOCK_TRANSPORT_F_G2H); > >> + VSOCK_TRANSPORT_F_G2H | VSOCK_TRANSPORT_F_DGRAM); > >> if (ret) > >> goto out_wq; > >> > >> diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c > >> index bdf16fff054f..aedb48728677 100644 > >> --- a/net/vmw_vsock/virtio_transport_common.c > >> +++ b/net/vmw_vsock/virtio_transport_common.c > >> @@ -229,7 +229,9 @@ EXPORT_SYMBOL_GPL(virtio_transport_deliver_tap_pkt); > >> > >> static u16 virtio_transport_get_type(struct sock *sk) > >> { > >> - if (sk->sk_type == SOCK_STREAM) > >> + if (sk->sk_type == SOCK_DGRAM) > >> + return VIRTIO_VSOCK_TYPE_DGRAM; > >> + else if (sk->sk_type == SOCK_STREAM) > >> return VIRTIO_VSOCK_TYPE_STREAM; > >> else > >> return VIRTIO_VSOCK_TYPE_SEQPACKET; > >> @@ -287,22 +289,29 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk, > >> vvs = vsk->trans; > >> > >> /* we can send less than pkt_len bytes */ > >> - if (pkt_len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE) > >> - pkt_len = VIRTIO_VSOCK_MAX_PKT_BUF_SIZE; > >> + if (pkt_len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE) { > >> + if (info->type != VIRTIO_VSOCK_TYPE_DGRAM) > >> + pkt_len = VIRTIO_VSOCK_MAX_PKT_BUF_SIZE; > >> + else > >> + return 0; > >> + } > >> > >> - /* virtio_transport_get_credit might return less than pkt_len credit */ > >> - pkt_len = virtio_transport_get_credit(vvs, pkt_len); > >> + if (info->type != VIRTIO_VSOCK_TYPE_DGRAM) { > >> + /* virtio_transport_get_credit might return less than pkt_len credit */ > >> + pkt_len = virtio_transport_get_credit(vvs, pkt_len); > >> > >> - /* Do not send zero length OP_RW pkt */ > >> - if (pkt_len == 0 && info->op == VIRTIO_VSOCK_OP_RW) > >> - return pkt_len; > >> + /* Do not send zero length OP_RW pkt */ > >> + if (pkt_len == 0 && info->op == VIRTIO_VSOCK_OP_RW) > >> + 
return pkt_len; > >> + } > >> > >> skb = virtio_transport_alloc_skb(info, pkt_len, > >> src_cid, src_port, > >> dst_cid, dst_port, > >> &err); > >> if (!skb) { > >> - virtio_transport_put_credit(vvs, pkt_len); > >> + if (info->type != VIRTIO_VSOCK_TYPE_DGRAM) > >> + virtio_transport_put_credit(vvs, pkt_len); > >> return err; > >> } > >> > >> @@ -586,6 +595,61 @@ virtio_transport_seqpacket_dequeue(struct vsock_sock *vsk, > >> } > >> EXPORT_SYMBOL_GPL(virtio_transport_seqpacket_dequeue); > >> > >> +static ssize_t > >> +virtio_transport_dgram_do_dequeue(struct vsock_sock *vsk, > >> + struct msghdr *msg, size_t len) > >> +{ > >> + struct virtio_vsock_sock *vvs = vsk->trans; > >> + struct sk_buff *skb; > >> + size_t total = 0; > >> + u32 free_space; > >> + int err = -EFAULT; > >> + > >> + spin_lock_bh(&vvs->rx_lock); > >> + if (total < len && !skb_queue_empty_lockless(&vvs->rx_queue)) { > >> + skb = __skb_dequeue(&vvs->rx_queue); > >> + > >> + total = len; > >> + if (total > skb->len - vsock_metadata(skb)->off) > >> + total = skb->len - vsock_metadata(skb)->off; > >> + else if (total < skb->len - vsock_metadata(skb)->off) > >> + msg->msg_flags |= MSG_TRUNC; > >> + > >> + /* sk_lock is held by caller so no one else can dequeue. > >> + * Unlock rx_lock since memcpy_to_msg() may sleep. > >> + */ > >> + spin_unlock_bh(&vvs->rx_lock); > >> + > >> + err = memcpy_to_msg(msg, skb->data + vsock_metadata(skb)->off, total); > >> + if (err) > >> + return err; > >> + > >> + spin_lock_bh(&vvs->rx_lock); > >> + > >> + virtio_transport_dec_rx_pkt(vvs, skb); > >> + consume_skb(skb); > >> + } > >> + > >> + free_space = vvs->buf_alloc - (vvs->fwd_cnt - vvs->last_fwd_cnt); > >> + > >> + spin_unlock_bh(&vvs->rx_lock); > >> + > >> + if (total > 0 && msg->msg_name) { > >> + /* Provide the address of the sender. 
*/ > >> + DECLARE_SOCKADDR(struct sockaddr_vm *, vm_addr, msg->msg_name); > >> + > >> + vsock_addr_init(vm_addr, le64_to_cpu(vsock_hdr(skb)->src_cid), > >> + le32_to_cpu(vsock_hdr(skb)->src_port)); > >> + msg->msg_namelen = sizeof(*vm_addr); > >> + } > >> + return total; > >> +} > >> + > >> +static s64 virtio_transport_dgram_has_data(struct vsock_sock *vsk) > >> +{ > >> + return virtio_transport_stream_has_data(vsk); > >> +} > >> + > >> int > >> virtio_transport_seqpacket_enqueue(struct vsock_sock *vsk, > >> struct msghdr *msg, > >> @@ -611,7 +675,66 @@ virtio_transport_dgram_dequeue(struct vsock_sock *vsk, > >> struct msghdr *msg, > >> size_t len, int flags) > >> { > >> - return -EOPNOTSUPP; > >> + struct sock *sk; > >> + size_t err = 0; > >> + long timeout; > >> + > >> + DEFINE_WAIT(wait); > >> + > >> + sk = &vsk->sk; > >> + err = 0; > >> + > >> + if (flags & MSG_OOB || flags & MSG_ERRQUEUE || flags & MSG_PEEK) > >> + return -EOPNOTSUPP; > >> + > >> + lock_sock(sk); > >> + > >> + if (!len) > >> + goto out; > >> + > >> + timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); > >> + > >> + while (1) { > >> + s64 ready; > >> + > >> + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); > >> + ready = virtio_transport_dgram_has_data(vsk); > >> + > >> + if (ready == 0) { > >> + if (timeout == 0) { > >> + err = -EAGAIN; > >> + finish_wait(sk_sleep(sk), &wait); > >> + break; > >> + } > >> + > >> + release_sock(sk); > >> + timeout = schedule_timeout(timeout); > >> + lock_sock(sk); > >> + > >> + if (signal_pending(current)) { > >> + err = sock_intr_errno(timeout); > >> + finish_wait(sk_sleep(sk), &wait); > >> + break; > >> + } else if (timeout == 0) { > >> + err = -EAGAIN; > >> + finish_wait(sk_sleep(sk), &wait); > >> + break; > >> + } > >> + } else { > >> + finish_wait(sk_sleep(sk), &wait); > >> + > >> + if (ready < 0) { > >> + err = -ENOMEM; > >> + goto out; > >> + } > >> + > >> + err = virtio_transport_dgram_do_dequeue(vsk, msg, len); > >> + break; > >> + } > >> + } > 
>> +out: > >> + release_sock(sk); > >> + return err; > >> } > >> EXPORT_SYMBOL_GPL(virtio_transport_dgram_dequeue); > >> > >> @@ -819,13 +942,13 @@ EXPORT_SYMBOL_GPL(virtio_transport_stream_allow); > >> int virtio_transport_dgram_bind(struct vsock_sock *vsk, > >> struct sockaddr_vm *addr) > >> { > >> - return -EOPNOTSUPP; > >> + return vsock_bind_stream(vsk, addr); > >> } > >> EXPORT_SYMBOL_GPL(virtio_transport_dgram_bind); > >> > >> bool virtio_transport_dgram_allow(u32 cid, u32 port) > >> { > >> - return false; > >> + return true; > >> } > >> EXPORT_SYMBOL_GPL(virtio_transport_dgram_allow); > >> > >> @@ -861,7 +984,16 @@ virtio_transport_dgram_enqueue(struct vsock_sock *vsk, > >> struct msghdr *msg, > >> size_t dgram_len) > >> { > >> - return -EOPNOTSUPP; > >> + struct virtio_vsock_pkt_info info = { > >> + .op = VIRTIO_VSOCK_OP_RW, > >> + .msg = msg, > >> + .pkt_len = dgram_len, > >> + .vsk = vsk, > >> + .remote_cid = remote_addr->svm_cid, > >> + .remote_port = remote_addr->svm_port, > >> + }; > >> + > >> + return virtio_transport_send_pkt_info(vsk, &info); > >> } > >> EXPORT_SYMBOL_GPL(virtio_transport_dgram_enqueue); > >> > >> @@ -1165,6 +1297,12 @@ virtio_transport_recv_connected(struct sock *sk, > >> struct virtio_vsock_hdr *hdr = vsock_hdr(skb); > >> int err = 0; > >> > >> + if (le16_to_cpu(vsock_hdr(skb)->type) == VIRTIO_VSOCK_TYPE_DGRAM) { > >> + virtio_transport_recv_enqueue(vsk, skb); > >> + sk->sk_data_ready(sk); > >> + return err; > >> + } > >> + > >> switch (le16_to_cpu(hdr->op)) { > >> case VIRTIO_VSOCK_OP_RW: > >> virtio_transport_recv_enqueue(vsk, skb); > >> @@ -1320,7 +1458,8 @@ virtio_transport_recv_listen(struct sock *sk, struct sk_buff *skb, > >> static bool virtio_transport_valid_type(u16 type) > >> { > >> return (type == VIRTIO_VSOCK_TYPE_STREAM) || > >> - (type == VIRTIO_VSOCK_TYPE_SEQPACKET); > >> + (type == VIRTIO_VSOCK_TYPE_SEQPACKET) || > >> + (type == VIRTIO_VSOCK_TYPE_DGRAM); > >> } > >> > >> /* We are under the virtio-vsock's 
vsock->rx_lock or vhost-vsock's vq->mutex > >> @@ -1384,6 +1523,11 @@ void virtio_transport_recv_pkt(struct virtio_transport *t, > >> goto free_pkt; > >> } > >> > >> + if (sk->sk_type == SOCK_DGRAM) { > >> + virtio_transport_recv_connected(sk, skb); > >> + goto out; > >> + } > >> + > >> space_available = virtio_transport_space_update(sk, skb); > >> > >> /* Update CID in case it has changed after a transport reset event */ > >> @@ -1415,6 +1559,7 @@ void virtio_transport_recv_pkt(struct virtio_transport *t, > >> break; > >> } > >> > >> +out: > >> release_sock(sk); > >> > >> /* Release refcnt obtained when we fetched this socket out of the > >> -- > >> 2.35.1 > >> > > > > --------------------------------------------------------------------- > > To unsubscribe, e-mail: virtio-dev-unsubscribe@lists.oasis-open.org > > For additional commands, e-mail: virtio-dev-help@lists.oasis-open.org > > >
On Wed, Aug 17, 2022 at 05:42:08AM +0000, Arseniy Krasnov wrote: > On 17.08.2022 08:01, Arseniy Krasnov wrote: > > On 16.08.2022 05:32, Bobby Eshleman wrote: > >> CC'ing virtio-dev@lists.oasis-open.org > >> > >> On Mon, Aug 15, 2022 at 10:56:08AM -0700, Bobby Eshleman wrote: > >>> This patch supports dgram in virtio and on the vhost side. > > Hello, > > > > sorry, i don't understand, how this maintains message boundaries? Or it > > is unnecessary for SOCK_DGRAM? > > > > Thanks > >>> > >>> Signed-off-by: Jiang Wang <jiang.wang@bytedance.com> > >>> Signed-off-by: Bobby Eshleman <bobby.eshleman@bytedance.com> > >>> --- > >>> drivers/vhost/vsock.c | 2 +- > >>> include/net/af_vsock.h | 2 + > >>> include/uapi/linux/virtio_vsock.h | 1 + > >>> net/vmw_vsock/af_vsock.c | 26 +++- > >>> net/vmw_vsock/virtio_transport.c | 2 +- > >>> net/vmw_vsock/virtio_transport_common.c | 173 ++++++++++++++++++++++-- > >>> 6 files changed, 186 insertions(+), 20 deletions(-) > >>> > >>> diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c > >>> index a5d1bdb786fe..3dc72a5647ca 100644 > >>> --- a/drivers/vhost/vsock.c > >>> +++ b/drivers/vhost/vsock.c > >>> @@ -925,7 +925,7 @@ static int __init vhost_vsock_init(void) > >>> int ret; > >>> > >>> ret = vsock_core_register(&vhost_transport.transport, > >>> - VSOCK_TRANSPORT_F_H2G); > >>> + VSOCK_TRANSPORT_F_H2G | VSOCK_TRANSPORT_F_DGRAM); > >>> if (ret < 0) > >>> return ret; > >>> > >>> diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h > >>> index 1c53c4c4d88f..37e55c81e4df 100644 > >>> --- a/include/net/af_vsock.h > >>> +++ b/include/net/af_vsock.h > >>> @@ -78,6 +78,8 @@ struct vsock_sock { > >>> s64 vsock_stream_has_data(struct vsock_sock *vsk); > >>> s64 vsock_stream_has_space(struct vsock_sock *vsk); > >>> struct sock *vsock_create_connected(struct sock *parent); > >>> +int vsock_bind_stream(struct vsock_sock *vsk, > >>> + struct sockaddr_vm *addr); > >>> > >>> /**** TRANSPORT ****/ > >>> > >>> diff --git 
a/include/uapi/linux/virtio_vsock.h b/include/uapi/linux/virtio_vsock.h > >>> index 857df3a3a70d..0975b9c88292 100644 > >>> --- a/include/uapi/linux/virtio_vsock.h > >>> +++ b/include/uapi/linux/virtio_vsock.h > >>> @@ -70,6 +70,7 @@ struct virtio_vsock_hdr { > >>> enum virtio_vsock_type { > >>> VIRTIO_VSOCK_TYPE_STREAM = 1, > >>> VIRTIO_VSOCK_TYPE_SEQPACKET = 2, > >>> + VIRTIO_VSOCK_TYPE_DGRAM = 3, > >>> }; > >>> > >>> enum virtio_vsock_op { > >>> diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c > >>> index 1893f8aafa48..87e4ae1866d3 100644 > >>> --- a/net/vmw_vsock/af_vsock.c > >>> +++ b/net/vmw_vsock/af_vsock.c > >>> @@ -675,6 +675,19 @@ static int __vsock_bind_connectible(struct vsock_sock *vsk, > >>> return 0; > >>> } > >>> > >>> +int vsock_bind_stream(struct vsock_sock *vsk, > >>> + struct sockaddr_vm *addr) > >>> +{ > >>> + int retval; > >>> + > >>> + spin_lock_bh(&vsock_table_lock); > >>> + retval = __vsock_bind_connectible(vsk, addr); > >>> + spin_unlock_bh(&vsock_table_lock); > >>> + > >>> + return retval; > >>> +} > >>> +EXPORT_SYMBOL(vsock_bind_stream); > >>> + > >>> static int __vsock_bind_dgram(struct vsock_sock *vsk, > >>> struct sockaddr_vm *addr) > >>> { > >>> @@ -2363,11 +2376,16 @@ int vsock_core_register(const struct vsock_transport *t, int features) > >>> } > >>> > >>> if (features & VSOCK_TRANSPORT_F_DGRAM) { > >>> - if (t_dgram) { > >>> - err = -EBUSY; > >>> - goto err_busy; > >>> + /* TODO: always chose the G2H variant over others, support nesting later */ > >>> + if (features & VSOCK_TRANSPORT_F_G2H) { > >>> + if (t_dgram) > >>> + pr_warn("virtio_vsock: t_dgram already set\n"); > >>> + t_dgram = t; > >>> + } > >>> + > >>> + if (!t_dgram) { > >>> + t_dgram = t; > >>> } > >>> - t_dgram = t; > >>> } > >>> > >>> if (features & VSOCK_TRANSPORT_F_LOCAL) { > >>> diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c > >>> index 073314312683..d4526ca462d2 100644 > >>> --- a/net/vmw_vsock/virtio_transport.c 
> >>> +++ b/net/vmw_vsock/virtio_transport.c > >>> @@ -850,7 +850,7 @@ static int __init virtio_vsock_init(void) > >>> return -ENOMEM; > >>> > >>> ret = vsock_core_register(&virtio_transport.transport, > >>> - VSOCK_TRANSPORT_F_G2H); > >>> + VSOCK_TRANSPORT_F_G2H | VSOCK_TRANSPORT_F_DGRAM); > >>> if (ret) > >>> goto out_wq; > >>> > >>> diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c > >>> index bdf16fff054f..aedb48728677 100644 > >>> --- a/net/vmw_vsock/virtio_transport_common.c > >>> +++ b/net/vmw_vsock/virtio_transport_common.c > >>> @@ -229,7 +229,9 @@ EXPORT_SYMBOL_GPL(virtio_transport_deliver_tap_pkt); > >>> > >>> static u16 virtio_transport_get_type(struct sock *sk) > >>> { > >>> - if (sk->sk_type == SOCK_STREAM) > >>> + if (sk->sk_type == SOCK_DGRAM) > >>> + return VIRTIO_VSOCK_TYPE_DGRAM; > >>> + else if (sk->sk_type == SOCK_STREAM) > >>> return VIRTIO_VSOCK_TYPE_STREAM; > >>> else > >>> return VIRTIO_VSOCK_TYPE_SEQPACKET; > >>> @@ -287,22 +289,29 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk, > >>> vvs = vsk->trans; > >>> > >>> /* we can send less than pkt_len bytes */ > >>> - if (pkt_len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE) > >>> - pkt_len = VIRTIO_VSOCK_MAX_PKT_BUF_SIZE; > >>> + if (pkt_len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE) { > >>> + if (info->type != VIRTIO_VSOCK_TYPE_DGRAM) > >>> + pkt_len = VIRTIO_VSOCK_MAX_PKT_BUF_SIZE; > >>> + else > >>> + return 0; > >>> + } > >>> > >>> - /* virtio_transport_get_credit might return less than pkt_len credit */ > >>> - pkt_len = virtio_transport_get_credit(vvs, pkt_len); > >>> + if (info->type != VIRTIO_VSOCK_TYPE_DGRAM) { > >>> + /* virtio_transport_get_credit might return less than pkt_len credit */ > >>> + pkt_len = virtio_transport_get_credit(vvs, pkt_len); > >>> > >>> - /* Do not send zero length OP_RW pkt */ > >>> - if (pkt_len == 0 && info->op == VIRTIO_VSOCK_OP_RW) > >>> - return pkt_len; > >>> + /* Do not send zero length OP_RW pkt */ > >>> + if 
(pkt_len == 0 && info->op == VIRTIO_VSOCK_OP_RW) > >>> + return pkt_len; > >>> + } > >>> > >>> skb = virtio_transport_alloc_skb(info, pkt_len, > >>> src_cid, src_port, > >>> dst_cid, dst_port, > >>> &err); > >>> if (!skb) { > >>> - virtio_transport_put_credit(vvs, pkt_len); > >>> + if (info->type != VIRTIO_VSOCK_TYPE_DGRAM) > >>> + virtio_transport_put_credit(vvs, pkt_len); > >>> return err; > >>> } > >>> > >>> @@ -586,6 +595,61 @@ virtio_transport_seqpacket_dequeue(struct vsock_sock *vsk, > >>> } > >>> EXPORT_SYMBOL_GPL(virtio_transport_seqpacket_dequeue); > >>> > >>> +static ssize_t > >>> +virtio_transport_dgram_do_dequeue(struct vsock_sock *vsk, > >>> + struct msghdr *msg, size_t len) > >>> +{ > >>> + struct virtio_vsock_sock *vvs = vsk->trans; > >>> + struct sk_buff *skb; > >>> + size_t total = 0; > >>> + u32 free_space; > >>> + int err = -EFAULT; > >>> + > >>> + spin_lock_bh(&vvs->rx_lock); > >>> + if (total < len && !skb_queue_empty_lockless(&vvs->rx_queue)) { > >>> + skb = __skb_dequeue(&vvs->rx_queue); > >>> + > >>> + total = len; > >>> + if (total > skb->len - vsock_metadata(skb)->off) > >>> + total = skb->len - vsock_metadata(skb)->off; > >>> + else if (total < skb->len - vsock_metadata(skb)->off) > >>> + msg->msg_flags |= MSG_TRUNC; > >>> + > >>> + /* sk_lock is held by caller so no one else can dequeue. > >>> + * Unlock rx_lock since memcpy_to_msg() may sleep. > >>> + */ > >>> + spin_unlock_bh(&vvs->rx_lock); > >>> + > >>> + err = memcpy_to_msg(msg, skb->data + vsock_metadata(skb)->off, total); > >>> + if (err) > >>> + return err; > >>> + > >>> + spin_lock_bh(&vvs->rx_lock); > >>> + > >>> + virtio_transport_dec_rx_pkt(vvs, skb); > >>> + consume_skb(skb); > >>> + } > >>> + > >>> + free_space = vvs->buf_alloc - (vvs->fwd_cnt - vvs->last_fwd_cnt); > >>> + > >>> + spin_unlock_bh(&vvs->rx_lock); > >>> + > >>> + if (total > 0 && msg->msg_name) { > >>> + /* Provide the address of the sender. 
*/ > >>> + DECLARE_SOCKADDR(struct sockaddr_vm *, vm_addr, msg->msg_name); > >>> + > >>> + vsock_addr_init(vm_addr, le64_to_cpu(vsock_hdr(skb)->src_cid), > >>> + le32_to_cpu(vsock_hdr(skb)->src_port)); > >>> + msg->msg_namelen = sizeof(*vm_addr); > >>> + } > >>> + return total; > >>> +} > >>> + > >>> +static s64 virtio_transport_dgram_has_data(struct vsock_sock *vsk) > >>> +{ > >>> + return virtio_transport_stream_has_data(vsk); > >>> +} > >>> + > >>> int > >>> virtio_transport_seqpacket_enqueue(struct vsock_sock *vsk, > >>> struct msghdr *msg, > >>> @@ -611,7 +675,66 @@ virtio_transport_dgram_dequeue(struct vsock_sock *vsk, > >>> struct msghdr *msg, > >>> size_t len, int flags) > >>> { > >>> - return -EOPNOTSUPP; > >>> + struct sock *sk; > >>> + size_t err = 0; > >>> + long timeout; > >>> + > >>> + DEFINE_WAIT(wait); > >>> + > >>> + sk = &vsk->sk; > >>> + err = 0; > >>> + > >>> + if (flags & MSG_OOB || flags & MSG_ERRQUEUE || flags & MSG_PEEK) > >>> + return -EOPNOTSUPP; > >>> + > >>> + lock_sock(sk); > >>> + > >>> + if (!len) > >>> + goto out; > >>> + > >>> + timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); > >>> + > >>> + while (1) { > >>> + s64 ready; > >>> + > >>> + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); > >>> + ready = virtio_transport_dgram_has_data(vsk); > >>> + > >>> + if (ready == 0) { > >>> + if (timeout == 0) { > >>> + err = -EAGAIN; > >>> + finish_wait(sk_sleep(sk), &wait); > >>> + break; > >>> + } > >>> + > >>> + release_sock(sk); > >>> + timeout = schedule_timeout(timeout); > >>> + lock_sock(sk); > >>> + > >>> + if (signal_pending(current)) { > >>> + err = sock_intr_errno(timeout); > >>> + finish_wait(sk_sleep(sk), &wait); > >>> + break; > >>> + } else if (timeout == 0) { > >>> + err = -EAGAIN; > >>> + finish_wait(sk_sleep(sk), &wait); > >>> + break; > >>> + } > >>> + } else { > >>> + finish_wait(sk_sleep(sk), &wait); > >>> + > >>> + if (ready < 0) { > >>> + err = -ENOMEM; > >>> + goto out; > >>> + } > >>> + > >>> + err = 
virtio_transport_dgram_do_dequeue(vsk, msg, len); > >>> + break; > >>> + } > >>> + } > >>> +out: > >>> + release_sock(sk); > >>> + return err; > >>> } > >>> EXPORT_SYMBOL_GPL(virtio_transport_dgram_dequeue); > ^^^ > May be, this generic data waiting logic should be in af_vsock.c, as for stream/seqpacket? > In this way, another transport which supports SOCK_DGRAM could reuse it. I think that is a great idea. I'll test that change for v2. Thanks. > >>> > >>> @@ -819,13 +942,13 @@ EXPORT_SYMBOL_GPL(virtio_transport_stream_allow); > >>> int virtio_transport_dgram_bind(struct vsock_sock *vsk, > >>> struct sockaddr_vm *addr) > >>> { > >>> - return -EOPNOTSUPP; > >>> + return vsock_bind_stream(vsk, addr); > >>> } > >>> EXPORT_SYMBOL_GPL(virtio_transport_dgram_bind); > >>> > >>> bool virtio_transport_dgram_allow(u32 cid, u32 port) > >>> { > >>> - return false; > >>> + return true; > >>> } > >>> EXPORT_SYMBOL_GPL(virtio_transport_dgram_allow); > >>> > >>> @@ -861,7 +984,16 @@ virtio_transport_dgram_enqueue(struct vsock_sock *vsk, > >>> struct msghdr *msg, > >>> size_t dgram_len) > >>> { > >>> - return -EOPNOTSUPP; > >>> + struct virtio_vsock_pkt_info info = { > >>> + .op = VIRTIO_VSOCK_OP_RW, > >>> + .msg = msg, > >>> + .pkt_len = dgram_len, > >>> + .vsk = vsk, > >>> + .remote_cid = remote_addr->svm_cid, > >>> + .remote_port = remote_addr->svm_port, > >>> + }; > >>> + > >>> + return virtio_transport_send_pkt_info(vsk, &info); > >>> } > >>> EXPORT_SYMBOL_GPL(virtio_transport_dgram_enqueue); > >>> > >>> @@ -1165,6 +1297,12 @@ virtio_transport_recv_connected(struct sock *sk, > >>> struct virtio_vsock_hdr *hdr = vsock_hdr(skb); > >>> int err = 0; > >>> > >>> + if (le16_to_cpu(vsock_hdr(skb)->type) == VIRTIO_VSOCK_TYPE_DGRAM) { > >>> + virtio_transport_recv_enqueue(vsk, skb); > >>> + sk->sk_data_ready(sk); > >>> + return err; > >>> + } > >>> + > >>> switch (le16_to_cpu(hdr->op)) { > >>> case VIRTIO_VSOCK_OP_RW: > >>> virtio_transport_recv_enqueue(vsk, skb); > >>> @@ -1320,7 
+1458,8 @@ virtio_transport_recv_listen(struct sock *sk, struct sk_buff *skb, > >>> static bool virtio_transport_valid_type(u16 type) > >>> { > >>> return (type == VIRTIO_VSOCK_TYPE_STREAM) || > >>> - (type == VIRTIO_VSOCK_TYPE_SEQPACKET); > >>> + (type == VIRTIO_VSOCK_TYPE_SEQPACKET) || > >>> + (type == VIRTIO_VSOCK_TYPE_DGRAM); > >>> } > >>> > >>> /* We are under the virtio-vsock's vsock->rx_lock or vhost-vsock's vq->mutex > >>> @@ -1384,6 +1523,11 @@ void virtio_transport_recv_pkt(struct virtio_transport *t, > >>> goto free_pkt; > >>> } > >>> > >>> + if (sk->sk_type == SOCK_DGRAM) { > >>> + virtio_transport_recv_connected(sk, skb); > >>> + goto out; > >>> + } > >>> + > >>> space_available = virtio_transport_space_update(sk, skb); > >>> > >>> /* Update CID in case it has changed after a transport reset event */ > >>> @@ -1415,6 +1559,7 @@ void virtio_transport_recv_pkt(struct virtio_transport *t, > >>> break; > >>> } > >>> > >>> +out: > >>> release_sock(sk); > >>> > >>> /* Release refcnt obtained when we fetched this socket out of the > >>> -- > >>> 2.35.1 > >>> > >> > >> --------------------------------------------------------------------- > >> To unsubscribe, e-mail: virtio-dev-unsubscribe@lists.oasis-open.org > >> For additional commands, e-mail: virtio-dev-help@lists.oasis-open.org > >> > > >
On Thu, Aug 18, 2022 at 08:35:48AM +0000, Arseniy Krasnov wrote: > On Tue, 2022-08-16 at 09:58 +0000, Bobby Eshleman wrote: > > On Wed, Aug 17, 2022 at 05:42:08AM +0000, Arseniy Krasnov wrote: > > > On 17.08.2022 08:01, Arseniy Krasnov wrote: > > > > On 16.08.2022 05:32, Bobby Eshleman wrote: > > > > > CC'ing virtio-dev@lists.oasis-open.org > > > > > > > > > > On Mon, Aug 15, 2022 at 10:56:08AM -0700, Bobby Eshleman wrote: > > > > > > This patch supports dgram in virtio and on the vhost side. > > > > Hello, > > > > > > > > sorry, i don't understand, how this maintains message boundaries? > > > > Or it > > > > is unnecessary for SOCK_DGRAM? > > > > > > > > Thanks > > > > > > Signed-off-by: Jiang Wang <jiang.wang@bytedance.com> > > > > > > Signed-off-by: Bobby Eshleman <bobby.eshleman@bytedance.com> > > > > > > --- > > > > > > drivers/vhost/vsock.c | 2 +- > > > > > > include/net/af_vsock.h | 2 + > > > > > > include/uapi/linux/virtio_vsock.h | 1 + > > > > > > net/vmw_vsock/af_vsock.c | 26 +++- > > > > > > net/vmw_vsock/virtio_transport.c | 2 +- > > > > > > net/vmw_vsock/virtio_transport_common.c | 173 > > > > > > ++++++++++++++++++++++-- > > > > > > 6 files changed, 186 insertions(+), 20 deletions(-) > > > > > > > > > > > > diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c > > > > > > index a5d1bdb786fe..3dc72a5647ca 100644 > > > > > > --- a/drivers/vhost/vsock.c > > > > > > +++ b/drivers/vhost/vsock.c > > > > > > @@ -925,7 +925,7 @@ static int __init vhost_vsock_init(void) > > > > > > int ret; > > > > > > > > > > > > ret = vsock_core_register(&vhost_transport.transport, > > > > > > - VSOCK_TRANSPORT_F_H2G); > > > > > > + VSOCK_TRANSPORT_F_H2G | > > > > > > VSOCK_TRANSPORT_F_DGRAM); > > > > > > if (ret < 0) > > > > > > return ret; > > > > > > > > > > > > diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h > > > > > > index 1c53c4c4d88f..37e55c81e4df 100644 > > > > > > --- a/include/net/af_vsock.h > > > > > > +++ b/include/net/af_vsock.h > > > > > > 
@@ -78,6 +78,8 @@ struct vsock_sock { > > > > > > s64 vsock_stream_has_data(struct vsock_sock *vsk); > > > > > > s64 vsock_stream_has_space(struct vsock_sock *vsk); > > > > > > struct sock *vsock_create_connected(struct sock *parent); > > > > > > +int vsock_bind_stream(struct vsock_sock *vsk, > > > > > > + struct sockaddr_vm *addr); > > > > > > > > > > > > /**** TRANSPORT ****/ > > > > > > > > > > > > diff --git a/include/uapi/linux/virtio_vsock.h > > > > > > b/include/uapi/linux/virtio_vsock.h > > > > > > index 857df3a3a70d..0975b9c88292 100644 > > > > > > --- a/include/uapi/linux/virtio_vsock.h > > > > > > +++ b/include/uapi/linux/virtio_vsock.h > > > > > > @@ -70,6 +70,7 @@ struct virtio_vsock_hdr { > > > > > > enum virtio_vsock_type { > > > > > > VIRTIO_VSOCK_TYPE_STREAM = 1, > > > > > > VIRTIO_VSOCK_TYPE_SEQPACKET = 2, > > > > > > + VIRTIO_VSOCK_TYPE_DGRAM = 3, > > > > > > }; > > > > > > > > > > > > enum virtio_vsock_op { > > > > > > diff --git a/net/vmw_vsock/af_vsock.c > > > > > > b/net/vmw_vsock/af_vsock.c > > > > > > index 1893f8aafa48..87e4ae1866d3 100644 > > > > > > --- a/net/vmw_vsock/af_vsock.c > > > > > > +++ b/net/vmw_vsock/af_vsock.c > > > > > > @@ -675,6 +675,19 @@ static int > > > > > > __vsock_bind_connectible(struct vsock_sock *vsk, > > > > > > return 0; > > > > > > } > > > > > > > > > > > > +int vsock_bind_stream(struct vsock_sock *vsk, > > > > > > + struct sockaddr_vm *addr) > > > > > > +{ > > > > > > + int retval; > > > > > > + > > > > > > + spin_lock_bh(&vsock_table_lock); > > > > > > + retval = __vsock_bind_connectible(vsk, addr); > > > > > > + spin_unlock_bh(&vsock_table_lock); > > > > > > + > > > > > > + return retval; > > > > > > +} > > > > > > +EXPORT_SYMBOL(vsock_bind_stream); > > > > > > + > > > > > > static int __vsock_bind_dgram(struct vsock_sock *vsk, > > > > > > struct sockaddr_vm *addr) > > > > > > { > > > > > > @@ -2363,11 +2376,16 @@ int vsock_core_register(const struct > > > > > > vsock_transport *t, int features) > > > > > > 
} > > > > > > > > > > > > if (features & VSOCK_TRANSPORT_F_DGRAM) { > > > > > > - if (t_dgram) { > > > > > > - err = -EBUSY; > > > > > > - goto err_busy; > > > > > > + /* TODO: always chose the G2H variant over > > > > > > others, support nesting later */ > > > > > > + if (features & VSOCK_TRANSPORT_F_G2H) { > > > > > > + if (t_dgram) > > > > > > + pr_warn("virtio_vsock: t_dgram > > > > > > already set\n"); > > > > > > + t_dgram = t; > > > > > > + } > > > > > > + > > > > > > + if (!t_dgram) { > > > > > > + t_dgram = t; > > > > > > } > > > > > > - t_dgram = t; > > > > > > } > > > > > > > > > > > > if (features & VSOCK_TRANSPORT_F_LOCAL) { > > > > > > diff --git a/net/vmw_vsock/virtio_transport.c > > > > > > b/net/vmw_vsock/virtio_transport.c > > > > > > index 073314312683..d4526ca462d2 100644 > > > > > > --- a/net/vmw_vsock/virtio_transport.c > > > > > > +++ b/net/vmw_vsock/virtio_transport.c > > > > > > @@ -850,7 +850,7 @@ static int __init virtio_vsock_init(void) > > > > > > return -ENOMEM; > > > > > > > > > > > > ret = vsock_core_register(&virtio_transport.transport, > > > > > > - VSOCK_TRANSPORT_F_G2H); > > > > > > + VSOCK_TRANSPORT_F_G2H | > > > > > > VSOCK_TRANSPORT_F_DGRAM); > > > > > > if (ret) > > > > > > goto out_wq; > > > > > > > > > > > > diff --git a/net/vmw_vsock/virtio_transport_common.c > > > > > > b/net/vmw_vsock/virtio_transport_common.c > > > > > > index bdf16fff054f..aedb48728677 100644 > > > > > > --- a/net/vmw_vsock/virtio_transport_common.c > > > > > > +++ b/net/vmw_vsock/virtio_transport_common.c > > > > > > @@ -229,7 +229,9 @@ > > > > > > EXPORT_SYMBOL_GPL(virtio_transport_deliver_tap_pkt); > > > > > > > > > > > > static u16 virtio_transport_get_type(struct sock *sk) > > > > > > { > > > > > > - if (sk->sk_type == SOCK_STREAM) > > > > > > + if (sk->sk_type == SOCK_DGRAM) > > > > > > + return VIRTIO_VSOCK_TYPE_DGRAM; > > > > > > + else if (sk->sk_type == SOCK_STREAM) > > > > > > return VIRTIO_VSOCK_TYPE_STREAM; > > > > > > else > > > > > > 
return VIRTIO_VSOCK_TYPE_SEQPACKET; > > > > > > @@ -287,22 +289,29 @@ static int > > > > > > virtio_transport_send_pkt_info(struct vsock_sock *vsk, > > > > > > vvs = vsk->trans; > > > > > > > > > > > > /* we can send less than pkt_len bytes */ > > > > > > - if (pkt_len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE) > > > > > > - pkt_len = VIRTIO_VSOCK_MAX_PKT_BUF_SIZE; > > > > > > + if (pkt_len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE) { > > > > > > + if (info->type != VIRTIO_VSOCK_TYPE_DGRAM) > > > > > > + pkt_len = > > > > > > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE; > > > > > > + else > > > > > > + return 0; > > > > > > + } > > > > > > > > > > > > - /* virtio_transport_get_credit might return less than > > > > > > pkt_len credit */ > > > > > > - pkt_len = virtio_transport_get_credit(vvs, pkt_len); > > > > > > + if (info->type != VIRTIO_VSOCK_TYPE_DGRAM) { > > > > > > + /* virtio_transport_get_credit might return > > > > > > less than pkt_len credit */ > > > > > > + pkt_len = virtio_transport_get_credit(vvs, > > > > > > pkt_len); > > > > > > > > > > > > - /* Do not send zero length OP_RW pkt */ > > > > > > - if (pkt_len == 0 && info->op == VIRTIO_VSOCK_OP_RW) > > > > > > - return pkt_len; > > > > > > + /* Do not send zero length OP_RW pkt */ > > > > > > + if (pkt_len == 0 && info->op == > > > > > > VIRTIO_VSOCK_OP_RW) > > > > > > + return pkt_len; > > > > > > + } > > > > > > > > > > > > skb = virtio_transport_alloc_skb(info, pkt_len, > > > > > > src_cid, src_port, > > > > > > dst_cid, dst_port, > > > > > > &err); > > > > > > if (!skb) { > > > > > > - virtio_transport_put_credit(vvs, pkt_len); > > > > > > + if (info->type != VIRTIO_VSOCK_TYPE_DGRAM) > > > > > > + virtio_transport_put_credit(vvs, > > > > > > pkt_len); > > > > > > return err; > > > > > > } > > > > > > > > > > > > @@ -586,6 +595,61 @@ > > > > > > virtio_transport_seqpacket_dequeue(struct vsock_sock *vsk, > > > > > > } > > > > > > EXPORT_SYMBOL_GPL(virtio_transport_seqpacket_dequeue); > > > > > > > > > > > > +static ssize_t > > > > > 
> +virtio_transport_dgram_do_dequeue(struct vsock_sock *vsk, > > > > > > + struct msghdr *msg, size_t > > > > > > len) > > > > > > +{ > > > > > > + struct virtio_vsock_sock *vvs = vsk->trans; > > > > > > + struct sk_buff *skb; > > > > > > + size_t total = 0; > > > > > > + u32 free_space; > > > > > > + int err = -EFAULT; > > > > > > + > > > > > > + spin_lock_bh(&vvs->rx_lock); > > > > > > + if (total < len && !skb_queue_empty_lockless(&vvs- > > > > > > >rx_queue)) { > > > > > > + skb = __skb_dequeue(&vvs->rx_queue); > > > > > > + > > > > > > + total = len; > > > > > > + if (total > skb->len - vsock_metadata(skb)- > > > > > > >off) > > > > > > + total = skb->len - vsock_metadata(skb)- > > > > > > >off; > > > > > > + else if (total < skb->len - > > > > > > vsock_metadata(skb)->off) > > > > > > + msg->msg_flags |= MSG_TRUNC; > > > > > > + > > > > > > + /* sk_lock is held by caller so no one else can > > > > > > dequeue. > > > > > > + * Unlock rx_lock since memcpy_to_msg() may > > > > > > sleep. > > > > > > + */ > > > > > > + spin_unlock_bh(&vvs->rx_lock); > > > > > > + > > > > > > + err = memcpy_to_msg(msg, skb->data + > > > > > > vsock_metadata(skb)->off, total); > > > > > > + if (err) > > > > > > + return err; > > > > > > + > > > > > > + spin_lock_bh(&vvs->rx_lock); > > > > > > + > > > > > > + virtio_transport_dec_rx_pkt(vvs, skb); > > > > > > + consume_skb(skb); > > > > > > + } > > > > > > + > > > > > > + free_space = vvs->buf_alloc - (vvs->fwd_cnt - vvs- > > > > > > >last_fwd_cnt); > > > > > > + > > > > > > + spin_unlock_bh(&vvs->rx_lock); > > > > > > + > > > > > > + if (total > 0 && msg->msg_name) { > > > > > > + /* Provide the address of the sender. 
*/ > > > > > > + DECLARE_SOCKADDR(struct sockaddr_vm *, vm_addr, > > > > > > msg->msg_name); > > > > > > + > > > > > > + vsock_addr_init(vm_addr, > > > > > > le64_to_cpu(vsock_hdr(skb)->src_cid), > > > > > > + le32_to_cpu(vsock_hdr(skb)- > > > > > > >src_port)); > > > > > > + msg->msg_namelen = sizeof(*vm_addr); > > > > > > + } > > > > > > + return total; > > > > > > +} > > > > > > + > > > > > > +static s64 virtio_transport_dgram_has_data(struct vsock_sock > > > > > > *vsk) > > > > > > +{ > > > > > > + return virtio_transport_stream_has_data(vsk); > > > > > > +} > > > > > > + > > > > > > int > > > > > > virtio_transport_seqpacket_enqueue(struct vsock_sock *vsk, > > > > > > struct msghdr *msg, > > > > > > @@ -611,7 +675,66 @@ virtio_transport_dgram_dequeue(struct > > > > > > vsock_sock *vsk, > > > > > > struct msghdr *msg, > > > > > > size_t len, int flags) > > > > > > { > > > > > > - return -EOPNOTSUPP; > > > > > > + struct sock *sk; > > > > > > + size_t err = 0; > > > > > > + long timeout; > > > > > > + > > > > > > + DEFINE_WAIT(wait); > > > > > > + > > > > > > + sk = &vsk->sk; > > > > > > + err = 0; > > > > > > + > > > > > > + if (flags & MSG_OOB || flags & MSG_ERRQUEUE || flags & > > > > > > MSG_PEEK) > > > > > > + return -EOPNOTSUPP; > > > > > > + > > > > > > + lock_sock(sk); > > > > > > + > > > > > > + if (!len) > > > > > > + goto out; > > > > > > + > > > > > > + timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); > > > > > > + > > > > > > + while (1) { > > > > > > + s64 ready; > > > > > > + > > > > > > + prepare_to_wait(sk_sleep(sk), &wait, > > > > > > TASK_INTERRUPTIBLE); > > > > > > + ready = virtio_transport_dgram_has_data(vsk); > > > > > > + > > > > > > + if (ready == 0) { > > > > > > + if (timeout == 0) { > > > > > > + err = -EAGAIN; > > > > > > + finish_wait(sk_sleep(sk), > > > > > > &wait); > > > > > > + break; > > > > > > + } > > > > > > + > > > > > > + release_sock(sk); > > > > > > + timeout = schedule_timeout(timeout); > > > > > > + lock_sock(sk); > 
> > > > > + > > > > > > + if (signal_pending(current)) { > > > > > > + err = sock_intr_errno(timeout); > > > > > > + finish_wait(sk_sleep(sk), > > > > > > &wait); > > > > > > + break; > > > > > > + } else if (timeout == 0) { > > > > > > + err = -EAGAIN; > > > > > > + finish_wait(sk_sleep(sk), > > > > > > &wait); > > > > > > + break; > > > > > > + } > > > > > > + } else { > > > > > > + finish_wait(sk_sleep(sk), &wait); > > > > > > + > > > > > > + if (ready < 0) { > > > > > > + err = -ENOMEM; > > > > > > + goto out; > > > > > > + } > > > > > > + > > > > > > + err = > > > > > > virtio_transport_dgram_do_dequeue(vsk, msg, len); > > > > > > + break; > > > > > > + } > > > > > > + } > > > > > > +out: > > > > > > + release_sock(sk); > > > > > > + return err; > > > > > > } > > > > > > EXPORT_SYMBOL_GPL(virtio_transport_dgram_dequeue); > > > ^^^ > > > Maybe this generic data-waiting logic should be in af_vsock.c, as > > > for stream/seqpacket? > > > In this way, another transport which supports SOCK_DGRAM could > > > reuse it. > > > > I think that is a great idea. I'll test that change for v2. > > > > Thanks. > > Also for v2, I tested your patchset a little bit (writing here so as not to > spread it over all mails): > 1) seqpacket test in vsock_test.c fails (seems to be a MSG_EOR flag issue) I will investigate. > 2) I can't do rmmod with the following config (after testing): > CONFIG_VSOCKETS=m > CONFIG_VIRTIO_VSOCKETS=m > CONFIG_VIRTIO_VSOCKETS_COMMON=m > CONFIG_VHOST=m > CONFIG_VHOST_VSOCK=m > Guest is shut down, but rmmod fails. > 3) virtio_transport_init + virtio_transport_exit seem to need to be > under EXPORT_SYMBOL_GPL(), because both are used in another module. Definitely, will fix. > 4) I tried to send a 5kb (or 20kb, it doesn't matter) piece of data, but got > a kernel panic both in the guest and later in the host. > Thanks for catching that. I can reproduce it intermittently, but only for seqpacket. Did you happen to see this for other socket types as well?
Thanks > Thank You > > > > > > > > > > > > > > @@ -819,13 +942,13 @@ > > > > > > EXPORT_SYMBOL_GPL(virtio_transport_stream_allow); > > > > > > int virtio_transport_dgram_bind(struct vsock_sock *vsk, > > > > > > struct sockaddr_vm *addr) > > > > > > { > > > > > > - return -EOPNOTSUPP; > > > > > > + return vsock_bind_stream(vsk, addr); > > > > > > } > > > > > > EXPORT_SYMBOL_GPL(virtio_transport_dgram_bind); > > > > > > > > > > > > bool virtio_transport_dgram_allow(u32 cid, u32 port) > > > > > > { > > > > > > - return false; > > > > > > + return true; > > > > > > } > > > > > > EXPORT_SYMBOL_GPL(virtio_transport_dgram_allow); > > > > > > > > > > > > @@ -861,7 +984,16 @@ virtio_transport_dgram_enqueue(struct > > > > > > vsock_sock *vsk, > > > > > > struct msghdr *msg, > > > > > > size_t dgram_len) > > > > > > { > > > > > > - return -EOPNOTSUPP; > > > > > > + struct virtio_vsock_pkt_info info = { > > > > > > + .op = VIRTIO_VSOCK_OP_RW, > > > > > > + .msg = msg, > > > > > > + .pkt_len = dgram_len, > > > > > > + .vsk = vsk, > > > > > > + .remote_cid = remote_addr->svm_cid, > > > > > > + .remote_port = remote_addr->svm_port, > > > > > > + }; > > > > > > + > > > > > > + return virtio_transport_send_pkt_info(vsk, &info); > > > > > > } > > > > > > EXPORT_SYMBOL_GPL(virtio_transport_dgram_enqueue); > > > > > > > > > > > > @@ -1165,6 +1297,12 @@ virtio_transport_recv_connected(struct > > > > > > sock *sk, > > > > > > struct virtio_vsock_hdr *hdr = vsock_hdr(skb); > > > > > > int err = 0; > > > > > > > > > > > > + if (le16_to_cpu(vsock_hdr(skb)->type) == > > > > > > VIRTIO_VSOCK_TYPE_DGRAM) { > > > > > > + virtio_transport_recv_enqueue(vsk, skb); > > > > > > + sk->sk_data_ready(sk); > > > > > > + return err; > > > > > > + } > > > > > > + > > > > > > switch (le16_to_cpu(hdr->op)) { > > > > > > case VIRTIO_VSOCK_OP_RW: > > > > > > virtio_transport_recv_enqueue(vsk, skb); > > > > > > @@ -1320,7 +1458,8 @@ virtio_transport_recv_listen(struct > > > > > > sock *sk, struct sk_buff 
*skb, > > > > > > static bool virtio_transport_valid_type(u16 type) > > > > > > { > > > > > > return (type == VIRTIO_VSOCK_TYPE_STREAM) || > > > > > > - (type == VIRTIO_VSOCK_TYPE_SEQPACKET); > > > > > > + (type == VIRTIO_VSOCK_TYPE_SEQPACKET) || > > > > > > + (type == VIRTIO_VSOCK_TYPE_DGRAM); > > > > > > } > > > > > > > > > > > > /* We are under the virtio-vsock's vsock->rx_lock or vhost- > > > > > > vsock's vq->mutex > > > > > > @@ -1384,6 +1523,11 @@ void virtio_transport_recv_pkt(struct > > > > > > virtio_transport *t, > > > > > > goto free_pkt; > > > > > > } > > > > > > > > > > > > + if (sk->sk_type == SOCK_DGRAM) { > > > > > > + virtio_transport_recv_connected(sk, skb); > > > > > > + goto out; > > > > > > + } > > > > > > + > > > > > > space_available = virtio_transport_space_update(sk, > > > > > > skb); > > > > > > > > > > > > /* Update CID in case it has changed after a transport > > > > > > reset event */ > > > > > > @@ -1415,6 +1559,7 @@ void virtio_transport_recv_pkt(struct > > > > > > virtio_transport *t, > > > > > > break; > > > > > > } > > > > > > > > > > > > +out: > > > > > > release_sock(sk); > > > > > > > > > > > > /* Release refcnt obtained when we fetched this socket > > > > > > out of the > > > > > > -- > > > > > > 2.35.1 > > > > > > > > > > > > > > > > ------------------------------------------------------------- > > > > > -------- > > > > > To unsubscribe, e-mail: > > > > > virtio-dev-unsubscribe@lists.oasis-open.org > > > > > For additional commands, e-mail: > > > > > virtio-dev-help@lists.oasis-open.org > > > > > > > > > --------------------------------------------------------------------- > > To unsubscribe, e-mail: virtio-dev-unsubscribe@lists.oasis-open.org > > For additional commands, e-mail: virtio-dev-help@lists.oasis-open.org > >
On 16.08.2022 05:32, Bobby Eshleman wrote: > CC'ing virtio-dev@lists.oasis-open.org > > On Mon, Aug 15, 2022 at 10:56:08AM -0700, Bobby Eshleman wrote: >> This patch supports dgram in virtio and on the vhost side. Hello, sorry, I don't understand how this maintains message boundaries. Or is that unnecessary for SOCK_DGRAM? Thanks >> >> Signed-off-by: Jiang Wang <jiang.wang@bytedance.com> >> Signed-off-by: Bobby Eshleman <bobby.eshleman@bytedance.com> >> --- >> drivers/vhost/vsock.c | 2 +- >> include/net/af_vsock.h | 2 + >> include/uapi/linux/virtio_vsock.h | 1 + >> net/vmw_vsock/af_vsock.c | 26 +++- >> net/vmw_vsock/virtio_transport.c | 2 +- >> net/vmw_vsock/virtio_transport_common.c | 173 ++++++++++++++++++++++-- >> 6 files changed, 186 insertions(+), 20 deletions(-) >> >> diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c >> index a5d1bdb786fe..3dc72a5647ca 100644 >> --- a/drivers/vhost/vsock.c >> +++ b/drivers/vhost/vsock.c >> @@ -925,7 +925,7 @@ static int __init vhost_vsock_init(void) >> int ret; >> >> ret = vsock_core_register(&vhost_transport.transport, >> - VSOCK_TRANSPORT_F_H2G); >> + VSOCK_TRANSPORT_F_H2G | VSOCK_TRANSPORT_F_DGRAM); >> if (ret < 0) >> return ret; >> >> diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h >> index 1c53c4c4d88f..37e55c81e4df 100644 >> --- a/include/net/af_vsock.h >> +++ b/include/net/af_vsock.h >> @@ -78,6 +78,8 @@ struct vsock_sock { >> s64 vsock_stream_has_data(struct vsock_sock *vsk); >> s64 vsock_stream_has_space(struct vsock_sock *vsk); >> struct sock *vsock_create_connected(struct sock *parent); >> +int vsock_bind_stream(struct vsock_sock *vsk, >> + struct sockaddr_vm *addr); >> >> /**** TRANSPORT ****/ >> >> diff --git a/include/uapi/linux/virtio_vsock.h b/include/uapi/linux/virtio_vsock.h >> index 857df3a3a70d..0975b9c88292 100644 >> --- a/include/uapi/linux/virtio_vsock.h >> +++ b/include/uapi/linux/virtio_vsock.h >> @@ -70,6 +70,7 @@ struct virtio_vsock_hdr { >> enum virtio_vsock_type { >>
VIRTIO_VSOCK_TYPE_STREAM = 1, >> VIRTIO_VSOCK_TYPE_SEQPACKET = 2, >> + VIRTIO_VSOCK_TYPE_DGRAM = 3, >> }; >> >> enum virtio_vsock_op { >> diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c >> index 1893f8aafa48..87e4ae1866d3 100644 >> --- a/net/vmw_vsock/af_vsock.c >> +++ b/net/vmw_vsock/af_vsock.c >> @@ -675,6 +675,19 @@ static int __vsock_bind_connectible(struct vsock_sock *vsk, >> return 0; >> } >> >> +int vsock_bind_stream(struct vsock_sock *vsk, >> + struct sockaddr_vm *addr) >> +{ >> + int retval; >> + >> + spin_lock_bh(&vsock_table_lock); >> + retval = __vsock_bind_connectible(vsk, addr); >> + spin_unlock_bh(&vsock_table_lock); >> + >> + return retval; >> +} >> +EXPORT_SYMBOL(vsock_bind_stream); >> + >> static int __vsock_bind_dgram(struct vsock_sock *vsk, >> struct sockaddr_vm *addr) >> { >> @@ -2363,11 +2376,16 @@ int vsock_core_register(const struct vsock_transport *t, int features) >> } >> >> if (features & VSOCK_TRANSPORT_F_DGRAM) { >> - if (t_dgram) { >> - err = -EBUSY; >> - goto err_busy; >> + /* TODO: always chose the G2H variant over others, support nesting later */ >> + if (features & VSOCK_TRANSPORT_F_G2H) { >> + if (t_dgram) >> + pr_warn("virtio_vsock: t_dgram already set\n"); >> + t_dgram = t; >> + } >> + >> + if (!t_dgram) { >> + t_dgram = t; >> } >> - t_dgram = t; >> } >> >> if (features & VSOCK_TRANSPORT_F_LOCAL) { >> diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c >> index 073314312683..d4526ca462d2 100644 >> --- a/net/vmw_vsock/virtio_transport.c >> +++ b/net/vmw_vsock/virtio_transport.c >> @@ -850,7 +850,7 @@ static int __init virtio_vsock_init(void) >> return -ENOMEM; >> >> ret = vsock_core_register(&virtio_transport.transport, >> - VSOCK_TRANSPORT_F_G2H); >> + VSOCK_TRANSPORT_F_G2H | VSOCK_TRANSPORT_F_DGRAM); >> if (ret) >> goto out_wq; >> >> diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c >> index bdf16fff054f..aedb48728677 100644 >> --- 
a/net/vmw_vsock/virtio_transport_common.c >> +++ b/net/vmw_vsock/virtio_transport_common.c >> @@ -229,7 +229,9 @@ EXPORT_SYMBOL_GPL(virtio_transport_deliver_tap_pkt); >> >> static u16 virtio_transport_get_type(struct sock *sk) >> { >> - if (sk->sk_type == SOCK_STREAM) >> + if (sk->sk_type == SOCK_DGRAM) >> + return VIRTIO_VSOCK_TYPE_DGRAM; >> + else if (sk->sk_type == SOCK_STREAM) >> return VIRTIO_VSOCK_TYPE_STREAM; >> else >> return VIRTIO_VSOCK_TYPE_SEQPACKET; >> @@ -287,22 +289,29 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk, >> vvs = vsk->trans; >> >> /* we can send less than pkt_len bytes */ >> - if (pkt_len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE) >> - pkt_len = VIRTIO_VSOCK_MAX_PKT_BUF_SIZE; >> + if (pkt_len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE) { >> + if (info->type != VIRTIO_VSOCK_TYPE_DGRAM) >> + pkt_len = VIRTIO_VSOCK_MAX_PKT_BUF_SIZE; >> + else >> + return 0; >> + } >> >> - /* virtio_transport_get_credit might return less than pkt_len credit */ >> - pkt_len = virtio_transport_get_credit(vvs, pkt_len); >> + if (info->type != VIRTIO_VSOCK_TYPE_DGRAM) { >> + /* virtio_transport_get_credit might return less than pkt_len credit */ >> + pkt_len = virtio_transport_get_credit(vvs, pkt_len); >> >> - /* Do not send zero length OP_RW pkt */ >> - if (pkt_len == 0 && info->op == VIRTIO_VSOCK_OP_RW) >> - return pkt_len; >> + /* Do not send zero length OP_RW pkt */ >> + if (pkt_len == 0 && info->op == VIRTIO_VSOCK_OP_RW) >> + return pkt_len; >> + } >> >> skb = virtio_transport_alloc_skb(info, pkt_len, >> src_cid, src_port, >> dst_cid, dst_port, >> &err); >> if (!skb) { >> - virtio_transport_put_credit(vvs, pkt_len); >> + if (info->type != VIRTIO_VSOCK_TYPE_DGRAM) >> + virtio_transport_put_credit(vvs, pkt_len); >> return err; >> } >> >> @@ -586,6 +595,61 @@ virtio_transport_seqpacket_dequeue(struct vsock_sock *vsk, >> } >> EXPORT_SYMBOL_GPL(virtio_transport_seqpacket_dequeue); >> >> +static ssize_t >> +virtio_transport_dgram_do_dequeue(struct vsock_sock *vsk, 
>> + struct msghdr *msg, size_t len) >> +{ >> + struct virtio_vsock_sock *vvs = vsk->trans; >> + struct sk_buff *skb; >> + size_t total = 0; >> + u32 free_space; >> + int err = -EFAULT; >> + >> + spin_lock_bh(&vvs->rx_lock); >> + if (total < len && !skb_queue_empty_lockless(&vvs->rx_queue)) { >> + skb = __skb_dequeue(&vvs->rx_queue); >> + >> + total = len; >> + if (total > skb->len - vsock_metadata(skb)->off) >> + total = skb->len - vsock_metadata(skb)->off; >> + else if (total < skb->len - vsock_metadata(skb)->off) >> + msg->msg_flags |= MSG_TRUNC; >> + >> + /* sk_lock is held by caller so no one else can dequeue. >> + * Unlock rx_lock since memcpy_to_msg() may sleep. >> + */ >> + spin_unlock_bh(&vvs->rx_lock); >> + >> + err = memcpy_to_msg(msg, skb->data + vsock_metadata(skb)->off, total); >> + if (err) >> + return err; >> + >> + spin_lock_bh(&vvs->rx_lock); >> + >> + virtio_transport_dec_rx_pkt(vvs, skb); >> + consume_skb(skb); >> + } >> + >> + free_space = vvs->buf_alloc - (vvs->fwd_cnt - vvs->last_fwd_cnt); >> + >> + spin_unlock_bh(&vvs->rx_lock); >> + >> + if (total > 0 && msg->msg_name) { >> + /* Provide the address of the sender. 
*/ >> + DECLARE_SOCKADDR(struct sockaddr_vm *, vm_addr, msg->msg_name); >> + >> + vsock_addr_init(vm_addr, le64_to_cpu(vsock_hdr(skb)->src_cid), >> + le32_to_cpu(vsock_hdr(skb)->src_port)); >> + msg->msg_namelen = sizeof(*vm_addr); >> + } >> + return total; >> +} >> + >> +static s64 virtio_transport_dgram_has_data(struct vsock_sock *vsk) >> +{ >> + return virtio_transport_stream_has_data(vsk); >> +} >> + >> int >> virtio_transport_seqpacket_enqueue(struct vsock_sock *vsk, >> struct msghdr *msg, >> @@ -611,7 +675,66 @@ virtio_transport_dgram_dequeue(struct vsock_sock *vsk, >> struct msghdr *msg, >> size_t len, int flags) >> { >> - return -EOPNOTSUPP; >> + struct sock *sk; >> + size_t err = 0; >> + long timeout; >> + >> + DEFINE_WAIT(wait); >> + >> + sk = &vsk->sk; >> + err = 0; >> + >> + if (flags & MSG_OOB || flags & MSG_ERRQUEUE || flags & MSG_PEEK) >> + return -EOPNOTSUPP; >> + >> + lock_sock(sk); >> + >> + if (!len) >> + goto out; >> + >> + timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); >> + >> + while (1) { >> + s64 ready; >> + >> + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); >> + ready = virtio_transport_dgram_has_data(vsk); >> + >> + if (ready == 0) { >> + if (timeout == 0) { >> + err = -EAGAIN; >> + finish_wait(sk_sleep(sk), &wait); >> + break; >> + } >> + >> + release_sock(sk); >> + timeout = schedule_timeout(timeout); >> + lock_sock(sk); >> + >> + if (signal_pending(current)) { >> + err = sock_intr_errno(timeout); >> + finish_wait(sk_sleep(sk), &wait); >> + break; >> + } else if (timeout == 0) { >> + err = -EAGAIN; >> + finish_wait(sk_sleep(sk), &wait); >> + break; >> + } >> + } else { >> + finish_wait(sk_sleep(sk), &wait); >> + >> + if (ready < 0) { >> + err = -ENOMEM; >> + goto out; >> + } >> + >> + err = virtio_transport_dgram_do_dequeue(vsk, msg, len); >> + break; >> + } >> + } >> +out: >> + release_sock(sk); >> + return err; >> } >> EXPORT_SYMBOL_GPL(virtio_transport_dgram_dequeue); >> >> @@ -819,13 +942,13 @@ 
EXPORT_SYMBOL_GPL(virtio_transport_stream_allow); >> int virtio_transport_dgram_bind(struct vsock_sock *vsk, >> struct sockaddr_vm *addr) >> { >> - return -EOPNOTSUPP; >> + return vsock_bind_stream(vsk, addr); >> } >> EXPORT_SYMBOL_GPL(virtio_transport_dgram_bind); >> >> bool virtio_transport_dgram_allow(u32 cid, u32 port) >> { >> - return false; >> + return true; >> } >> EXPORT_SYMBOL_GPL(virtio_transport_dgram_allow); >> >> @@ -861,7 +984,16 @@ virtio_transport_dgram_enqueue(struct vsock_sock *vsk, >> struct msghdr *msg, >> size_t dgram_len) >> { >> - return -EOPNOTSUPP; >> + struct virtio_vsock_pkt_info info = { >> + .op = VIRTIO_VSOCK_OP_RW, >> + .msg = msg, >> + .pkt_len = dgram_len, >> + .vsk = vsk, >> + .remote_cid = remote_addr->svm_cid, >> + .remote_port = remote_addr->svm_port, >> + }; >> + >> + return virtio_transport_send_pkt_info(vsk, &info); >> } >> EXPORT_SYMBOL_GPL(virtio_transport_dgram_enqueue); >> >> @@ -1165,6 +1297,12 @@ virtio_transport_recv_connected(struct sock *sk, >> struct virtio_vsock_hdr *hdr = vsock_hdr(skb); >> int err = 0; >> >> + if (le16_to_cpu(vsock_hdr(skb)->type) == VIRTIO_VSOCK_TYPE_DGRAM) { >> + virtio_transport_recv_enqueue(vsk, skb); >> + sk->sk_data_ready(sk); >> + return err; >> + } >> + >> switch (le16_to_cpu(hdr->op)) { >> case VIRTIO_VSOCK_OP_RW: >> virtio_transport_recv_enqueue(vsk, skb); >> @@ -1320,7 +1458,8 @@ virtio_transport_recv_listen(struct sock *sk, struct sk_buff *skb, >> static bool virtio_transport_valid_type(u16 type) >> { >> return (type == VIRTIO_VSOCK_TYPE_STREAM) || >> - (type == VIRTIO_VSOCK_TYPE_SEQPACKET); >> + (type == VIRTIO_VSOCK_TYPE_SEQPACKET) || >> + (type == VIRTIO_VSOCK_TYPE_DGRAM); >> } >> >> /* We are under the virtio-vsock's vsock->rx_lock or vhost-vsock's vq->mutex >> @@ -1384,6 +1523,11 @@ void virtio_transport_recv_pkt(struct virtio_transport *t, >> goto free_pkt; >> } >> >> + if (sk->sk_type == SOCK_DGRAM) { >> + virtio_transport_recv_connected(sk, skb); >> + goto out; >> + } >> + >> 
space_available = virtio_transport_space_update(sk, skb); >> >> /* Update CID in case it has changed after a transport reset event */ >> @@ -1415,6 +1559,7 @@ void virtio_transport_recv_pkt(struct virtio_transport *t, >> break; >> } >> >> +out: >> release_sock(sk); >> >> /* Release refcnt obtained when we fetched this socket out of the >> -- >> 2.35.1 >> > > --------------------------------------------------------------------- > To unsubscribe, e-mail: virtio-dev-unsubscribe@lists.oasis-open.org > For additional commands, e-mail: virtio-dev-help@lists.oasis-open.org >
On 17.08.2022 08:01, Arseniy Krasnov wrote: > On 16.08.2022 05:32, Bobby Eshleman wrote: >> CC'ing virtio-dev@lists.oasis-open.org >> >> On Mon, Aug 15, 2022 at 10:56:08AM -0700, Bobby Eshleman wrote: >>> This patch supports dgram in virtio and on the vhost side. > Hello, > > sorry, i don't understand, how this maintains message boundaries? Or it > is unnecessary for SOCK_DGRAM? > > Thanks >>> >>> Signed-off-by: Jiang Wang <jiang.wang@bytedance.com> >>> Signed-off-by: Bobby Eshleman <bobby.eshleman@bytedance.com> >>> --- >>> drivers/vhost/vsock.c | 2 +- >>> include/net/af_vsock.h | 2 + >>> include/uapi/linux/virtio_vsock.h | 1 + >>> net/vmw_vsock/af_vsock.c | 26 +++- >>> net/vmw_vsock/virtio_transport.c | 2 +- >>> net/vmw_vsock/virtio_transport_common.c | 173 ++++++++++++++++++++++-- >>> 6 files changed, 186 insertions(+), 20 deletions(-) >>> >>> diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c >>> index a5d1bdb786fe..3dc72a5647ca 100644 >>> --- a/drivers/vhost/vsock.c >>> +++ b/drivers/vhost/vsock.c >>> @@ -925,7 +925,7 @@ static int __init vhost_vsock_init(void) >>> int ret; >>> >>> ret = vsock_core_register(&vhost_transport.transport, >>> - VSOCK_TRANSPORT_F_H2G); >>> + VSOCK_TRANSPORT_F_H2G | VSOCK_TRANSPORT_F_DGRAM); >>> if (ret < 0) >>> return ret; >>> >>> diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h >>> index 1c53c4c4d88f..37e55c81e4df 100644 >>> --- a/include/net/af_vsock.h >>> +++ b/include/net/af_vsock.h >>> @@ -78,6 +78,8 @@ struct vsock_sock { >>> s64 vsock_stream_has_data(struct vsock_sock *vsk); >>> s64 vsock_stream_has_space(struct vsock_sock *vsk); >>> struct sock *vsock_create_connected(struct sock *parent); >>> +int vsock_bind_stream(struct vsock_sock *vsk, >>> + struct sockaddr_vm *addr); >>> >>> /**** TRANSPORT ****/ >>> >>> diff --git a/include/uapi/linux/virtio_vsock.h b/include/uapi/linux/virtio_vsock.h >>> index 857df3a3a70d..0975b9c88292 100644 >>> --- a/include/uapi/linux/virtio_vsock.h >>> +++ 
b/include/uapi/linux/virtio_vsock.h >>> @@ -70,6 +70,7 @@ struct virtio_vsock_hdr { >>> enum virtio_vsock_type { >>> VIRTIO_VSOCK_TYPE_STREAM = 1, >>> VIRTIO_VSOCK_TYPE_SEQPACKET = 2, >>> + VIRTIO_VSOCK_TYPE_DGRAM = 3, >>> }; >>> >>> enum virtio_vsock_op { >>> diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c >>> index 1893f8aafa48..87e4ae1866d3 100644 >>> --- a/net/vmw_vsock/af_vsock.c >>> +++ b/net/vmw_vsock/af_vsock.c >>> @@ -675,6 +675,19 @@ static int __vsock_bind_connectible(struct vsock_sock *vsk, >>> return 0; >>> } >>> >>> +int vsock_bind_stream(struct vsock_sock *vsk, >>> + struct sockaddr_vm *addr) >>> +{ >>> + int retval; >>> + >>> + spin_lock_bh(&vsock_table_lock); >>> + retval = __vsock_bind_connectible(vsk, addr); >>> + spin_unlock_bh(&vsock_table_lock); >>> + >>> + return retval; >>> +} >>> +EXPORT_SYMBOL(vsock_bind_stream); >>> + >>> static int __vsock_bind_dgram(struct vsock_sock *vsk, >>> struct sockaddr_vm *addr) >>> { >>> @@ -2363,11 +2376,16 @@ int vsock_core_register(const struct vsock_transport *t, int features) >>> } >>> >>> if (features & VSOCK_TRANSPORT_F_DGRAM) { >>> - if (t_dgram) { >>> - err = -EBUSY; >>> - goto err_busy; >>> + /* TODO: always chose the G2H variant over others, support nesting later */ >>> + if (features & VSOCK_TRANSPORT_F_G2H) { >>> + if (t_dgram) >>> + pr_warn("virtio_vsock: t_dgram already set\n"); >>> + t_dgram = t; >>> + } >>> + >>> + if (!t_dgram) { >>> + t_dgram = t; >>> } >>> - t_dgram = t; >>> } >>> >>> if (features & VSOCK_TRANSPORT_F_LOCAL) { >>> diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c >>> index 073314312683..d4526ca462d2 100644 >>> --- a/net/vmw_vsock/virtio_transport.c >>> +++ b/net/vmw_vsock/virtio_transport.c >>> @@ -850,7 +850,7 @@ static int __init virtio_vsock_init(void) >>> return -ENOMEM; >>> >>> ret = vsock_core_register(&virtio_transport.transport, >>> - VSOCK_TRANSPORT_F_G2H); >>> + VSOCK_TRANSPORT_F_G2H | VSOCK_TRANSPORT_F_DGRAM); >>> if 
(ret) >>> goto out_wq; >>> >>> diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c >>> index bdf16fff054f..aedb48728677 100644 >>> --- a/net/vmw_vsock/virtio_transport_common.c >>> +++ b/net/vmw_vsock/virtio_transport_common.c >>> @@ -229,7 +229,9 @@ EXPORT_SYMBOL_GPL(virtio_transport_deliver_tap_pkt); >>> >>> static u16 virtio_transport_get_type(struct sock *sk) >>> { >>> - if (sk->sk_type == SOCK_STREAM) >>> + if (sk->sk_type == SOCK_DGRAM) >>> + return VIRTIO_VSOCK_TYPE_DGRAM; >>> + else if (sk->sk_type == SOCK_STREAM) >>> return VIRTIO_VSOCK_TYPE_STREAM; >>> else >>> return VIRTIO_VSOCK_TYPE_SEQPACKET; >>> @@ -287,22 +289,29 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk, >>> vvs = vsk->trans; >>> >>> /* we can send less than pkt_len bytes */ >>> - if (pkt_len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE) >>> - pkt_len = VIRTIO_VSOCK_MAX_PKT_BUF_SIZE; >>> + if (pkt_len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE) { >>> + if (info->type != VIRTIO_VSOCK_TYPE_DGRAM) >>> + pkt_len = VIRTIO_VSOCK_MAX_PKT_BUF_SIZE; >>> + else >>> + return 0; >>> + } >>> >>> - /* virtio_transport_get_credit might return less than pkt_len credit */ >>> - pkt_len = virtio_transport_get_credit(vvs, pkt_len); >>> + if (info->type != VIRTIO_VSOCK_TYPE_DGRAM) { >>> + /* virtio_transport_get_credit might return less than pkt_len credit */ >>> + pkt_len = virtio_transport_get_credit(vvs, pkt_len); >>> >>> - /* Do not send zero length OP_RW pkt */ >>> - if (pkt_len == 0 && info->op == VIRTIO_VSOCK_OP_RW) >>> - return pkt_len; >>> + /* Do not send zero length OP_RW pkt */ >>> + if (pkt_len == 0 && info->op == VIRTIO_VSOCK_OP_RW) >>> + return pkt_len; >>> + } >>> >>> skb = virtio_transport_alloc_skb(info, pkt_len, >>> src_cid, src_port, >>> dst_cid, dst_port, >>> &err); >>> if (!skb) { >>> - virtio_transport_put_credit(vvs, pkt_len); >>> + if (info->type != VIRTIO_VSOCK_TYPE_DGRAM) >>> + virtio_transport_put_credit(vvs, pkt_len); >>> return err; >>> } >>> 
>>> @@ -586,6 +595,61 @@ virtio_transport_seqpacket_dequeue(struct vsock_sock *vsk, >>> } >>> EXPORT_SYMBOL_GPL(virtio_transport_seqpacket_dequeue); >>> >>> +static ssize_t >>> +virtio_transport_dgram_do_dequeue(struct vsock_sock *vsk, >>> + struct msghdr *msg, size_t len) >>> +{ >>> + struct virtio_vsock_sock *vvs = vsk->trans; >>> + struct sk_buff *skb; >>> + size_t total = 0; >>> + u32 free_space; >>> + int err = -EFAULT; >>> + >>> + spin_lock_bh(&vvs->rx_lock); >>> + if (total < len && !skb_queue_empty_lockless(&vvs->rx_queue)) { >>> + skb = __skb_dequeue(&vvs->rx_queue); >>> + >>> + total = len; >>> + if (total > skb->len - vsock_metadata(skb)->off) >>> + total = skb->len - vsock_metadata(skb)->off; >>> + else if (total < skb->len - vsock_metadata(skb)->off) >>> + msg->msg_flags |= MSG_TRUNC; >>> + >>> + /* sk_lock is held by caller so no one else can dequeue. >>> + * Unlock rx_lock since memcpy_to_msg() may sleep. >>> + */ >>> + spin_unlock_bh(&vvs->rx_lock); >>> + >>> + err = memcpy_to_msg(msg, skb->data + vsock_metadata(skb)->off, total); >>> + if (err) >>> + return err; >>> + >>> + spin_lock_bh(&vvs->rx_lock); >>> + >>> + virtio_transport_dec_rx_pkt(vvs, skb); >>> + consume_skb(skb); >>> + } >>> + >>> + free_space = vvs->buf_alloc - (vvs->fwd_cnt - vvs->last_fwd_cnt); >>> + >>> + spin_unlock_bh(&vvs->rx_lock); >>> + >>> + if (total > 0 && msg->msg_name) { >>> + /* Provide the address of the sender. 
*/ >>> + DECLARE_SOCKADDR(struct sockaddr_vm *, vm_addr, msg->msg_name); >>> + >>> + vsock_addr_init(vm_addr, le64_to_cpu(vsock_hdr(skb)->src_cid), >>> + le32_to_cpu(vsock_hdr(skb)->src_port)); >>> + msg->msg_namelen = sizeof(*vm_addr); >>> + } >>> + return total; >>> +} >>> + >>> +static s64 virtio_transport_dgram_has_data(struct vsock_sock *vsk) >>> +{ >>> + return virtio_transport_stream_has_data(vsk); >>> +} >>> + >>> int >>> virtio_transport_seqpacket_enqueue(struct vsock_sock *vsk, >>> struct msghdr *msg, >>> @@ -611,7 +675,66 @@ virtio_transport_dgram_dequeue(struct vsock_sock *vsk, >>> struct msghdr *msg, >>> size_t len, int flags) >>> { >>> - return -EOPNOTSUPP; >>> + struct sock *sk; >>> + size_t err = 0; >>> + long timeout; >>> + >>> + DEFINE_WAIT(wait); >>> + >>> + sk = &vsk->sk; >>> + err = 0; >>> + >>> + if (flags & MSG_OOB || flags & MSG_ERRQUEUE || flags & MSG_PEEK) >>> + return -EOPNOTSUPP; >>> + >>> + lock_sock(sk); >>> + >>> + if (!len) >>> + goto out; >>> + >>> + timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); >>> + >>> + while (1) { >>> + s64 ready; >>> + >>> + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); >>> + ready = virtio_transport_dgram_has_data(vsk); >>> + >>> + if (ready == 0) { >>> + if (timeout == 0) { >>> + err = -EAGAIN; >>> + finish_wait(sk_sleep(sk), &wait); >>> + break; >>> + } >>> + >>> + release_sock(sk); >>> + timeout = schedule_timeout(timeout); >>> + lock_sock(sk); >>> + >>> + if (signal_pending(current)) { >>> + err = sock_intr_errno(timeout); >>> + finish_wait(sk_sleep(sk), &wait); >>> + break; >>> + } else if (timeout == 0) { >>> + err = -EAGAIN; >>> + finish_wait(sk_sleep(sk), &wait); >>> + break; >>> + } >>> + } else { >>> + finish_wait(sk_sleep(sk), &wait); >>> + >>> + if (ready < 0) { >>> + err = -ENOMEM; >>> + goto out; >>> + } >>> + >>> + err = virtio_transport_dgram_do_dequeue(vsk, msg, len); >>> + break; >>> + } >>> + } >>> +out: >>> + release_sock(sk); >>> + return err; >>> } >>> 
EXPORT_SYMBOL_GPL(virtio_transport_dgram_dequeue); ^^^ Maybe this generic data-waiting logic should live in af_vsock.c, as it does for stream/seqpacket? That way, another transport that supports SOCK_DGRAM could reuse it. >>> >>> @@ -819,13 +942,13 @@ EXPORT_SYMBOL_GPL(virtio_transport_stream_allow); >>> int virtio_transport_dgram_bind(struct vsock_sock *vsk, >>> struct sockaddr_vm *addr) >>> { >>> - return -EOPNOTSUPP; >>> + return vsock_bind_stream(vsk, addr); >>> } >>> EXPORT_SYMBOL_GPL(virtio_transport_dgram_bind); >>> >>> bool virtio_transport_dgram_allow(u32 cid, u32 port) >>> { >>> - return false; >>> + return true; >>> } >>> EXPORT_SYMBOL_GPL(virtio_transport_dgram_allow); >>> >>> @@ -861,7 +984,16 @@ virtio_transport_dgram_enqueue(struct vsock_sock *vsk, >>> struct msghdr *msg, >>> size_t dgram_len) >>> { >>> - return -EOPNOTSUPP; >>> + struct virtio_vsock_pkt_info info = { >>> + .op = VIRTIO_VSOCK_OP_RW, >>> + .msg = msg, >>> + .pkt_len = dgram_len, >>> + .vsk = vsk, >>> + .remote_cid = remote_addr->svm_cid, >>> + .remote_port = remote_addr->svm_port, >>> + }; >>> + >>> + return virtio_transport_send_pkt_info(vsk, &info); >>> } >>> EXPORT_SYMBOL_GPL(virtio_transport_dgram_enqueue); >>> >>> @@ -1165,6 +1297,12 @@ virtio_transport_recv_connected(struct sock *sk, >>> struct virtio_vsock_hdr *hdr = vsock_hdr(skb); >>> int err = 0; >>> >>> + if (le16_to_cpu(vsock_hdr(skb)->type) == VIRTIO_VSOCK_TYPE_DGRAM) { >>> + virtio_transport_recv_enqueue(vsk, skb); >>> + sk->sk_data_ready(sk); >>> + return err; >>> + } >>> + >>> switch (le16_to_cpu(hdr->op)) { >>> case VIRTIO_VSOCK_OP_RW: >>> virtio_transport_recv_enqueue(vsk, skb); >>> @@ -1320,7 +1458,8 @@ virtio_transport_recv_listen(struct sock *sk, struct sk_buff *skb, >>> static bool virtio_transport_valid_type(u16 type) >>> { >>> return (type == VIRTIO_VSOCK_TYPE_STREAM) || >>> - (type == VIRTIO_VSOCK_TYPE_SEQPACKET); >>> + (type == VIRTIO_VSOCK_TYPE_SEQPACKET) || >>> + (type == VIRTIO_VSOCK_TYPE_DGRAM); >>> } >>> >>> 
/* We are under the virtio-vsock's vsock->rx_lock or vhost-vsock's vq->mutex >>> @@ -1384,6 +1523,11 @@ void virtio_transport_recv_pkt(struct virtio_transport *t, >>> goto free_pkt; >>> } >>> >>> + if (sk->sk_type == SOCK_DGRAM) { >>> + virtio_transport_recv_connected(sk, skb); >>> + goto out; >>> + } >>> + >>> space_available = virtio_transport_space_update(sk, skb); >>> >>> /* Update CID in case it has changed after a transport reset event */ >>> @@ -1415,6 +1559,7 @@ void virtio_transport_recv_pkt(struct virtio_transport *t, >>> break; >>> } >>> >>> +out: >>> release_sock(sk); >>> >>> /* Release refcnt obtained when we fetched this socket out of the >>> -- >>> 2.35.1 >>> >> >> --------------------------------------------------------------------- >> To unsubscribe, e-mail: virtio-dev-unsubscribe@lists.oasis-open.org >> For additional commands, e-mail: virtio-dev-help@lists.oasis-open.org >> >
On Tue, 2022-08-16 at 09:57 +0000, Bobby Eshleman wrote: > On Wed, Aug 17, 2022 at 05:01:00AM +0000, Arseniy Krasnov wrote: > > On 16.08.2022 05:32, Bobby Eshleman wrote: > > > CC'ing virtio-dev@lists.oasis-open.org > > > > > > On Mon, Aug 15, 2022 at 10:56:08AM -0700, Bobby Eshleman wrote: > > > > This patch supports dgram in virtio and on the vhost side. > > Hello, > > > > sorry, i don't understand, how this maintains message boundaries? > > Or it > > is unnecessary for SOCK_DGRAM? > > > > Thanks > > If I understand your question, the length is included in the header, > so > receivers always know that header start + header length + payload > length > marks the message boundary. I mean, consider the following case: the host sends a 5 KB packet to the guest. The guest uses 4 KB virtio rx buffers, so in drivers/vhost/vsock.c this 5 KB packet (i.e. its payload) will be placed into 2 virtio rx buffers: 4 KB in the first buffer and the remaining 1 KB in the second. Is it implemented such that the receiver gets the whole 5 KB piece of data in a single 'read()/recv()' system call? 
Thanks > > > > > Signed-off-by: Jiang Wang <jiang.wang@bytedance.com> > > > > Signed-off-by: Bobby Eshleman <bobby.eshleman@bytedance.com> > > > > --- > > > > drivers/vhost/vsock.c | 2 +- > > > > include/net/af_vsock.h | 2 + > > > > include/uapi/linux/virtio_vsock.h | 1 + > > > > net/vmw_vsock/af_vsock.c | 26 +++- > > > > net/vmw_vsock/virtio_transport.c | 2 +- > > > > net/vmw_vsock/virtio_transport_common.c | 173 > > > > ++++++++++++++++++++++-- > > > > 6 files changed, 186 insertions(+), 20 deletions(-) > > > > > > > > diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c > > > > index a5d1bdb786fe..3dc72a5647ca 100644 > > > > --- a/drivers/vhost/vsock.c > > > > +++ b/drivers/vhost/vsock.c > > > > @@ -925,7 +925,7 @@ static int __init vhost_vsock_init(void) > > > > int ret; > > > > > > > > ret = vsock_core_register(&vhost_transport.transport, > > > > - VSOCK_TRANSPORT_F_H2G); > > > > + VSOCK_TRANSPORT_F_H2G | > > > > VSOCK_TRANSPORT_F_DGRAM); > > > > if (ret < 0) > > > > return ret; > > > > > > > > diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h > > > > index 1c53c4c4d88f..37e55c81e4df 100644 > > > > --- a/include/net/af_vsock.h > > > > +++ b/include/net/af_vsock.h > > > > @@ -78,6 +78,8 @@ struct vsock_sock { > > > > s64 vsock_stream_has_data(struct vsock_sock *vsk); > > > > s64 vsock_stream_has_space(struct vsock_sock *vsk); > > > > struct sock *vsock_create_connected(struct sock *parent); > > > > +int vsock_bind_stream(struct vsock_sock *vsk, > > > > + struct sockaddr_vm *addr); > > > > > > > > /**** TRANSPORT ****/ > > > > > > > > diff --git a/include/uapi/linux/virtio_vsock.h > > > > b/include/uapi/linux/virtio_vsock.h > > > > index 857df3a3a70d..0975b9c88292 100644 > > > > --- a/include/uapi/linux/virtio_vsock.h > > > > +++ b/include/uapi/linux/virtio_vsock.h > > > > @@ -70,6 +70,7 @@ struct virtio_vsock_hdr { > > > > enum virtio_vsock_type { > > > > VIRTIO_VSOCK_TYPE_STREAM = 1, > > > > VIRTIO_VSOCK_TYPE_SEQPACKET = 2, > > > > + 
VIRTIO_VSOCK_TYPE_DGRAM = 3, > > > > }; > > > > > > > > enum virtio_vsock_op { > > > > diff --git a/net/vmw_vsock/af_vsock.c > > > > b/net/vmw_vsock/af_vsock.c > > > > index 1893f8aafa48..87e4ae1866d3 100644 > > > > --- a/net/vmw_vsock/af_vsock.c > > > > +++ b/net/vmw_vsock/af_vsock.c > > > > @@ -675,6 +675,19 @@ static int __vsock_bind_connectible(struct > > > > vsock_sock *vsk, > > > > return 0; > > > > } > > > > > > > > +int vsock_bind_stream(struct vsock_sock *vsk, > > > > + struct sockaddr_vm *addr) > > > > +{ > > > > + int retval; > > > > + > > > > + spin_lock_bh(&vsock_table_lock); > > > > + retval = __vsock_bind_connectible(vsk, addr); > > > > + spin_unlock_bh(&vsock_table_lock); > > > > + > > > > + return retval; > > > > +} > > > > +EXPORT_SYMBOL(vsock_bind_stream); > > > > + > > > > static int __vsock_bind_dgram(struct vsock_sock *vsk, > > > > struct sockaddr_vm *addr) > > > > { > > > > @@ -2363,11 +2376,16 @@ int vsock_core_register(const struct > > > > vsock_transport *t, int features) > > > > } > > > > > > > > if (features & VSOCK_TRANSPORT_F_DGRAM) { > > > > - if (t_dgram) { > > > > - err = -EBUSY; > > > > - goto err_busy; > > > > + /* TODO: always chose the G2H variant over > > > > others, support nesting later */ > > > > + if (features & VSOCK_TRANSPORT_F_G2H) { > > > > + if (t_dgram) > > > > + pr_warn("virtio_vsock: t_dgram > > > > already set\n"); > > > > + t_dgram = t; > > > > + } > > > > + > > > > + if (!t_dgram) { > > > > + t_dgram = t; > > > > } > > > > - t_dgram = t; > > > > } > > > > > > > > if (features & VSOCK_TRANSPORT_F_LOCAL) { > > > > diff --git a/net/vmw_vsock/virtio_transport.c > > > > b/net/vmw_vsock/virtio_transport.c > > > > index 073314312683..d4526ca462d2 100644 > > > > --- a/net/vmw_vsock/virtio_transport.c > > > > +++ b/net/vmw_vsock/virtio_transport.c > > > > @@ -850,7 +850,7 @@ static int __init virtio_vsock_init(void) > > > > return -ENOMEM; > > > > > > > > ret = vsock_core_register(&virtio_transport.transport, > > > > - 
VSOCK_TRANSPORT_F_G2H); > > > > + VSOCK_TRANSPORT_F_G2H | > > > > VSOCK_TRANSPORT_F_DGRAM); > > > > if (ret) > > > > goto out_wq; > > > > > > > > diff --git a/net/vmw_vsock/virtio_transport_common.c > > > > b/net/vmw_vsock/virtio_transport_common.c > > > > index bdf16fff054f..aedb48728677 100644 > > > > --- a/net/vmw_vsock/virtio_transport_common.c > > > > +++ b/net/vmw_vsock/virtio_transport_common.c > > > > @@ -229,7 +229,9 @@ > > > > EXPORT_SYMBOL_GPL(virtio_transport_deliver_tap_pkt); > > > > > > > > static u16 virtio_transport_get_type(struct sock *sk) > > > > { > > > > - if (sk->sk_type == SOCK_STREAM) > > > > + if (sk->sk_type == SOCK_DGRAM) > > > > + return VIRTIO_VSOCK_TYPE_DGRAM; > > > > + else if (sk->sk_type == SOCK_STREAM) > > > > return VIRTIO_VSOCK_TYPE_STREAM; > > > > else > > > > return VIRTIO_VSOCK_TYPE_SEQPACKET; > > > > @@ -287,22 +289,29 @@ static int > > > > virtio_transport_send_pkt_info(struct vsock_sock *vsk, > > > > vvs = vsk->trans; > > > > > > > > /* we can send less than pkt_len bytes */ > > > > - if (pkt_len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE) > > > > - pkt_len = VIRTIO_VSOCK_MAX_PKT_BUF_SIZE; > > > > + if (pkt_len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE) { > > > > + if (info->type != VIRTIO_VSOCK_TYPE_DGRAM) > > > > + pkt_len = > > > > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE; > > > > + else > > > > + return 0; > > > > + } > > > > > > > > - /* virtio_transport_get_credit might return less than > > > > pkt_len credit */ > > > > - pkt_len = virtio_transport_get_credit(vvs, pkt_len); > > > > + if (info->type != VIRTIO_VSOCK_TYPE_DGRAM) { > > > > + /* virtio_transport_get_credit might return > > > > less than pkt_len credit */ > > > > + pkt_len = virtio_transport_get_credit(vvs, > > > > pkt_len); > > > > > > > > - /* Do not send zero length OP_RW pkt */ > > > > - if (pkt_len == 0 && info->op == VIRTIO_VSOCK_OP_RW) > > > > - return pkt_len; > > > > + /* Do not send zero length OP_RW pkt */ > > > > + if (pkt_len == 0 && info->op == > > > > VIRTIO_VSOCK_OP_RW) > 
> > > + return pkt_len; > > > > + } > > > > > > > > skb = virtio_transport_alloc_skb(info, pkt_len, > > > > src_cid, src_port, > > > > dst_cid, dst_port, > > > > &err); > > > > if (!skb) { > > > > - virtio_transport_put_credit(vvs, pkt_len); > > > > + if (info->type != VIRTIO_VSOCK_TYPE_DGRAM) > > > > + virtio_transport_put_credit(vvs, > > > > pkt_len); > > > > return err; > > > > } > > > > > > > > @@ -586,6 +595,61 @@ virtio_transport_seqpacket_dequeue(struct > > > > vsock_sock *vsk, > > > > } > > > > EXPORT_SYMBOL_GPL(virtio_transport_seqpacket_dequeue); > > > > > > > > +static ssize_t > > > > +virtio_transport_dgram_do_dequeue(struct vsock_sock *vsk, > > > > + struct msghdr *msg, size_t > > > > len) > > > > +{ > > > > + struct virtio_vsock_sock *vvs = vsk->trans; > > > > + struct sk_buff *skb; > > > > + size_t total = 0; > > > > + u32 free_space; > > > > + int err = -EFAULT; > > > > + > > > > + spin_lock_bh(&vvs->rx_lock); > > > > + if (total < len && !skb_queue_empty_lockless(&vvs- > > > > >rx_queue)) { > > > > + skb = __skb_dequeue(&vvs->rx_queue); > > > > + > > > > + total = len; > > > > + if (total > skb->len - vsock_metadata(skb)- > > > > >off) > > > > + total = skb->len - vsock_metadata(skb)- > > > > >off; > > > > + else if (total < skb->len - > > > > vsock_metadata(skb)->off) > > > > + msg->msg_flags |= MSG_TRUNC; > > > > + > > > > + /* sk_lock is held by caller so no one else can > > > > dequeue. > > > > + * Unlock rx_lock since memcpy_to_msg() may > > > > sleep. 
> > > > + */ > > > > + spin_unlock_bh(&vvs->rx_lock); > > > > + > > > > + err = memcpy_to_msg(msg, skb->data + > > > > vsock_metadata(skb)->off, total); > > > > + if (err) > > > > + return err; > > > > + > > > > + spin_lock_bh(&vvs->rx_lock); > > > > + > > > > + virtio_transport_dec_rx_pkt(vvs, skb); > > > > + consume_skb(skb); > > > > + } > > > > + > > > > + free_space = vvs->buf_alloc - (vvs->fwd_cnt - vvs- > > > > >last_fwd_cnt); > > > > + > > > > + spin_unlock_bh(&vvs->rx_lock); > > > > + > > > > + if (total > 0 && msg->msg_name) { > > > > + /* Provide the address of the sender. */ > > > > + DECLARE_SOCKADDR(struct sockaddr_vm *, vm_addr, > > > > msg->msg_name); > > > > + > > > > + vsock_addr_init(vm_addr, > > > > le64_to_cpu(vsock_hdr(skb)->src_cid), > > > > + le32_to_cpu(vsock_hdr(skb)- > > > > >src_port)); > > > > + msg->msg_namelen = sizeof(*vm_addr); > > > > + } > > > > + return total; > > > > +} > > > > + > > > > +static s64 virtio_transport_dgram_has_data(struct vsock_sock > > > > *vsk) > > > > +{ > > > > + return virtio_transport_stream_has_data(vsk); > > > > +} > > > > + > > > > int > > > > virtio_transport_seqpacket_enqueue(struct vsock_sock *vsk, > > > > struct msghdr *msg, > > > > @@ -611,7 +675,66 @@ virtio_transport_dgram_dequeue(struct > > > > vsock_sock *vsk, > > > > struct msghdr *msg, > > > > size_t len, int flags) > > > > { > > > > - return -EOPNOTSUPP; > > > > + struct sock *sk; > > > > + size_t err = 0; > > > > + long timeout; > > > > + > > > > + DEFINE_WAIT(wait); > > > > + > > > > + sk = &vsk->sk; > > > > + err = 0; > > > > + > > > > + if (flags & MSG_OOB || flags & MSG_ERRQUEUE || flags & > > > > MSG_PEEK) > > > > + return -EOPNOTSUPP; > > > > + > > > > + lock_sock(sk); > > > > + > > > > + if (!len) > > > > + goto out; > > > > + > > > > + timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); > > > > + > > > > + while (1) { > > > > + s64 ready; > > > > + > > > > + prepare_to_wait(sk_sleep(sk), &wait, > > > > TASK_INTERRUPTIBLE); > > > > + 
ready = virtio_transport_dgram_has_data(vsk); > > > > + > > > > + if (ready == 0) { > > > > + if (timeout == 0) { > > > > + err = -EAGAIN; > > > > + finish_wait(sk_sleep(sk), > > > > &wait); > > > > + break; > > > > + } > > > > + > > > > + release_sock(sk); > > > > + timeout = schedule_timeout(timeout); > > > > + lock_sock(sk); > > > > + > > > > + if (signal_pending(current)) { > > > > + err = sock_intr_errno(timeout); > > > > + finish_wait(sk_sleep(sk), > > > > &wait); > > > > + break; > > > > + } else if (timeout == 0) { > > > > + err = -EAGAIN; > > > > + finish_wait(sk_sleep(sk), > > > > &wait); > > > > + break; > > > > + } > > > > + } else { > > > > + finish_wait(sk_sleep(sk), &wait); > > > > + > > > > + if (ready < 0) { > > > > + err = -ENOMEM; > > > > + goto out; > > > > + } > > > > + > > > > + err = > > > > virtio_transport_dgram_do_dequeue(vsk, msg, len); > > > > + break; > > > > + } > > > > + } > > > > +out: > > > > + release_sock(sk); > > > > + return err; > > > > } > > > > EXPORT_SYMBOL_GPL(virtio_transport_dgram_dequeue); > > > > > > > > @@ -819,13 +942,13 @@ > > > > EXPORT_SYMBOL_GPL(virtio_transport_stream_allow); > > > > int virtio_transport_dgram_bind(struct vsock_sock *vsk, > > > > struct sockaddr_vm *addr) > > > > { > > > > - return -EOPNOTSUPP; > > > > + return vsock_bind_stream(vsk, addr); > > > > } > > > > EXPORT_SYMBOL_GPL(virtio_transport_dgram_bind); > > > > > > > > bool virtio_transport_dgram_allow(u32 cid, u32 port) > > > > { > > > > - return false; > > > > + return true; > > > > } > > > > EXPORT_SYMBOL_GPL(virtio_transport_dgram_allow); > > > > > > > > @@ -861,7 +984,16 @@ virtio_transport_dgram_enqueue(struct > > > > vsock_sock *vsk, > > > > struct msghdr *msg, > > > > size_t dgram_len) > > > > { > > > > - return -EOPNOTSUPP; > > > > + struct virtio_vsock_pkt_info info = { > > > > + .op = VIRTIO_VSOCK_OP_RW, > > > > + .msg = msg, > > > > + .pkt_len = dgram_len, > > > > + .vsk = vsk, > > > > + .remote_cid = remote_addr->svm_cid, > > > > + 
.remote_port = remote_addr->svm_port, > > > > + }; > > > > + > > > > + return virtio_transport_send_pkt_info(vsk, &info); > > > > } > > > > EXPORT_SYMBOL_GPL(virtio_transport_dgram_enqueue); > > > > > > > > @@ -1165,6 +1297,12 @@ virtio_transport_recv_connected(struct > > > > sock *sk, > > > > struct virtio_vsock_hdr *hdr = vsock_hdr(skb); > > > > int err = 0; > > > > > > > > + if (le16_to_cpu(vsock_hdr(skb)->type) == > > > > VIRTIO_VSOCK_TYPE_DGRAM) { > > > > + virtio_transport_recv_enqueue(vsk, skb); > > > > + sk->sk_data_ready(sk); > > > > + return err; > > > > + } > > > > + > > > > switch (le16_to_cpu(hdr->op)) { > > > > case VIRTIO_VSOCK_OP_RW: > > > > virtio_transport_recv_enqueue(vsk, skb); > > > > @@ -1320,7 +1458,8 @@ virtio_transport_recv_listen(struct sock > > > > *sk, struct sk_buff *skb, > > > > static bool virtio_transport_valid_type(u16 type) > > > > { > > > > return (type == VIRTIO_VSOCK_TYPE_STREAM) || > > > > - (type == VIRTIO_VSOCK_TYPE_SEQPACKET); > > > > + (type == VIRTIO_VSOCK_TYPE_SEQPACKET) || > > > > + (type == VIRTIO_VSOCK_TYPE_DGRAM); > > > > } > > > > > > > > /* We are under the virtio-vsock's vsock->rx_lock or vhost- > > > > vsock's vq->mutex > > > > @@ -1384,6 +1523,11 @@ void virtio_transport_recv_pkt(struct > > > > virtio_transport *t, > > > > goto free_pkt; > > > > } > > > > > > > > + if (sk->sk_type == SOCK_DGRAM) { > > > > + virtio_transport_recv_connected(sk, skb); > > > > + goto out; > > > > + } > > > > + > > > > space_available = virtio_transport_space_update(sk, > > > > skb); > > > > > > > > /* Update CID in case it has changed after a transport > > > > reset event */ > > > > @@ -1415,6 +1559,7 @@ void virtio_transport_recv_pkt(struct > > > > virtio_transport *t, > > > > break; > > > > } > > > > > > > > +out: > > > > release_sock(sk); > > > > > > > > /* Release refcnt obtained when we fetched this socket > > > > out of the > > > > -- > > > > 2.35.1 > > > > > > > > > > 
--------------------------------------------------------------- > > > ------ > > > To unsubscribe, e-mail: > > > virtio-dev-unsubscribe@lists.oasis-open.org > > > For additional commands, e-mail: > > > virtio-dev-help@lists.oasis-open.org > > >
On Tue, 2022-08-16 at 09:58 +0000, Bobby Eshleman wrote: > On Wed, Aug 17, 2022 at 05:42:08AM +0000, Arseniy Krasnov wrote: > > On 17.08.2022 08:01, Arseniy Krasnov wrote: > > > On 16.08.2022 05:32, Bobby Eshleman wrote: > > > > CC'ing virtio-dev@lists.oasis-open.org > > > > > > > > On Mon, Aug 15, 2022 at 10:56:08AM -0700, Bobby Eshleman wrote: > > > > > This patch supports dgram in virtio and on the vhost side. > > > Hello, > > > > > > sorry, i don't understand, how this maintains message boundaries? > > > Or it > > > is unnecessary for SOCK_DGRAM? > > > > > > Thanks > > > > > Signed-off-by: Jiang Wang <jiang.wang@bytedance.com> > > > > > Signed-off-by: Bobby Eshleman <bobby.eshleman@bytedance.com> > > > > > --- > > > > > drivers/vhost/vsock.c | 2 +- > > > > > include/net/af_vsock.h | 2 + > > > > > include/uapi/linux/virtio_vsock.h | 1 + > > > > > net/vmw_vsock/af_vsock.c | 26 +++- > > > > > net/vmw_vsock/virtio_transport.c | 2 +- > > > > > net/vmw_vsock/virtio_transport_common.c | 173 > > > > > ++++++++++++++++++++++-- > > > > > 6 files changed, 186 insertions(+), 20 deletions(-) > > > > > > > > > > diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c > > > > > index a5d1bdb786fe..3dc72a5647ca 100644 > > > > > --- a/drivers/vhost/vsock.c > > > > > +++ b/drivers/vhost/vsock.c > > > > > @@ -925,7 +925,7 @@ static int __init vhost_vsock_init(void) > > > > > int ret; > > > > > > > > > > ret = vsock_core_register(&vhost_transport.transport, > > > > > - VSOCK_TRANSPORT_F_H2G); > > > > > + VSOCK_TRANSPORT_F_H2G | > > > > > VSOCK_TRANSPORT_F_DGRAM); > > > > > if (ret < 0) > > > > > return ret; > > > > > > > > > > diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h > > > > > index 1c53c4c4d88f..37e55c81e4df 100644 > > > > > --- a/include/net/af_vsock.h > > > > > +++ b/include/net/af_vsock.h > > > > > @@ -78,6 +78,8 @@ struct vsock_sock { > > > > > s64 vsock_stream_has_data(struct vsock_sock *vsk); > > > > > s64 vsock_stream_has_space(struct vsock_sock 
*vsk); > > > > > struct sock *vsock_create_connected(struct sock *parent); > > > > > +int vsock_bind_stream(struct vsock_sock *vsk, > > > > > + struct sockaddr_vm *addr); > > > > > > > > > > /**** TRANSPORT ****/ > > > > > > > > > > diff --git a/include/uapi/linux/virtio_vsock.h > > > > > b/include/uapi/linux/virtio_vsock.h > > > > > index 857df3a3a70d..0975b9c88292 100644 > > > > > --- a/include/uapi/linux/virtio_vsock.h > > > > > +++ b/include/uapi/linux/virtio_vsock.h > > > > > @@ -70,6 +70,7 @@ struct virtio_vsock_hdr { > > > > > enum virtio_vsock_type { > > > > > VIRTIO_VSOCK_TYPE_STREAM = 1, > > > > > VIRTIO_VSOCK_TYPE_SEQPACKET = 2, > > > > > + VIRTIO_VSOCK_TYPE_DGRAM = 3, > > > > > }; > > > > > > > > > > enum virtio_vsock_op { > > > > > diff --git a/net/vmw_vsock/af_vsock.c > > > > > b/net/vmw_vsock/af_vsock.c > > > > > index 1893f8aafa48..87e4ae1866d3 100644 > > > > > --- a/net/vmw_vsock/af_vsock.c > > > > > +++ b/net/vmw_vsock/af_vsock.c > > > > > @@ -675,6 +675,19 @@ static int > > > > > __vsock_bind_connectible(struct vsock_sock *vsk, > > > > > return 0; > > > > > } > > > > > > > > > > +int vsock_bind_stream(struct vsock_sock *vsk, > > > > > + struct sockaddr_vm *addr) > > > > > +{ > > > > > + int retval; > > > > > + > > > > > + spin_lock_bh(&vsock_table_lock); > > > > > + retval = __vsock_bind_connectible(vsk, addr); > > > > > + spin_unlock_bh(&vsock_table_lock); > > > > > + > > > > > + return retval; > > > > > +} > > > > > +EXPORT_SYMBOL(vsock_bind_stream); > > > > > + > > > > > static int __vsock_bind_dgram(struct vsock_sock *vsk, > > > > > struct sockaddr_vm *addr) > > > > > { > > > > > @@ -2363,11 +2376,16 @@ int vsock_core_register(const struct > > > > > vsock_transport *t, int features) > > > > > } > > > > > > > > > > if (features & VSOCK_TRANSPORT_F_DGRAM) { > > > > > - if (t_dgram) { > > > > > - err = -EBUSY; > > > > > - goto err_busy; > > > > > + /* TODO: always chose the G2H variant over > > > > > others, support nesting later */ > > > > > + 
if (features & VSOCK_TRANSPORT_F_G2H) { > > > > > + if (t_dgram) > > > > > + pr_warn("virtio_vsock: t_dgram > > > > > already set\n"); > > > > > + t_dgram = t; > > > > > + } > > > > > + > > > > > + if (!t_dgram) { > > > > > + t_dgram = t; > > > > > } > > > > > - t_dgram = t; > > > > > } > > > > > > > > > > if (features & VSOCK_TRANSPORT_F_LOCAL) { > > > > > diff --git a/net/vmw_vsock/virtio_transport.c > > > > > b/net/vmw_vsock/virtio_transport.c > > > > > index 073314312683..d4526ca462d2 100644 > > > > > --- a/net/vmw_vsock/virtio_transport.c > > > > > +++ b/net/vmw_vsock/virtio_transport.c > > > > > @@ -850,7 +850,7 @@ static int __init virtio_vsock_init(void) > > > > > return -ENOMEM; > > > > > > > > > > ret = vsock_core_register(&virtio_transport.transport, > > > > > - VSOCK_TRANSPORT_F_G2H); > > > > > + VSOCK_TRANSPORT_F_G2H | > > > > > VSOCK_TRANSPORT_F_DGRAM); > > > > > if (ret) > > > > > goto out_wq; > > > > > > > > > > diff --git a/net/vmw_vsock/virtio_transport_common.c > > > > > b/net/vmw_vsock/virtio_transport_common.c > > > > > index bdf16fff054f..aedb48728677 100644 > > > > > --- a/net/vmw_vsock/virtio_transport_common.c > > > > > +++ b/net/vmw_vsock/virtio_transport_common.c > > > > > @@ -229,7 +229,9 @@ > > > > > EXPORT_SYMBOL_GPL(virtio_transport_deliver_tap_pkt); > > > > > > > > > > static u16 virtio_transport_get_type(struct sock *sk) > > > > > { > > > > > - if (sk->sk_type == SOCK_STREAM) > > > > > + if (sk->sk_type == SOCK_DGRAM) > > > > > + return VIRTIO_VSOCK_TYPE_DGRAM; > > > > > + else if (sk->sk_type == SOCK_STREAM) > > > > > return VIRTIO_VSOCK_TYPE_STREAM; > > > > > else > > > > > return VIRTIO_VSOCK_TYPE_SEQPACKET; > > > > > @@ -287,22 +289,29 @@ static int > > > > > virtio_transport_send_pkt_info(struct vsock_sock *vsk, > > > > > vvs = vsk->trans; > > > > > > > > > > /* we can send less than pkt_len bytes */ > > > > > - if (pkt_len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE) > > > > > - pkt_len = VIRTIO_VSOCK_MAX_PKT_BUF_SIZE; > > > > > + if 
(pkt_len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE) { > > > > > + if (info->type != VIRTIO_VSOCK_TYPE_DGRAM) > > > > > + pkt_len = > > > > > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE; > > > > > + else > > > > > + return 0; > > > > > + } > > > > > > > > > > - /* virtio_transport_get_credit might return less than > > > > > pkt_len credit */ > > > > > - pkt_len = virtio_transport_get_credit(vvs, pkt_len); > > > > > + if (info->type != VIRTIO_VSOCK_TYPE_DGRAM) { > > > > > + /* virtio_transport_get_credit might return > > > > > less than pkt_len credit */ > > > > > + pkt_len = virtio_transport_get_credit(vvs, > > > > > pkt_len); > > > > > > > > > > - /* Do not send zero length OP_RW pkt */ > > > > > - if (pkt_len == 0 && info->op == VIRTIO_VSOCK_OP_RW) > > > > > - return pkt_len; > > > > > + /* Do not send zero length OP_RW pkt */ > > > > > + if (pkt_len == 0 && info->op == > > > > > VIRTIO_VSOCK_OP_RW) > > > > > + return pkt_len; > > > > > + } > > > > > > > > > > skb = virtio_transport_alloc_skb(info, pkt_len, > > > > > src_cid, src_port, > > > > > dst_cid, dst_port, > > > > > &err); > > > > > if (!skb) { > > > > > - virtio_transport_put_credit(vvs, pkt_len); > > > > > + if (info->type != VIRTIO_VSOCK_TYPE_DGRAM) > > > > > + virtio_transport_put_credit(vvs, > > > > > pkt_len); > > > > > return err; > > > > > } > > > > > > > > > > @@ -586,6 +595,61 @@ > > > > > virtio_transport_seqpacket_dequeue(struct vsock_sock *vsk, > > > > > } > > > > > EXPORT_SYMBOL_GPL(virtio_transport_seqpacket_dequeue); > > > > > > > > > > +static ssize_t > > > > > +virtio_transport_dgram_do_dequeue(struct vsock_sock *vsk, > > > > > + struct msghdr *msg, size_t > > > > > len) > > > > > +{ > > > > > + struct virtio_vsock_sock *vvs = vsk->trans; > > > > > + struct sk_buff *skb; > > > > > + size_t total = 0; > > > > > + u32 free_space; > > > > > + int err = -EFAULT; > > > > > + > > > > > + spin_lock_bh(&vvs->rx_lock); > > > > > + if (total < len && !skb_queue_empty_lockless(&vvs- > > > > > >rx_queue)) { > > > > > + skb 
= __skb_dequeue(&vvs->rx_queue); > > > > > + > > > > > + total = len; > > > > > + if (total > skb->len - vsock_metadata(skb)- > > > > > >off) > > > > > + total = skb->len - vsock_metadata(skb)- > > > > > >off; > > > > > + else if (total < skb->len - > > > > > vsock_metadata(skb)->off) > > > > > + msg->msg_flags |= MSG_TRUNC; > > > > > + > > > > > + /* sk_lock is held by caller so no one else can > > > > > dequeue. > > > > > + * Unlock rx_lock since memcpy_to_msg() may > > > > > sleep. > > > > > + */ > > > > > + spin_unlock_bh(&vvs->rx_lock); > > > > > + > > > > > + err = memcpy_to_msg(msg, skb->data + > > > > > vsock_metadata(skb)->off, total); > > > > > + if (err) > > > > > + return err; > > > > > + > > > > > + spin_lock_bh(&vvs->rx_lock); > > > > > + > > > > > + virtio_transport_dec_rx_pkt(vvs, skb); > > > > > + consume_skb(skb); > > > > > + } > > > > > + > > > > > + free_space = vvs->buf_alloc - (vvs->fwd_cnt - vvs- > > > > > >last_fwd_cnt); > > > > > + > > > > > + spin_unlock_bh(&vvs->rx_lock); > > > > > + > > > > > + if (total > 0 && msg->msg_name) { > > > > > + /* Provide the address of the sender. 
*/ > > > > > + DECLARE_SOCKADDR(struct sockaddr_vm *, vm_addr, > > > > > msg->msg_name); > > > > > + > > > > > + vsock_addr_init(vm_addr, > > > > > le64_to_cpu(vsock_hdr(skb)->src_cid), > > > > > + le32_to_cpu(vsock_hdr(skb)- > > > > > >src_port)); > > > > > + msg->msg_namelen = sizeof(*vm_addr); > > > > > + } > > > > > + return total; > > > > > +} > > > > > + > > > > > +static s64 virtio_transport_dgram_has_data(struct vsock_sock > > > > > *vsk) > > > > > +{ > > > > > + return virtio_transport_stream_has_data(vsk); > > > > > +} > > > > > + > > > > > int > > > > > virtio_transport_seqpacket_enqueue(struct vsock_sock *vsk, > > > > > struct msghdr *msg, > > > > > @@ -611,7 +675,66 @@ virtio_transport_dgram_dequeue(struct > > > > > vsock_sock *vsk, > > > > > struct msghdr *msg, > > > > > size_t len, int flags) > > > > > { > > > > > - return -EOPNOTSUPP; > > > > > + struct sock *sk; > > > > > + size_t err = 0; > > > > > + long timeout; > > > > > + > > > > > + DEFINE_WAIT(wait); > > > > > + > > > > > + sk = &vsk->sk; > > > > > + err = 0; > > > > > + > > > > > + if (flags & MSG_OOB || flags & MSG_ERRQUEUE || flags & > > > > > MSG_PEEK) > > > > > + return -EOPNOTSUPP; > > > > > + > > > > > + lock_sock(sk); > > > > > + > > > > > + if (!len) > > > > > + goto out; > > > > > + > > > > > + timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); > > > > > + > > > > > + while (1) { > > > > > + s64 ready; > > > > > + > > > > > + prepare_to_wait(sk_sleep(sk), &wait, > > > > > TASK_INTERRUPTIBLE); > > > > > + ready = virtio_transport_dgram_has_data(vsk); > > > > > + > > > > > + if (ready == 0) { > > > > > + if (timeout == 0) { > > > > > + err = -EAGAIN; > > > > > + finish_wait(sk_sleep(sk), > > > > > &wait); > > > > > + break; > > > > > + } > > > > > + > > > > > + release_sock(sk); > > > > > + timeout = schedule_timeout(timeout); > > > > > + lock_sock(sk); > > > > > + > > > > > + if (signal_pending(current)) { > > > > > + err = sock_intr_errno(timeout); > > > > > + 
finish_wait(sk_sleep(sk), > > > > > &wait); > > > > > + break; > > > > > + } else if (timeout == 0) { > > > > > + err = -EAGAIN; > > > > > + finish_wait(sk_sleep(sk), > > > > > &wait); > > > > > + break; > > > > > + } > > > > > + } else { > > > > > + finish_wait(sk_sleep(sk), &wait); > > > > > + > > > > > + if (ready < 0) { > > > > > + err = -ENOMEM; > > > > > + goto out; > > > > > + } > > > > > + > > > > > + err = > > > > > virtio_transport_dgram_do_dequeue(vsk, msg, len); > > > > > + break; > > > > > + } > > > > > + } > > > > > +out: > > > > > + release_sock(sk); > > > > > + return err; > > > > > } > > > > > EXPORT_SYMBOL_GPL(virtio_transport_dgram_dequeue); > > ^^^ > > May be, this generic data waiting logic should be in af_vsock.c, as > > for stream/seqpacket? > > In this way, another transport which supports SOCK_DGRAM could > > reuse it. > > I think that is a great idea. I'll test that change for v2. > > Thanks. Also for v2, I tested your patchset a little bit (writing here so as not to spread this over all mails): 1) The seqpacket test in vsock_test.c fails (seems to be a MSG_EOR flag issue). 2) I can't do rmmod with the following config (after testing): CONFIG_VSOCKETS=m CONFIG_VIRTIO_VSOCKETS=m CONFIG_VIRTIO_VSOCKETS_COMMON=m CONFIG_VHOST=m CONFIG_VHOST_VSOCK=m The guest is shut down, but rmmod fails. 3) virtio_transport_init + virtio_transport_exit seem to need EXPORT_SYMBOL_GPL(), because both are used in another module. 4) I tried to send a 5kb (or 20kb, it doesn't matter) piece of data, but got a kernel panic both in the guest and later in the host.
Thank You > > > > > > > > > > > @@ -819,13 +942,13 @@ > > > > > EXPORT_SYMBOL_GPL(virtio_transport_stream_allow); > > > > > int virtio_transport_dgram_bind(struct vsock_sock *vsk, > > > > > struct sockaddr_vm *addr) > > > > > { > > > > > - return -EOPNOTSUPP; > > > > > + return vsock_bind_stream(vsk, addr); > > > > > } > > > > > EXPORT_SYMBOL_GPL(virtio_transport_dgram_bind); > > > > > > > > > > bool virtio_transport_dgram_allow(u32 cid, u32 port) > > > > > { > > > > > - return false; > > > > > + return true; > > > > > } > > > > > EXPORT_SYMBOL_GPL(virtio_transport_dgram_allow); > > > > > > > > > > @@ -861,7 +984,16 @@ virtio_transport_dgram_enqueue(struct > > > > > vsock_sock *vsk, > > > > > struct msghdr *msg, > > > > > size_t dgram_len) > > > > > { > > > > > - return -EOPNOTSUPP; > > > > > + struct virtio_vsock_pkt_info info = { > > > > > + .op = VIRTIO_VSOCK_OP_RW, > > > > > + .msg = msg, > > > > > + .pkt_len = dgram_len, > > > > > + .vsk = vsk, > > > > > + .remote_cid = remote_addr->svm_cid, > > > > > + .remote_port = remote_addr->svm_port, > > > > > + }; > > > > > + > > > > > + return virtio_transport_send_pkt_info(vsk, &info); > > > > > } > > > > > EXPORT_SYMBOL_GPL(virtio_transport_dgram_enqueue); > > > > > > > > > > @@ -1165,6 +1297,12 @@ virtio_transport_recv_connected(struct > > > > > sock *sk, > > > > > struct virtio_vsock_hdr *hdr = vsock_hdr(skb); > > > > > int err = 0; > > > > > > > > > > + if (le16_to_cpu(vsock_hdr(skb)->type) == > > > > > VIRTIO_VSOCK_TYPE_DGRAM) { > > > > > + virtio_transport_recv_enqueue(vsk, skb); > > > > > + sk->sk_data_ready(sk); > > > > > + return err; > > > > > + } > > > > > + > > > > > switch (le16_to_cpu(hdr->op)) { > > > > > case VIRTIO_VSOCK_OP_RW: > > > > > virtio_transport_recv_enqueue(vsk, skb); > > > > > @@ -1320,7 +1458,8 @@ virtio_transport_recv_listen(struct > > > > > sock *sk, struct sk_buff *skb, > > > > > static bool virtio_transport_valid_type(u16 type) > > > > > { > > > > > return (type == 
VIRTIO_VSOCK_TYPE_STREAM) || > > > > > - (type == VIRTIO_VSOCK_TYPE_SEQPACKET); > > > > > + (type == VIRTIO_VSOCK_TYPE_SEQPACKET) || > > > > > + (type == VIRTIO_VSOCK_TYPE_DGRAM); > > > > > } > > > > > > > > > > /* We are under the virtio-vsock's vsock->rx_lock or vhost- > > > > > vsock's vq->mutex > > > > > @@ -1384,6 +1523,11 @@ void virtio_transport_recv_pkt(struct > > > > > virtio_transport *t, > > > > > goto free_pkt; > > > > > } > > > > > > > > > > + if (sk->sk_type == SOCK_DGRAM) { > > > > > + virtio_transport_recv_connected(sk, skb); > > > > > + goto out; > > > > > + } > > > > > + > > > > > space_available = virtio_transport_space_update(sk, > > > > > skb); > > > > > > > > > > /* Update CID in case it has changed after a transport > > > > > reset event */ > > > > > @@ -1415,6 +1559,7 @@ void virtio_transport_recv_pkt(struct > > > > > virtio_transport *t, > > > > > break; > > > > > } > > > > > > > > > > +out: > > > > > release_sock(sk); > > > > > > > > > > /* Release refcnt obtained when we fetched this socket > > > > > out of the > > > > > -- > > > > > 2.35.1 > > > > > > > > > > > > > ------------------------------------------------------------- > > > > -------- > > > > To unsubscribe, e-mail: > > > > virtio-dev-unsubscribe@lists.oasis-open.org > > > > For additional commands, e-mail: > > > > virtio-dev-help@lists.oasis-open.org > > > > > > --------------------------------------------------------------------- > To unsubscribe, e-mail: virtio-dev-unsubscribe@lists.oasis-open.org > For additional commands, e-mail: virtio-dev-help@lists.oasis-open.org >
On Tue, 2022-08-16 at 20:52 +0000, Bobby Eshleman wrote: > On Thu, Aug 18, 2022 at 08:35:48AM +0000, Arseniy Krasnov wrote: > > On Tue, 2022-08-16 at 09:58 +0000, Bobby Eshleman wrote: > > > On Wed, Aug 17, 2022 at 05:42:08AM +0000, Arseniy Krasnov wrote: > > > > On 17.08.2022 08:01, Arseniy Krasnov wrote: > > > > > On 16.08.2022 05:32, Bobby Eshleman wrote: > > > > > > CC'ing virtio-dev@lists.oasis-open.org > > > > > > > > > > > > On Mon, Aug 15, 2022 at 10:56:08AM -0700, Bobby Eshleman > > > > > > wrote: > > > > > > > This patch supports dgram in virtio and on the vhost > > > > > > > side. > > > > > Hello, > > > > > > > > > > sorry, i don't understand, how this maintains message > > > > > boundaries? > > > > > Or it > > > > > is unnecessary for SOCK_DGRAM? > > > > > > > > > > Thanks > > > > > > > Signed-off-by: Jiang Wang <jiang.wang@bytedance.com> > > > > > > > Signed-off-by: Bobby Eshleman < > > > > > > > bobby.eshleman@bytedance.com> > > > > > > > --- > > > > > > > drivers/vhost/vsock.c | 2 +- > > > > > > > include/net/af_vsock.h | 2 + > > > > > > > include/uapi/linux/virtio_vsock.h | 1 + > > > > > > > net/vmw_vsock/af_vsock.c | 26 +++- > > > > > > > net/vmw_vsock/virtio_transport.c | 2 +- > > > > > > > net/vmw_vsock/virtio_transport_common.c | 173 > > > > > > > ++++++++++++++++++++++-- > > > > > > > 6 files changed, 186 insertions(+), 20 deletions(-) > > > > > > > > > > > > > > diff --git a/drivers/vhost/vsock.c > > > > > > > b/drivers/vhost/vsock.c > > > > > > > index a5d1bdb786fe..3dc72a5647ca 100644 > > > > > > > --- a/drivers/vhost/vsock.c > > > > > > > +++ b/drivers/vhost/vsock.c > > > > > > > @@ -925,7 +925,7 @@ static int __init > > > > > > > vhost_vsock_init(void) > > > > > > > int ret; > > > > > > > > > > > > > > ret = vsock_core_register(&vhost_transport.transport, > > > > > > > - VSOCK_TRANSPORT_F_H2G); > > > > > > > + VSOCK_TRANSPORT_F_H2G | > > > > > > > VSOCK_TRANSPORT_F_DGRAM); > > > > > > > if (ret < 0) > > > > > > > return ret; > > > > > > > 
> > > > > > > diff --git a/include/net/af_vsock.h > > > > > > > b/include/net/af_vsock.h > > > > > > > index 1c53c4c4d88f..37e55c81e4df 100644 > > > > > > > --- a/include/net/af_vsock.h > > > > > > > +++ b/include/net/af_vsock.h > > > > > > > @@ -78,6 +78,8 @@ struct vsock_sock { > > > > > > > s64 vsock_stream_has_data(struct vsock_sock *vsk); > > > > > > > s64 vsock_stream_has_space(struct vsock_sock *vsk); > > > > > > > struct sock *vsock_create_connected(struct sock > > > > > > > *parent); > > > > > > > +int vsock_bind_stream(struct vsock_sock *vsk, > > > > > > > + struct sockaddr_vm *addr); > > > > > > > > > > > > > > /**** TRANSPORT ****/ > > > > > > > > > > > > > > diff --git a/include/uapi/linux/virtio_vsock.h > > > > > > > b/include/uapi/linux/virtio_vsock.h > > > > > > > index 857df3a3a70d..0975b9c88292 100644 > > > > > > > --- a/include/uapi/linux/virtio_vsock.h > > > > > > > +++ b/include/uapi/linux/virtio_vsock.h > > > > > > > @@ -70,6 +70,7 @@ struct virtio_vsock_hdr { > > > > > > > enum virtio_vsock_type { > > > > > > > VIRTIO_VSOCK_TYPE_STREAM = 1, > > > > > > > VIRTIO_VSOCK_TYPE_SEQPACKET = 2, > > > > > > > + VIRTIO_VSOCK_TYPE_DGRAM = 3, > > > > > > > }; > > > > > > > > > > > > > > enum virtio_vsock_op { > > > > > > > diff --git a/net/vmw_vsock/af_vsock.c > > > > > > > b/net/vmw_vsock/af_vsock.c > > > > > > > index 1893f8aafa48..87e4ae1866d3 100644 > > > > > > > --- a/net/vmw_vsock/af_vsock.c > > > > > > > +++ b/net/vmw_vsock/af_vsock.c > > > > > > > @@ -675,6 +675,19 @@ static int > > > > > > > __vsock_bind_connectible(struct vsock_sock *vsk, > > > > > > > return 0; > > > > > > > } > > > > > > > > > > > > > > +int vsock_bind_stream(struct vsock_sock *vsk, > > > > > > > + struct sockaddr_vm *addr) > > > > > > > +{ > > > > > > > + int retval; > > > > > > > + > > > > > > > + spin_lock_bh(&vsock_table_lock); > > > > > > > + retval = __vsock_bind_connectible(vsk, addr); > > > > > > > + spin_unlock_bh(&vsock_table_lock); > > > > > > > + > > > > > > > + 
return retval; > > > > > > > +} > > > > > > > +EXPORT_SYMBOL(vsock_bind_stream); > > > > > > > + > > > > > > > static int __vsock_bind_dgram(struct vsock_sock *vsk, > > > > > > > struct sockaddr_vm *addr) > > > > > > > { > > > > > > > @@ -2363,11 +2376,16 @@ int vsock_core_register(const > > > > > > > struct > > > > > > > vsock_transport *t, int features) > > > > > > > } > > > > > > > > > > > > > > if (features & VSOCK_TRANSPORT_F_DGRAM) { > > > > > > > - if (t_dgram) { > > > > > > > - err = -EBUSY; > > > > > > > - goto err_busy; > > > > > > > + /* TODO: always chose the G2H variant over > > > > > > > others, support nesting later */ > > > > > > > + if (features & VSOCK_TRANSPORT_F_G2H) { > > > > > > > + if (t_dgram) > > > > > > > + pr_warn("virtio_vsock: t_dgram > > > > > > > already set\n"); > > > > > > > + t_dgram = t; > > > > > > > + } > > > > > > > + > > > > > > > + if (!t_dgram) { > > > > > > > + t_dgram = t; > > > > > > > } > > > > > > > - t_dgram = t; > > > > > > > } > > > > > > > > > > > > > > if (features & VSOCK_TRANSPORT_F_LOCAL) { > > > > > > > diff --git a/net/vmw_vsock/virtio_transport.c > > > > > > > b/net/vmw_vsock/virtio_transport.c > > > > > > > index 073314312683..d4526ca462d2 100644 > > > > > > > --- a/net/vmw_vsock/virtio_transport.c > > > > > > > +++ b/net/vmw_vsock/virtio_transport.c > > > > > > > @@ -850,7 +850,7 @@ static int __init > > > > > > > virtio_vsock_init(void) > > > > > > > return -ENOMEM; > > > > > > > > > > > > > > ret = vsock_core_register(&virtio_transport.transport, > > > > > > > - VSOCK_TRANSPORT_F_G2H); > > > > > > > + VSOCK_TRANSPORT_F_G2H | > > > > > > > VSOCK_TRANSPORT_F_DGRAM); > > > > > > > if (ret) > > > > > > > goto out_wq; > > > > > > > > > > > > > > diff --git a/net/vmw_vsock/virtio_transport_common.c > > > > > > > b/net/vmw_vsock/virtio_transport_common.c > > > > > > > index bdf16fff054f..aedb48728677 100644 > > > > > > > --- a/net/vmw_vsock/virtio_transport_common.c > > > > > > > +++ 
b/net/vmw_vsock/virtio_transport_common.c > > > > > > > @@ -229,7 +229,9 @@ > > > > > > > EXPORT_SYMBOL_GPL(virtio_transport_deliver_tap_pkt); > > > > > > > > > > > > > > static u16 virtio_transport_get_type(struct sock *sk) > > > > > > > { > > > > > > > - if (sk->sk_type == SOCK_STREAM) > > > > > > > + if (sk->sk_type == SOCK_DGRAM) > > > > > > > + return VIRTIO_VSOCK_TYPE_DGRAM; > > > > > > > + else if (sk->sk_type == SOCK_STREAM) > > > > > > > return VIRTIO_VSOCK_TYPE_STREAM; > > > > > > > else > > > > > > > return VIRTIO_VSOCK_TYPE_SEQPACKET; > > > > > > > @@ -287,22 +289,29 @@ static int > > > > > > > virtio_transport_send_pkt_info(struct vsock_sock *vsk, > > > > > > > vvs = vsk->trans; > > > > > > > > > > > > > > /* we can send less than pkt_len bytes */ > > > > > > > - if (pkt_len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE) > > > > > > > - pkt_len = VIRTIO_VSOCK_MAX_PKT_BUF_SIZE; > > > > > > > + if (pkt_len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE) { > > > > > > > + if (info->type != VIRTIO_VSOCK_TYPE_DGRAM) > > > > > > > + pkt_len = > > > > > > > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE; > > > > > > > + else > > > > > > > + return 0; > > > > > > > + } > > > > > > > > > > > > > > - /* virtio_transport_get_credit might return less than > > > > > > > pkt_len credit */ > > > > > > > - pkt_len = virtio_transport_get_credit(vvs, pkt_len); > > > > > > > + if (info->type != VIRTIO_VSOCK_TYPE_DGRAM) { > > > > > > > + /* virtio_transport_get_credit might return > > > > > > > less than pkt_len credit */ > > > > > > > + pkt_len = virtio_transport_get_credit(vvs, > > > > > > > pkt_len); > > > > > > > > > > > > > > - /* Do not send zero length OP_RW pkt */ > > > > > > > - if (pkt_len == 0 && info->op == VIRTIO_VSOCK_OP_RW) > > > > > > > - return pkt_len; > > > > > > > + /* Do not send zero length OP_RW pkt */ > > > > > > > + if (pkt_len == 0 && info->op == > > > > > > > VIRTIO_VSOCK_OP_RW) > > > > > > > + return pkt_len; > > > > > > > + } > > > > > > > > > > > > > > skb = 
virtio_transport_alloc_skb(info, pkt_len, > > > > > > > src_cid, src_port, > > > > > > > dst_cid, dst_port, > > > > > > > &err); > > > > > > > if (!skb) { > > > > > > > - virtio_transport_put_credit(vvs, pkt_len); > > > > > > > + if (info->type != VIRTIO_VSOCK_TYPE_DGRAM) > > > > > > > + virtio_transport_put_credit(vvs, > > > > > > > pkt_len); > > > > > > > return err; > > > > > > > } > > > > > > > > > > > > > > @@ -586,6 +595,61 @@ > > > > > > > virtio_transport_seqpacket_dequeue(struct vsock_sock > > > > > > > *vsk, > > > > > > > } > > > > > > > EXPORT_SYMBOL_GPL(virtio_transport_seqpacket_dequeue); > > > > > > > > > > > > > > +static ssize_t > > > > > > > +virtio_transport_dgram_do_dequeue(struct vsock_sock > > > > > > > *vsk, > > > > > > > + struct msghdr *msg, size_t > > > > > > > len) > > > > > > > +{ > > > > > > > + struct virtio_vsock_sock *vvs = vsk->trans; > > > > > > > + struct sk_buff *skb; > > > > > > > + size_t total = 0; > > > > > > > + u32 free_space; > > > > > > > + int err = -EFAULT; > > > > > > > + > > > > > > > + spin_lock_bh(&vvs->rx_lock); > > > > > > > + if (total < len && !skb_queue_empty_lockless(&vvs- > > > > > > > > rx_queue)) { > > > > > > > + skb = __skb_dequeue(&vvs->rx_queue); > > > > > > > + > > > > > > > + total = len; > > > > > > > + if (total > skb->len - vsock_metadata(skb)- > > > > > > > > off) > > > > > > > + total = skb->len - vsock_metadata(skb)- > > > > > > > > off; > > > > > > > + else if (total < skb->len - > > > > > > > vsock_metadata(skb)->off) > > > > > > > + msg->msg_flags |= MSG_TRUNC; > > > > > > > + > > > > > > > + /* sk_lock is held by caller so no one else can > > > > > > > dequeue. > > > > > > > + * Unlock rx_lock since memcpy_to_msg() may > > > > > > > sleep. 
> > > > > > > + */ > > > > > > > + spin_unlock_bh(&vvs->rx_lock); > > > > > > > + > > > > > > > + err = memcpy_to_msg(msg, skb->data + > > > > > > > vsock_metadata(skb)->off, total); > > > > > > > + if (err) > > > > > > > + return err; > > > > > > > + > > > > > > > + spin_lock_bh(&vvs->rx_lock); > > > > > > > + > > > > > > > + virtio_transport_dec_rx_pkt(vvs, skb); > > > > > > > + consume_skb(skb); > > > > > > > + } > > > > > > > + > > > > > > > + free_space = vvs->buf_alloc - (vvs->fwd_cnt - vvs- > > > > > > > > last_fwd_cnt); > > > > > > > + > > > > > > > + spin_unlock_bh(&vvs->rx_lock); > > > > > > > + > > > > > > > + if (total > 0 && msg->msg_name) { > > > > > > > + /* Provide the address of the sender. */ > > > > > > > + DECLARE_SOCKADDR(struct sockaddr_vm *, vm_addr, > > > > > > > msg->msg_name); > > > > > > > + > > > > > > > + vsock_addr_init(vm_addr, > > > > > > > le64_to_cpu(vsock_hdr(skb)->src_cid), > > > > > > > + le32_to_cpu(vsock_hdr(skb)- > > > > > > > > src_port)); > > > > > > > + msg->msg_namelen = sizeof(*vm_addr); > > > > > > > + } > > > > > > > + return total; > > > > > > > +} > > > > > > > + > > > > > > > +static s64 virtio_transport_dgram_has_data(struct > > > > > > > vsock_sock > > > > > > > *vsk) > > > > > > > +{ > > > > > > > + return virtio_transport_stream_has_data(vsk); > > > > > > > +} > > > > > > > + > > > > > > > int > > > > > > > virtio_transport_seqpacket_enqueue(struct vsock_sock > > > > > > > *vsk, > > > > > > > struct msghdr *msg, > > > > > > > @@ -611,7 +675,66 @@ > > > > > > > virtio_transport_dgram_dequeue(struct > > > > > > > vsock_sock *vsk, > > > > > > > struct msghdr *msg, > > > > > > > size_t len, int flags) > > > > > > > { > > > > > > > - return -EOPNOTSUPP; > > > > > > > + struct sock *sk; > > > > > > > + size_t err = 0; > > > > > > > + long timeout; > > > > > > > + > > > > > > > + DEFINE_WAIT(wait); > > > > > > > + > > > > > > > + sk = &vsk->sk; > > > > > > > + err = 0; > > > > > > > + > > > > > > > + if (flags & 
MSG_OOB || flags & MSG_ERRQUEUE || flags & > > > > > > > MSG_PEEK) > > > > > > > + return -EOPNOTSUPP; > > > > > > > + > > > > > > > + lock_sock(sk); > > > > > > > + > > > > > > > + if (!len) > > > > > > > + goto out; > > > > > > > + > > > > > > > + timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); > > > > > > > + > > > > > > > + while (1) { > > > > > > > + s64 ready; > > > > > > > + > > > > > > > + prepare_to_wait(sk_sleep(sk), &wait, > > > > > > > TASK_INTERRUPTIBLE); > > > > > > > + ready = virtio_transport_dgram_has_data(vsk); > > > > > > > + > > > > > > > + if (ready == 0) { > > > > > > > + if (timeout == 0) { > > > > > > > + err = -EAGAIN; > > > > > > > + finish_wait(sk_sleep(sk), > > > > > > > &wait); > > > > > > > + break; > > > > > > > + } > > > > > > > + > > > > > > > + release_sock(sk); > > > > > > > + timeout = schedule_timeout(timeout); > > > > > > > + lock_sock(sk); > > > > > > > + > > > > > > > + if (signal_pending(current)) { > > > > > > > + err = sock_intr_errno(timeout); > > > > > > > + finish_wait(sk_sleep(sk), > > > > > > > &wait); > > > > > > > + break; > > > > > > > + } else if (timeout == 0) { > > > > > > > + err = -EAGAIN; > > > > > > > + finish_wait(sk_sleep(sk), > > > > > > > &wait); > > > > > > > + break; > > > > > > > + } > > > > > > > + } else { > > > > > > > + finish_wait(sk_sleep(sk), &wait); > > > > > > > + > > > > > > > + if (ready < 0) { > > > > > > > + err = -ENOMEM; > > > > > > > + goto out; > > > > > > > + } > > > > > > > + > > > > > > > + err = > > > > > > > virtio_transport_dgram_do_dequeue(vsk, msg, len); > > > > > > > + break; > > > > > > > + } > > > > > > > + } > > > > > > > +out: > > > > > > > + release_sock(sk); > > > > > > > + return err; > > > > > > > } > > > > > > > EXPORT_SYMBOL_GPL(virtio_transport_dgram_dequeue); > > > > ^^^ > > > > May be, this generic data waiting logic should be in > > > > af_vsock.c, as > > > > for stream/seqpacket? 
> > > > In this way, another transport which supports SOCK_DGRAM could > > > > reuse it. > > > > > > I think that is a great idea. I'll test that change for v2. > > > > > > Thanks. > > > > Also for v2, i tested Your patchset a little bit(write here to not > > spread over all mails): > > 1) seqpacket test in vsock_test.c fails(seems MSG_EOR flag issue) > > I will investigate. > > > 2) i can't do rmmod with the following config(after testing): > > CONFIG_VSOCKETS=m > > CONFIG_VIRTIO_VSOCKETS=m > > CONFIG_VIRTIO_VSOCKETS_COMMON=m > > CONFIG_VHOST=m > > CONFIG_VHOST_VSOCK=m > > Guest is shutdown, but rmmod fails. > > 3) virtio_transport_init + virtio_transport_exit seems must be > > under EXPORT_SYMBOL_GPL(), because both used in another module. > > Definitely, will fix. > > > 4) I tried to send 5kb(or 20kb not matter) piece of data, but > > got > > kernel panic both in guest and later in host. > > > > Thanks for catching that. I can reproduce it intermittently, but only > for seqpacket. Did you happen to see this for other socket types as > well? > > Thanks I got this for SOCK_DGRAM, i didnt test seqpacket or stream. 
Thanks, Arseniy > > > Thank You > > > > > > > > > > > > > > @@ -819,13 +942,13 @@ > > > > > > > EXPORT_SYMBOL_GPL(virtio_transport_stream_allow); > > > > > > > int virtio_transport_dgram_bind(struct vsock_sock *vsk, > > > > > > > struct sockaddr_vm *addr) > > > > > > > { > > > > > > > - return -EOPNOTSUPP; > > > > > > > + return vsock_bind_stream(vsk, addr); > > > > > > > } > > > > > > > EXPORT_SYMBOL_GPL(virtio_transport_dgram_bind); > > > > > > > > > > > > > > bool virtio_transport_dgram_allow(u32 cid, u32 port) > > > > > > > { > > > > > > > - return false; > > > > > > > + return true; > > > > > > > } > > > > > > > EXPORT_SYMBOL_GPL(virtio_transport_dgram_allow); > > > > > > > > > > > > > > @@ -861,7 +984,16 @@ > > > > > > > virtio_transport_dgram_enqueue(struct > > > > > > > vsock_sock *vsk, > > > > > > > struct msghdr *msg, > > > > > > > size_t dgram_len) > > > > > > > { > > > > > > > - return -EOPNOTSUPP; > > > > > > > + struct virtio_vsock_pkt_info info = { > > > > > > > + .op = VIRTIO_VSOCK_OP_RW, > > > > > > > + .msg = msg, > > > > > > > + .pkt_len = dgram_len, > > > > > > > + .vsk = vsk, > > > > > > > + .remote_cid = remote_addr->svm_cid, > > > > > > > + .remote_port = remote_addr->svm_port, > > > > > > > + }; > > > > > > > + > > > > > > > + return virtio_transport_send_pkt_info(vsk, &info); > > > > > > > } > > > > > > > EXPORT_SYMBOL_GPL(virtio_transport_dgram_enqueue); > > > > > > > > > > > > > > @@ -1165,6 +1297,12 @@ > > > > > > > virtio_transport_recv_connected(struct > > > > > > > sock *sk, > > > > > > > struct virtio_vsock_hdr *hdr = vsock_hdr(skb); > > > > > > > int err = 0; > > > > > > > > > > > > > > + if (le16_to_cpu(vsock_hdr(skb)->type) == > > > > > > > VIRTIO_VSOCK_TYPE_DGRAM) { > > > > > > > + virtio_transport_recv_enqueue(vsk, skb); > > > > > > > + sk->sk_data_ready(sk); > > > > > > > + return err; > > > > > > > + } > > > > > > > + > > > > > > > switch (le16_to_cpu(hdr->op)) { > > > > > > > case VIRTIO_VSOCK_OP_RW: > > > > > > > 
virtio_transport_recv_enqueue(vsk, skb); > > > > > > > @@ -1320,7 +1458,8 @@ virtio_transport_recv_listen(struct > > > > > > > sock *sk, struct sk_buff *skb, > > > > > > > static bool virtio_transport_valid_type(u16 type) > > > > > > > { > > > > > > > return (type == VIRTIO_VSOCK_TYPE_STREAM) || > > > > > > > - (type == VIRTIO_VSOCK_TYPE_SEQPACKET); > > > > > > > + (type == VIRTIO_VSOCK_TYPE_SEQPACKET) || > > > > > > > + (type == VIRTIO_VSOCK_TYPE_DGRAM); > > > > > > > } > > > > > > > > > > > > > > /* We are under the virtio-vsock's vsock->rx_lock or > > > > > > > vhost- > > > > > > > vsock's vq->mutex > > > > > > > @@ -1384,6 +1523,11 @@ void > > > > > > > virtio_transport_recv_pkt(struct > > > > > > > virtio_transport *t, > > > > > > > goto free_pkt; > > > > > > > } > > > > > > > > > > > > > > + if (sk->sk_type == SOCK_DGRAM) { > > > > > > > + virtio_transport_recv_connected(sk, skb); > > > > > > > + goto out; > > > > > > > + } > > > > > > > + > > > > > > > space_available = virtio_transport_space_update(sk, > > > > > > > skb); > > > > > > > > > > > > > > /* Update CID in case it has changed after a transport > > > > > > > reset event */ > > > > > > > @@ -1415,6 +1559,7 @@ void > > > > > > > virtio_transport_recv_pkt(struct > > > > > > > virtio_transport *t, > > > > > > > break; > > > > > > > } > > > > > > > > > > > > > > +out: > > > > > > > release_sock(sk); > > > > > > > > > > > > > > /* Release refcnt obtained when we fetched this socket > > > > > > > out of the > > > > > > > -- > > > > > > > 2.35.1 > > > > > > > > > > > > > > > > > > > --------------------------------------------------------- > > > > > > ---- > > > > > > -------- > > > > > > To unsubscribe, e-mail: > > > > > > virtio-dev-unsubscribe@lists.oasis-open.org > > > > > > For additional commands, e-mail: > > > > > > virtio-dev-help@lists.oasis-open.org > > > > > > > > > > > > --------------------------------------------------------------- > > > ------ > > > To unsubscribe, e-mail: > > > 
virtio-dev-unsubscribe@lists.oasis-open.org > > > For additional commands, e-mail: > > > virtio-dev-help@lists.oasis-open.org > > >
diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
index a5d1bdb786fe..3dc72a5647ca 100644
--- a/drivers/vhost/vsock.c
+++ b/drivers/vhost/vsock.c
@@ -925,7 +925,7 @@ static int __init vhost_vsock_init(void)
 	int ret;
 
 	ret = vsock_core_register(&vhost_transport.transport,
-				  VSOCK_TRANSPORT_F_H2G);
+				  VSOCK_TRANSPORT_F_H2G | VSOCK_TRANSPORT_F_DGRAM);
 	if (ret < 0)
 		return ret;
 
diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h
index 1c53c4c4d88f..37e55c81e4df 100644
--- a/include/net/af_vsock.h
+++ b/include/net/af_vsock.h
@@ -78,6 +78,8 @@ struct vsock_sock {
 s64 vsock_stream_has_data(struct vsock_sock *vsk);
 s64 vsock_stream_has_space(struct vsock_sock *vsk);
 struct sock *vsock_create_connected(struct sock *parent);
+int vsock_bind_stream(struct vsock_sock *vsk,
+		      struct sockaddr_vm *addr);
 
 /**** TRANSPORT ****/
 
diff --git a/include/uapi/linux/virtio_vsock.h b/include/uapi/linux/virtio_vsock.h
index 857df3a3a70d..0975b9c88292 100644
--- a/include/uapi/linux/virtio_vsock.h
+++ b/include/uapi/linux/virtio_vsock.h
@@ -70,6 +70,7 @@ struct virtio_vsock_hdr {
 enum virtio_vsock_type {
 	VIRTIO_VSOCK_TYPE_STREAM = 1,
 	VIRTIO_VSOCK_TYPE_SEQPACKET = 2,
+	VIRTIO_VSOCK_TYPE_DGRAM = 3,
 };
 
 enum virtio_vsock_op {
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 1893f8aafa48..87e4ae1866d3 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -675,6 +675,22 @@ static int __vsock_bind_connectible(struct vsock_sock *vsk,
 	return 0;
 }
 
+/* Bind @vsk to @addr through __vsock_bind_connectible(), holding
+ * vsock_table_lock around the call. Returns that helper's result.
+ */
+int vsock_bind_stream(struct vsock_sock *vsk,
+		      struct sockaddr_vm *addr)
+{
+	int retval;
+
+	spin_lock_bh(&vsock_table_lock);
+	retval = __vsock_bind_connectible(vsk, addr);
+	spin_unlock_bh(&vsock_table_lock);
+
+	return retval;
+}
+EXPORT_SYMBOL(vsock_bind_stream);
+
 static int __vsock_bind_dgram(struct vsock_sock *vsk,
 			      struct sockaddr_vm *addr)
 {
@@ -2363,11 +2379,16 @@ int vsock_core_register(const struct vsock_transport *t, int features)
 	}
 
 	if (features & VSOCK_TRANSPORT_F_DGRAM) {
-		if (t_dgram) {
-			err = -EBUSY;
-			goto err_busy;
+		/* TODO: always choose the G2H variant over others, support nesting later */
+		if (features & VSOCK_TRANSPORT_F_G2H) {
+			if (t_dgram)
+				pr_warn("virtio_vsock: t_dgram already set\n");
+			t_dgram = t;
+		}
+
+		if (!t_dgram) {
+			t_dgram = t;
 		}
-		t_dgram = t;
 	}
 
 	if (features & VSOCK_TRANSPORT_F_LOCAL) {
diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
index 073314312683..d4526ca462d2 100644
--- a/net/vmw_vsock/virtio_transport.c
+++ b/net/vmw_vsock/virtio_transport.c
@@ -850,7 +850,7 @@ static int __init virtio_vsock_init(void)
 		return -ENOMEM;
 
 	ret = vsock_core_register(&virtio_transport.transport,
-				  VSOCK_TRANSPORT_F_G2H);
+				  VSOCK_TRANSPORT_F_G2H | VSOCK_TRANSPORT_F_DGRAM);
 	if (ret)
 		goto out_wq;
 
diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
index bdf16fff054f..aedb48728677 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -229,7 +229,9 @@ EXPORT_SYMBOL_GPL(virtio_transport_deliver_tap_pkt);
 
 static u16 virtio_transport_get_type(struct sock *sk)
 {
-	if (sk->sk_type == SOCK_STREAM)
+	if (sk->sk_type == SOCK_DGRAM)
+		return VIRTIO_VSOCK_TYPE_DGRAM;
+	else if (sk->sk_type == SOCK_STREAM)
 		return VIRTIO_VSOCK_TYPE_STREAM;
 	else
 		return VIRTIO_VSOCK_TYPE_SEQPACKET;
@@ -287,22 +289,30 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk,
 	vvs = vsk->trans;
 
 	/* we can send less than pkt_len bytes */
-	if (pkt_len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE)
-		pkt_len = VIRTIO_VSOCK_MAX_PKT_BUF_SIZE;
+	if (pkt_len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE) {
+		/* Never send a partial datagram: reject oversized ones */
+		if (info->type == VIRTIO_VSOCK_TYPE_DGRAM)
+			return -EMSGSIZE;
+
+		pkt_len = VIRTIO_VSOCK_MAX_PKT_BUF_SIZE;
+	}
 
-	/* virtio_transport_get_credit might return less than pkt_len credit */
-	pkt_len = virtio_transport_get_credit(vvs, pkt_len);
+	if (info->type != VIRTIO_VSOCK_TYPE_DGRAM) {
+		/* virtio_transport_get_credit might return less than pkt_len credit */
+		pkt_len = virtio_transport_get_credit(vvs, pkt_len);
 
-	/* Do not send zero length OP_RW pkt */
-	if (pkt_len == 0 && info->op == VIRTIO_VSOCK_OP_RW)
-		return pkt_len;
+		/* Do not send zero length OP_RW pkt */
+		if (pkt_len == 0 && info->op == VIRTIO_VSOCK_OP_RW)
+			return pkt_len;
+	}
 
 	skb = virtio_transport_alloc_skb(info, pkt_len,
 					 src_cid, src_port,
 					 dst_cid, dst_port,
 					 &err);
 	if (!skb) {
-		virtio_transport_put_credit(vvs, pkt_len);
+		if (info->type != VIRTIO_VSOCK_TYPE_DGRAM)
+			virtio_transport_put_credit(vvs, pkt_len);
 		return err;
 	}
 
@@ -586,6 +596,73 @@ virtio_transport_seqpacket_dequeue(struct vsock_sock *vsk,
 }
 EXPORT_SYMBOL_GPL(virtio_transport_seqpacket_dequeue);
 
+/* Dequeue one datagram from the rx queue and copy up to @len bytes of its
+ * payload into @msg. MSG_TRUNC is set in @msg->msg_flags when the payload is
+ * longer than @len, and the sender address is stored in @msg->msg_name when
+ * the caller supplied one. The datagram is always consumed, even if the copy
+ * fails.
+ *
+ * Returns the number of bytes copied, 0 if nothing was dequeued, or the error
+ * from memcpy_to_msg().
+ */
+static ssize_t
+virtio_transport_dgram_do_dequeue(struct vsock_sock *vsk,
+				  struct msghdr *msg, size_t len)
+{
+	struct virtio_vsock_sock *vvs = vsk->trans;
+	struct sk_buff *skb;
+	size_t total;
+	int err;
+
+	if (!len)
+		return 0;
+
+	spin_lock_bh(&vvs->rx_lock);
+	skb = __skb_dequeue(&vvs->rx_queue);
+	spin_unlock_bh(&vvs->rx_lock);
+	if (!skb)
+		return 0;
+
+	total = skb->len - vsock_metadata(skb)->off;
+	if (len < total) {
+		total = len;
+		msg->msg_flags |= MSG_TRUNC;
+	}
+
+	/* sk_lock is held by caller so no one else can dequeue.
+	 * rx_lock is not held here since memcpy_to_msg() may sleep.
+	 */
+	err = memcpy_to_msg(msg, skb->data + vsock_metadata(skb)->off, total);
+
+	/* Read the header before the skb is freed below. */
+	if (!err && total > 0 && msg->msg_name) {
+		/* Provide the address of the sender. */
+		DECLARE_SOCKADDR(struct sockaddr_vm *, vm_addr, msg->msg_name);
+
+		vsock_addr_init(vm_addr, le64_to_cpu(vsock_hdr(skb)->src_cid),
+				le32_to_cpu(vsock_hdr(skb)->src_port));
+		msg->msg_namelen = sizeof(*vm_addr);
+	}
+
+	spin_lock_bh(&vvs->rx_lock);
+	virtio_transport_dec_rx_pkt(vvs, skb);
+	spin_unlock_bh(&vvs->rx_lock);
+
+	if (err) {
+		/* Drop the datagram rather than leak it. */
+		kfree_skb(skb);
+		return err;
+	}
+
+	consume_skb(skb);
+	return total;
+}
+
+static s64 virtio_transport_dgram_has_data(struct vsock_sock *vsk)
+{
+	return virtio_transport_stream_has_data(vsk);
+}
+
 int
 virtio_transport_seqpacket_enqueue(struct vsock_sock *vsk,
 				   struct msghdr *msg,
@@ -611,7 +688,64 @@ virtio_transport_dgram_dequeue(struct vsock_sock *vsk,
 			       struct msghdr *msg,
 			       size_t len, int flags)
 {
-	return -EOPNOTSUPP;
+	struct sock *sk = &vsk->sk;
+	DEFINE_WAIT(wait);
+	ssize_t err = 0;
+	long timeout;
+
+	/* OOB data, the error queue and peeking are not supported */
+	if (flags & MSG_OOB || flags & MSG_ERRQUEUE || flags & MSG_PEEK)
+		return -EOPNOTSUPP;
+
+	lock_sock(sk);
+
+	if (!len)
+		goto out;
+
+	timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
+
+	while (1) {
+		s64 ready;
+
+		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
+		ready = virtio_transport_dgram_has_data(vsk);
+
+		if (ready == 0) {
+			if (timeout == 0) {
+				err = -EAGAIN;
+				finish_wait(sk_sleep(sk), &wait);
+				break;
+			}
+
+			/* Sleep with the socket lock released */
+			release_sock(sk);
+			timeout = schedule_timeout(timeout);
+			lock_sock(sk);
+
+			if (signal_pending(current)) {
+				err = sock_intr_errno(timeout);
+				finish_wait(sk_sleep(sk), &wait);
+				break;
+			} else if (timeout == 0) {
+				err = -EAGAIN;
+				finish_wait(sk_sleep(sk), &wait);
+				break;
+			}
+		} else {
+			finish_wait(sk_sleep(sk), &wait);
+
+			if (ready < 0) {
+				err = -ENOMEM;
+				goto out;
+			}
+
+			err = virtio_transport_dgram_do_dequeue(vsk, msg, len);
+			break;
+		}
+	}
+out:
+	release_sock(sk);
+	return err;
 }
 EXPORT_SYMBOL_GPL(virtio_transport_dgram_dequeue);
 
@@ -819,13 +953,13 @@ EXPORT_SYMBOL_GPL(virtio_transport_stream_allow);
 int virtio_transport_dgram_bind(struct vsock_sock *vsk,
 				struct sockaddr_vm *addr)
 {
-	return -EOPNOTSUPP;
+	return vsock_bind_stream(vsk, addr);
 }
 EXPORT_SYMBOL_GPL(virtio_transport_dgram_bind);
 
 bool virtio_transport_dgram_allow(u32 cid, u32 port)
 {
-	return false;
+	return true;
 }
 EXPORT_SYMBOL_GPL(virtio_transport_dgram_allow);
 
@@ -861,7 +995,16 @@ virtio_transport_dgram_enqueue(struct vsock_sock *vsk,
 			       struct msghdr *msg,
 			       size_t dgram_len)
 {
-	return -EOPNOTSUPP;
+	struct virtio_vsock_pkt_info info = {
+		.op = VIRTIO_VSOCK_OP_RW,
+		.msg = msg,
+		.pkt_len = dgram_len,
+		.vsk = vsk,
+		.remote_cid = remote_addr->svm_cid,
+		.remote_port = remote_addr->svm_port,
+	};
+
+	return virtio_transport_send_pkt_info(vsk, &info);
 }
 EXPORT_SYMBOL_GPL(virtio_transport_dgram_enqueue);
 
@@ -1165,6 +1308,12 @@ virtio_transport_recv_connected(struct sock *sk,
 	struct virtio_vsock_hdr *hdr = vsock_hdr(skb);
 	int err = 0;
 
+	if (le16_to_cpu(vsock_hdr(skb)->type) == VIRTIO_VSOCK_TYPE_DGRAM) {
+		virtio_transport_recv_enqueue(vsk, skb);
+		sk->sk_data_ready(sk);
+		return err;
+	}
+
 	switch (le16_to_cpu(hdr->op)) {
 	case VIRTIO_VSOCK_OP_RW:
 		virtio_transport_recv_enqueue(vsk, skb);
@@ -1320,7 +1469,8 @@ virtio_transport_recv_listen(struct sock *sk, struct sk_buff *skb,
 static bool virtio_transport_valid_type(u16 type)
 {
 	return (type == VIRTIO_VSOCK_TYPE_STREAM) ||
-	       (type == VIRTIO_VSOCK_TYPE_SEQPACKET);
+	       (type == VIRTIO_VSOCK_TYPE_SEQPACKET) ||
+	       (type == VIRTIO_VSOCK_TYPE_DGRAM);
 }
 
 /* We are under the virtio-vsock's vsock->rx_lock or vhost-vsock's vq->mutex
@@ -1384,6 +1534,11 @@ void virtio_transport_recv_pkt(struct virtio_transport *t,
 		goto free_pkt;
 	}
 
+	if (sk->sk_type == SOCK_DGRAM) {
+		virtio_transport_recv_connected(sk, skb);
+		goto out;
+	}
+
 	space_available = virtio_transport_space_update(sk, skb);
 
 	/* Update CID in case it has changed after a transport reset event */
@@ -1415,6 +1570,7 @@ void virtio_transport_recv_pkt(struct virtio_transport *t,
 		break;
 	}
 
+out:
 	release_sock(sk);
 
 	/* Release refcnt obtained when we fetched this socket out of the