diff mbox series

[5/6] virtio/vsock: add support for dgram

Message ID 3cb082f1c88f3f2ef1fc250dbc0745fb79c745c7.1660362668.git.bobby.eshleman@bytedance.com (mailing list archive)
State Changes Requested
Delegated to: Netdev Maintainers
Headers show
Series virtio/vsock: introduce dgrams, sk_buff, and qdisc | expand

Checks

Context Check Description
netdev/tree_selection success Guessed tree name to be net-next, async
netdev/fixes_present success Fixes tag not required for -next series
netdev/subject_prefix success Link
netdev/cover_letter success Series has a cover letter
netdev/patch_count success Link
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit fail Errors and warnings before: 1028 this patch: 1118
netdev/cc_maintainers success CCed 11 of 11 maintainers
netdev/build_clang fail Errors and warnings before: 7 this patch: 10
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn fail Errors and warnings before: 1028 this patch: 1118
netdev/checkpatch warning WARNING: From:/Signed-off-by: email address mismatch: 'From: Bobby Eshleman <bobby.eshleman@gmail.com>' != 'Signed-off-by: Bobby Eshleman <bobby.eshleman@bytedance.com>' WARNING: line length of 83 exceeds 80 columns WARNING: line length of 86 exceeds 80 columns WARNING: line length of 87 exceeds 80 columns WARNING: line length of 91 exceeds 80 columns
netdev/kdoc success Errors and warnings before: 2 this patch: 2
netdev/source_inline success Was 0 now: 0

Commit Message

Bobby Eshleman Aug. 15, 2022, 5:56 p.m. UTC
This patch supports dgram in virtio and on the vhost side.

Signed-off-by: Jiang Wang <jiang.wang@bytedance.com>
Signed-off-by: Bobby Eshleman <bobby.eshleman@bytedance.com>
---
 drivers/vhost/vsock.c                   |   2 +-
 include/net/af_vsock.h                  |   2 +
 include/uapi/linux/virtio_vsock.h       |   1 +
 net/vmw_vsock/af_vsock.c                |  26 +++-
 net/vmw_vsock/virtio_transport.c        |   2 +-
 net/vmw_vsock/virtio_transport_common.c | 173 ++++++++++++++++++++++--
 6 files changed, 186 insertions(+), 20 deletions(-)

Comments

kernel test robot Aug. 15, 2022, 9:02 p.m. UTC | #1
Hi Bobby,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on mst-vhost/linux-next]
[also build test WARNING on linus/master v6.0-rc1 next-20220815]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Bobby-Eshleman/virtio-vsock-introduce-dgrams-sk_buff-and-qdisc/20220816-015812
base:   https://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost.git linux-next
config: m68k-allyesconfig (https://download.01.org/0day-ci/archive/20220816/202208160405.cG02E3MZ-lkp@intel.com/config)
compiler: m68k-linux-gcc (GCC) 12.1.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/intel-lab-lkp/linux/commit/cbb332da78c86ac574688831ed6f404d04d506db
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review Bobby-Eshleman/virtio-vsock-introduce-dgrams-sk_buff-and-qdisc/20220816-015812
        git checkout cbb332da78c86ac574688831ed6f404d04d506db
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 O=build_dir ARCH=m68k SHELL=/bin/bash net/vmw_vsock/

If you fix the issue, kindly add following tag where applicable
Reported-by: kernel test robot <lkp@intel.com>

All warnings (new ones prefixed by >>):

   net/vmw_vsock/virtio_transport_common.c: In function 'virtio_transport_dgram_do_dequeue':
>> net/vmw_vsock/virtio_transport_common.c:605:13: warning: variable 'free_space' set but not used [-Wunused-but-set-variable]
     605 |         u32 free_space;
         |             ^~~~~~~~~~


vim +/free_space +605 net/vmw_vsock/virtio_transport_common.c

   597	
   598	static ssize_t
   599	virtio_transport_dgram_do_dequeue(struct vsock_sock *vsk,
   600					  struct msghdr *msg, size_t len)
   601	{
   602		struct virtio_vsock_sock *vvs = vsk->trans;
   603		struct sk_buff *skb;
   604		size_t total = 0;
 > 605		u32 free_space;
   606		int err = -EFAULT;
   607	
   608		spin_lock_bh(&vvs->rx_lock);
   609		if (total < len && !skb_queue_empty_lockless(&vvs->rx_queue)) {
   610			skb = __skb_dequeue(&vvs->rx_queue);
   611	
   612			total = len;
   613			if (total > skb->len - vsock_metadata(skb)->off)
   614				total = skb->len - vsock_metadata(skb)->off;
   615			else if (total < skb->len - vsock_metadata(skb)->off)
   616				msg->msg_flags |= MSG_TRUNC;
   617	
   618			/* sk_lock is held by caller so no one else can dequeue.
   619			 * Unlock rx_lock since memcpy_to_msg() may sleep.
   620			 */
   621			spin_unlock_bh(&vvs->rx_lock);
   622	
   623			err = memcpy_to_msg(msg, skb->data + vsock_metadata(skb)->off, total);
   624			if (err)
   625				return err;
   626	
   627			spin_lock_bh(&vvs->rx_lock);
   628	
   629			virtio_transport_dec_rx_pkt(vvs, skb);
   630			consume_skb(skb);
   631		}
   632	
   633		free_space = vvs->buf_alloc - (vvs->fwd_cnt - vvs->last_fwd_cnt);
   634	
   635		spin_unlock_bh(&vvs->rx_lock);
   636	
   637		if (total > 0 && msg->msg_name) {
   638			/* Provide the address of the sender. */
   639			DECLARE_SOCKADDR(struct sockaddr_vm *, vm_addr, msg->msg_name);
   640	
   641			vsock_addr_init(vm_addr, le64_to_cpu(vsock_hdr(skb)->src_cid),
   642					le32_to_cpu(vsock_hdr(skb)->src_port));
   643			msg->msg_namelen = sizeof(*vm_addr);
   644		}
   645		return total;
   646	}
   647
Bobby Eshleman Aug. 16, 2022, 2:32 a.m. UTC | #2
CC'ing virtio-dev@lists.oasis-open.org

On Mon, Aug 15, 2022 at 10:56:08AM -0700, Bobby Eshleman wrote:
> This patch supports dgram in virtio and on the vhost side.
> 
> Signed-off-by: Jiang Wang <jiang.wang@bytedance.com>
> Signed-off-by: Bobby Eshleman <bobby.eshleman@bytedance.com>
> ---
>  drivers/vhost/vsock.c                   |   2 +-
>  include/net/af_vsock.h                  |   2 +
>  include/uapi/linux/virtio_vsock.h       |   1 +
>  net/vmw_vsock/af_vsock.c                |  26 +++-
>  net/vmw_vsock/virtio_transport.c        |   2 +-
>  net/vmw_vsock/virtio_transport_common.c | 173 ++++++++++++++++++++++--
>  6 files changed, 186 insertions(+), 20 deletions(-)
> 
> diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
> index a5d1bdb786fe..3dc72a5647ca 100644
> --- a/drivers/vhost/vsock.c
> +++ b/drivers/vhost/vsock.c
> @@ -925,7 +925,7 @@ static int __init vhost_vsock_init(void)
>  	int ret;
>  
>  	ret = vsock_core_register(&vhost_transport.transport,
> -				  VSOCK_TRANSPORT_F_H2G);
> +				  VSOCK_TRANSPORT_F_H2G | VSOCK_TRANSPORT_F_DGRAM);
>  	if (ret < 0)
>  		return ret;
>  
> diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h
> index 1c53c4c4d88f..37e55c81e4df 100644
> --- a/include/net/af_vsock.h
> +++ b/include/net/af_vsock.h
> @@ -78,6 +78,8 @@ struct vsock_sock {
>  s64 vsock_stream_has_data(struct vsock_sock *vsk);
>  s64 vsock_stream_has_space(struct vsock_sock *vsk);
>  struct sock *vsock_create_connected(struct sock *parent);
> +int vsock_bind_stream(struct vsock_sock *vsk,
> +		      struct sockaddr_vm *addr);
>  
>  /**** TRANSPORT ****/
>  
> diff --git a/include/uapi/linux/virtio_vsock.h b/include/uapi/linux/virtio_vsock.h
> index 857df3a3a70d..0975b9c88292 100644
> --- a/include/uapi/linux/virtio_vsock.h
> +++ b/include/uapi/linux/virtio_vsock.h
> @@ -70,6 +70,7 @@ struct virtio_vsock_hdr {
>  enum virtio_vsock_type {
>  	VIRTIO_VSOCK_TYPE_STREAM = 1,
>  	VIRTIO_VSOCK_TYPE_SEQPACKET = 2,
> +	VIRTIO_VSOCK_TYPE_DGRAM = 3,
>  };
>  
>  enum virtio_vsock_op {
> diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
> index 1893f8aafa48..87e4ae1866d3 100644
> --- a/net/vmw_vsock/af_vsock.c
> +++ b/net/vmw_vsock/af_vsock.c
> @@ -675,6 +675,19 @@ static int __vsock_bind_connectible(struct vsock_sock *vsk,
>  	return 0;
>  }
>  
> +int vsock_bind_stream(struct vsock_sock *vsk,
> +		      struct sockaddr_vm *addr)
> +{
> +	int retval;
> +
> +	spin_lock_bh(&vsock_table_lock);
> +	retval = __vsock_bind_connectible(vsk, addr);
> +	spin_unlock_bh(&vsock_table_lock);
> +
> +	return retval;
> +}
> +EXPORT_SYMBOL(vsock_bind_stream);
> +
>  static int __vsock_bind_dgram(struct vsock_sock *vsk,
>  			      struct sockaddr_vm *addr)
>  {
> @@ -2363,11 +2376,16 @@ int vsock_core_register(const struct vsock_transport *t, int features)
>  	}
>  
>  	if (features & VSOCK_TRANSPORT_F_DGRAM) {
> -		if (t_dgram) {
> -			err = -EBUSY;
> -			goto err_busy;
> +		/* TODO: always chose the G2H variant over others, support nesting later */
> +		if (features & VSOCK_TRANSPORT_F_G2H) {
> +			if (t_dgram)
> +				pr_warn("virtio_vsock: t_dgram already set\n");
> +			t_dgram = t;
> +		}
> +
> +		if (!t_dgram) {
> +			t_dgram = t;
>  		}
> -		t_dgram = t;
>  	}
>  
>  	if (features & VSOCK_TRANSPORT_F_LOCAL) {
> diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
> index 073314312683..d4526ca462d2 100644
> --- a/net/vmw_vsock/virtio_transport.c
> +++ b/net/vmw_vsock/virtio_transport.c
> @@ -850,7 +850,7 @@ static int __init virtio_vsock_init(void)
>  		return -ENOMEM;
>  
>  	ret = vsock_core_register(&virtio_transport.transport,
> -				  VSOCK_TRANSPORT_F_G2H);
> +				  VSOCK_TRANSPORT_F_G2H | VSOCK_TRANSPORT_F_DGRAM);
>  	if (ret)
>  		goto out_wq;
>  
> diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
> index bdf16fff054f..aedb48728677 100644
> --- a/net/vmw_vsock/virtio_transport_common.c
> +++ b/net/vmw_vsock/virtio_transport_common.c
> @@ -229,7 +229,9 @@ EXPORT_SYMBOL_GPL(virtio_transport_deliver_tap_pkt);
>  
>  static u16 virtio_transport_get_type(struct sock *sk)
>  {
> -	if (sk->sk_type == SOCK_STREAM)
> +	if (sk->sk_type == SOCK_DGRAM)
> +		return VIRTIO_VSOCK_TYPE_DGRAM;
> +	else if (sk->sk_type == SOCK_STREAM)
>  		return VIRTIO_VSOCK_TYPE_STREAM;
>  	else
>  		return VIRTIO_VSOCK_TYPE_SEQPACKET;
> @@ -287,22 +289,29 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk,
>  	vvs = vsk->trans;
>  
>  	/* we can send less than pkt_len bytes */
> -	if (pkt_len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE)
> -		pkt_len = VIRTIO_VSOCK_MAX_PKT_BUF_SIZE;
> +	if (pkt_len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE) {
> +		if (info->type != VIRTIO_VSOCK_TYPE_DGRAM)
> +			pkt_len = VIRTIO_VSOCK_MAX_PKT_BUF_SIZE;
> +		else
> +			return 0;
> +	}
>  
> -	/* virtio_transport_get_credit might return less than pkt_len credit */
> -	pkt_len = virtio_transport_get_credit(vvs, pkt_len);
> +	if (info->type != VIRTIO_VSOCK_TYPE_DGRAM) {
> +		/* virtio_transport_get_credit might return less than pkt_len credit */
> +		pkt_len = virtio_transport_get_credit(vvs, pkt_len);
>  
> -	/* Do not send zero length OP_RW pkt */
> -	if (pkt_len == 0 && info->op == VIRTIO_VSOCK_OP_RW)
> -		return pkt_len;
> +		/* Do not send zero length OP_RW pkt */
> +		if (pkt_len == 0 && info->op == VIRTIO_VSOCK_OP_RW)
> +			return pkt_len;
> +	}
>  
>  	skb = virtio_transport_alloc_skb(info, pkt_len,
>  					 src_cid, src_port,
>  					 dst_cid, dst_port,
>  					 &err);
>  	if (!skb) {
> -		virtio_transport_put_credit(vvs, pkt_len);
> +		if (info->type != VIRTIO_VSOCK_TYPE_DGRAM)
> +			virtio_transport_put_credit(vvs, pkt_len);
>  		return err;
>  	}
>  
> @@ -586,6 +595,61 @@ virtio_transport_seqpacket_dequeue(struct vsock_sock *vsk,
>  }
>  EXPORT_SYMBOL_GPL(virtio_transport_seqpacket_dequeue);
>  
> +static ssize_t
> +virtio_transport_dgram_do_dequeue(struct vsock_sock *vsk,
> +				  struct msghdr *msg, size_t len)
> +{
> +	struct virtio_vsock_sock *vvs = vsk->trans;
> +	struct sk_buff *skb;
> +	size_t total = 0;
> +	u32 free_space;
> +	int err = -EFAULT;
> +
> +	spin_lock_bh(&vvs->rx_lock);
> +	if (total < len && !skb_queue_empty_lockless(&vvs->rx_queue)) {
> +		skb = __skb_dequeue(&vvs->rx_queue);
> +
> +		total = len;
> +		if (total > skb->len - vsock_metadata(skb)->off)
> +			total = skb->len - vsock_metadata(skb)->off;
> +		else if (total < skb->len - vsock_metadata(skb)->off)
> +			msg->msg_flags |= MSG_TRUNC;
> +
> +		/* sk_lock is held by caller so no one else can dequeue.
> +		 * Unlock rx_lock since memcpy_to_msg() may sleep.
> +		 */
> +		spin_unlock_bh(&vvs->rx_lock);
> +
> +		err = memcpy_to_msg(msg, skb->data + vsock_metadata(skb)->off, total);
> +		if (err)
> +			return err;
> +
> +		spin_lock_bh(&vvs->rx_lock);
> +
> +		virtio_transport_dec_rx_pkt(vvs, skb);
> +		consume_skb(skb);
> +	}
> +
> +	free_space = vvs->buf_alloc - (vvs->fwd_cnt - vvs->last_fwd_cnt);
> +
> +	spin_unlock_bh(&vvs->rx_lock);
> +
> +	if (total > 0 && msg->msg_name) {
> +		/* Provide the address of the sender. */
> +		DECLARE_SOCKADDR(struct sockaddr_vm *, vm_addr, msg->msg_name);
> +
> +		vsock_addr_init(vm_addr, le64_to_cpu(vsock_hdr(skb)->src_cid),
> +				le32_to_cpu(vsock_hdr(skb)->src_port));
> +		msg->msg_namelen = sizeof(*vm_addr);
> +	}
> +	return total;
> +}
> +
> +static s64 virtio_transport_dgram_has_data(struct vsock_sock *vsk)
> +{
> +	return virtio_transport_stream_has_data(vsk);
> +}
> +
>  int
>  virtio_transport_seqpacket_enqueue(struct vsock_sock *vsk,
>  				   struct msghdr *msg,
> @@ -611,7 +675,66 @@ virtio_transport_dgram_dequeue(struct vsock_sock *vsk,
>  			       struct msghdr *msg,
>  			       size_t len, int flags)
>  {
> -	return -EOPNOTSUPP;
> +	struct sock *sk;
> +	size_t err = 0;
> +	long timeout;
> +
> +	DEFINE_WAIT(wait);
> +
> +	sk = &vsk->sk;
> +	err = 0;
> +
> +	if (flags & MSG_OOB || flags & MSG_ERRQUEUE || flags & MSG_PEEK)
> +		return -EOPNOTSUPP;
> +
> +	lock_sock(sk);
> +
> +	if (!len)
> +		goto out;
> +
> +	timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
> +
> +	while (1) {
> +		s64 ready;
> +
> +		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
> +		ready = virtio_transport_dgram_has_data(vsk);
> +
> +		if (ready == 0) {
> +			if (timeout == 0) {
> +				err = -EAGAIN;
> +				finish_wait(sk_sleep(sk), &wait);
> +				break;
> +			}
> +
> +			release_sock(sk);
> +			timeout = schedule_timeout(timeout);
> +			lock_sock(sk);
> +
> +			if (signal_pending(current)) {
> +				err = sock_intr_errno(timeout);
> +				finish_wait(sk_sleep(sk), &wait);
> +				break;
> +			} else if (timeout == 0) {
> +				err = -EAGAIN;
> +				finish_wait(sk_sleep(sk), &wait);
> +				break;
> +			}
> +		} else {
> +			finish_wait(sk_sleep(sk), &wait);
> +
> +			if (ready < 0) {
> +				err = -ENOMEM;
> +				goto out;
> +			}
> +
> +			err = virtio_transport_dgram_do_dequeue(vsk, msg, len);
> +			break;
> +		}
> +	}
> +out:
> +	release_sock(sk);
> +	return err;
>  }
>  EXPORT_SYMBOL_GPL(virtio_transport_dgram_dequeue);
>  
> @@ -819,13 +942,13 @@ EXPORT_SYMBOL_GPL(virtio_transport_stream_allow);
>  int virtio_transport_dgram_bind(struct vsock_sock *vsk,
>  				struct sockaddr_vm *addr)
>  {
> -	return -EOPNOTSUPP;
> +	return vsock_bind_stream(vsk, addr);
>  }
>  EXPORT_SYMBOL_GPL(virtio_transport_dgram_bind);
>  
>  bool virtio_transport_dgram_allow(u32 cid, u32 port)
>  {
> -	return false;
> +	return true;
>  }
>  EXPORT_SYMBOL_GPL(virtio_transport_dgram_allow);
>  
> @@ -861,7 +984,16 @@ virtio_transport_dgram_enqueue(struct vsock_sock *vsk,
>  			       struct msghdr *msg,
>  			       size_t dgram_len)
>  {
> -	return -EOPNOTSUPP;
> +	struct virtio_vsock_pkt_info info = {
> +		.op = VIRTIO_VSOCK_OP_RW,
> +		.msg = msg,
> +		.pkt_len = dgram_len,
> +		.vsk = vsk,
> +		.remote_cid = remote_addr->svm_cid,
> +		.remote_port = remote_addr->svm_port,
> +	};
> +
> +	return virtio_transport_send_pkt_info(vsk, &info);
>  }
>  EXPORT_SYMBOL_GPL(virtio_transport_dgram_enqueue);
>  
> @@ -1165,6 +1297,12 @@ virtio_transport_recv_connected(struct sock *sk,
>  	struct virtio_vsock_hdr *hdr = vsock_hdr(skb);
>  	int err = 0;
>  
> +	if (le16_to_cpu(vsock_hdr(skb)->type) == VIRTIO_VSOCK_TYPE_DGRAM) {
> +		virtio_transport_recv_enqueue(vsk, skb);
> +		sk->sk_data_ready(sk);
> +		return err;
> +	}
> +
>  	switch (le16_to_cpu(hdr->op)) {
>  	case VIRTIO_VSOCK_OP_RW:
>  		virtio_transport_recv_enqueue(vsk, skb);
> @@ -1320,7 +1458,8 @@ virtio_transport_recv_listen(struct sock *sk, struct sk_buff *skb,
>  static bool virtio_transport_valid_type(u16 type)
>  {
>  	return (type == VIRTIO_VSOCK_TYPE_STREAM) ||
> -	       (type == VIRTIO_VSOCK_TYPE_SEQPACKET);
> +	       (type == VIRTIO_VSOCK_TYPE_SEQPACKET) ||
> +	       (type == VIRTIO_VSOCK_TYPE_DGRAM);
>  }
>  
>  /* We are under the virtio-vsock's vsock->rx_lock or vhost-vsock's vq->mutex
> @@ -1384,6 +1523,11 @@ void virtio_transport_recv_pkt(struct virtio_transport *t,
>  		goto free_pkt;
>  	}
>  
> +	if (sk->sk_type == SOCK_DGRAM) {
> +		virtio_transport_recv_connected(sk, skb);
> +		goto out;
> +	}
> +
>  	space_available = virtio_transport_space_update(sk, skb);
>  
>  	/* Update CID in case it has changed after a transport reset event */
> @@ -1415,6 +1559,7 @@ void virtio_transport_recv_pkt(struct virtio_transport *t,
>  		break;
>  	}
>  
> +out:
>  	release_sock(sk);
>  
>  	/* Release refcnt obtained when we fetched this socket out of the
> -- 
> 2.35.1
>
Bobby Eshleman Aug. 16, 2022, 9:57 a.m. UTC | #3
On Wed, Aug 17, 2022 at 05:01:00AM +0000, Arseniy Krasnov wrote:
> On 16.08.2022 05:32, Bobby Eshleman wrote:
> > CC'ing virtio-dev@lists.oasis-open.org
> > 
> > On Mon, Aug 15, 2022 at 10:56:08AM -0700, Bobby Eshleman wrote:
> >> This patch supports dgram in virtio and on the vhost side.
> Hello,
> 
> sorry, i don't understand, how this maintains message boundaries? Or it
> is unnecessary for SOCK_DGRAM?
> 
> Thanks

If I understand your question, the length is included in the header, so
receivers always know that header start + header length + payload length
marks the message boundary.

> >>
> >> Signed-off-by: Jiang Wang <jiang.wang@bytedance.com>
> >> Signed-off-by: Bobby Eshleman <bobby.eshleman@bytedance.com>
> >> ---
> >>  drivers/vhost/vsock.c                   |   2 +-
> >>  include/net/af_vsock.h                  |   2 +
> >>  include/uapi/linux/virtio_vsock.h       |   1 +
> >>  net/vmw_vsock/af_vsock.c                |  26 +++-
> >>  net/vmw_vsock/virtio_transport.c        |   2 +-
> >>  net/vmw_vsock/virtio_transport_common.c | 173 ++++++++++++++++++++++--
> >>  6 files changed, 186 insertions(+), 20 deletions(-)
> >>
> >> diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
> >> index a5d1bdb786fe..3dc72a5647ca 100644
> >> --- a/drivers/vhost/vsock.c
> >> +++ b/drivers/vhost/vsock.c
> >> @@ -925,7 +925,7 @@ static int __init vhost_vsock_init(void)
> >>  	int ret;
> >>  
> >>  	ret = vsock_core_register(&vhost_transport.transport,
> >> -				  VSOCK_TRANSPORT_F_H2G);
> >> +				  VSOCK_TRANSPORT_F_H2G | VSOCK_TRANSPORT_F_DGRAM);
> >>  	if (ret < 0)
> >>  		return ret;
> >>  
> >> diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h
> >> index 1c53c4c4d88f..37e55c81e4df 100644
> >> --- a/include/net/af_vsock.h
> >> +++ b/include/net/af_vsock.h
> >> @@ -78,6 +78,8 @@ struct vsock_sock {
> >>  s64 vsock_stream_has_data(struct vsock_sock *vsk);
> >>  s64 vsock_stream_has_space(struct vsock_sock *vsk);
> >>  struct sock *vsock_create_connected(struct sock *parent);
> >> +int vsock_bind_stream(struct vsock_sock *vsk,
> >> +		      struct sockaddr_vm *addr);
> >>  
> >>  /**** TRANSPORT ****/
> >>  
> >> diff --git a/include/uapi/linux/virtio_vsock.h b/include/uapi/linux/virtio_vsock.h
> >> index 857df3a3a70d..0975b9c88292 100644
> >> --- a/include/uapi/linux/virtio_vsock.h
> >> +++ b/include/uapi/linux/virtio_vsock.h
> >> @@ -70,6 +70,7 @@ struct virtio_vsock_hdr {
> >>  enum virtio_vsock_type {
> >>  	VIRTIO_VSOCK_TYPE_STREAM = 1,
> >>  	VIRTIO_VSOCK_TYPE_SEQPACKET = 2,
> >> +	VIRTIO_VSOCK_TYPE_DGRAM = 3,
> >>  };
> >>  
> >>  enum virtio_vsock_op {
> >> diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
> >> index 1893f8aafa48..87e4ae1866d3 100644
> >> --- a/net/vmw_vsock/af_vsock.c
> >> +++ b/net/vmw_vsock/af_vsock.c
> >> @@ -675,6 +675,19 @@ static int __vsock_bind_connectible(struct vsock_sock *vsk,
> >>  	return 0;
> >>  }
> >>  
> >> +int vsock_bind_stream(struct vsock_sock *vsk,
> >> +		      struct sockaddr_vm *addr)
> >> +{
> >> +	int retval;
> >> +
> >> +	spin_lock_bh(&vsock_table_lock);
> >> +	retval = __vsock_bind_connectible(vsk, addr);
> >> +	spin_unlock_bh(&vsock_table_lock);
> >> +
> >> +	return retval;
> >> +}
> >> +EXPORT_SYMBOL(vsock_bind_stream);
> >> +
> >>  static int __vsock_bind_dgram(struct vsock_sock *vsk,
> >>  			      struct sockaddr_vm *addr)
> >>  {
> >> @@ -2363,11 +2376,16 @@ int vsock_core_register(const struct vsock_transport *t, int features)
> >>  	}
> >>  
> >>  	if (features & VSOCK_TRANSPORT_F_DGRAM) {
> >> -		if (t_dgram) {
> >> -			err = -EBUSY;
> >> -			goto err_busy;
> >> +		/* TODO: always chose the G2H variant over others, support nesting later */
> >> +		if (features & VSOCK_TRANSPORT_F_G2H) {
> >> +			if (t_dgram)
> >> +				pr_warn("virtio_vsock: t_dgram already set\n");
> >> +			t_dgram = t;
> >> +		}
> >> +
> >> +		if (!t_dgram) {
> >> +			t_dgram = t;
> >>  		}
> >> -		t_dgram = t;
> >>  	}
> >>  
> >>  	if (features & VSOCK_TRANSPORT_F_LOCAL) {
> >> diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
> >> index 073314312683..d4526ca462d2 100644
> >> --- a/net/vmw_vsock/virtio_transport.c
> >> +++ b/net/vmw_vsock/virtio_transport.c
> >> @@ -850,7 +850,7 @@ static int __init virtio_vsock_init(void)
> >>  		return -ENOMEM;
> >>  
> >>  	ret = vsock_core_register(&virtio_transport.transport,
> >> -				  VSOCK_TRANSPORT_F_G2H);
> >> +				  VSOCK_TRANSPORT_F_G2H | VSOCK_TRANSPORT_F_DGRAM);
> >>  	if (ret)
> >>  		goto out_wq;
> >>  
> >> diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
> >> index bdf16fff054f..aedb48728677 100644
> >> --- a/net/vmw_vsock/virtio_transport_common.c
> >> +++ b/net/vmw_vsock/virtio_transport_common.c
> >> @@ -229,7 +229,9 @@ EXPORT_SYMBOL_GPL(virtio_transport_deliver_tap_pkt);
> >>  
> >>  static u16 virtio_transport_get_type(struct sock *sk)
> >>  {
> >> -	if (sk->sk_type == SOCK_STREAM)
> >> +	if (sk->sk_type == SOCK_DGRAM)
> >> +		return VIRTIO_VSOCK_TYPE_DGRAM;
> >> +	else if (sk->sk_type == SOCK_STREAM)
> >>  		return VIRTIO_VSOCK_TYPE_STREAM;
> >>  	else
> >>  		return VIRTIO_VSOCK_TYPE_SEQPACKET;
> >> @@ -287,22 +289,29 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk,
> >>  	vvs = vsk->trans;
> >>  
> >>  	/* we can send less than pkt_len bytes */
> >> -	if (pkt_len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE)
> >> -		pkt_len = VIRTIO_VSOCK_MAX_PKT_BUF_SIZE;
> >> +	if (pkt_len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE) {
> >> +		if (info->type != VIRTIO_VSOCK_TYPE_DGRAM)
> >> +			pkt_len = VIRTIO_VSOCK_MAX_PKT_BUF_SIZE;
> >> +		else
> >> +			return 0;
> >> +	}
> >>  
> >> -	/* virtio_transport_get_credit might return less than pkt_len credit */
> >> -	pkt_len = virtio_transport_get_credit(vvs, pkt_len);
> >> +	if (info->type != VIRTIO_VSOCK_TYPE_DGRAM) {
> >> +		/* virtio_transport_get_credit might return less than pkt_len credit */
> >> +		pkt_len = virtio_transport_get_credit(vvs, pkt_len);
> >>  
> >> -	/* Do not send zero length OP_RW pkt */
> >> -	if (pkt_len == 0 && info->op == VIRTIO_VSOCK_OP_RW)
> >> -		return pkt_len;
> >> +		/* Do not send zero length OP_RW pkt */
> >> +		if (pkt_len == 0 && info->op == VIRTIO_VSOCK_OP_RW)
> >> +			return pkt_len;
> >> +	}
> >>  
> >>  	skb = virtio_transport_alloc_skb(info, pkt_len,
> >>  					 src_cid, src_port,
> >>  					 dst_cid, dst_port,
> >>  					 &err);
> >>  	if (!skb) {
> >> -		virtio_transport_put_credit(vvs, pkt_len);
> >> +		if (info->type != VIRTIO_VSOCK_TYPE_DGRAM)
> >> +			virtio_transport_put_credit(vvs, pkt_len);
> >>  		return err;
> >>  	}
> >>  
> >> @@ -586,6 +595,61 @@ virtio_transport_seqpacket_dequeue(struct vsock_sock *vsk,
> >>  }
> >>  EXPORT_SYMBOL_GPL(virtio_transport_seqpacket_dequeue);
> >>  
> >> +static ssize_t
> >> +virtio_transport_dgram_do_dequeue(struct vsock_sock *vsk,
> >> +				  struct msghdr *msg, size_t len)
> >> +{
> >> +	struct virtio_vsock_sock *vvs = vsk->trans;
> >> +	struct sk_buff *skb;
> >> +	size_t total = 0;
> >> +	u32 free_space;
> >> +	int err = -EFAULT;
> >> +
> >> +	spin_lock_bh(&vvs->rx_lock);
> >> +	if (total < len && !skb_queue_empty_lockless(&vvs->rx_queue)) {
> >> +		skb = __skb_dequeue(&vvs->rx_queue);
> >> +
> >> +		total = len;
> >> +		if (total > skb->len - vsock_metadata(skb)->off)
> >> +			total = skb->len - vsock_metadata(skb)->off;
> >> +		else if (total < skb->len - vsock_metadata(skb)->off)
> >> +			msg->msg_flags |= MSG_TRUNC;
> >> +
> >> +		/* sk_lock is held by caller so no one else can dequeue.
> >> +		 * Unlock rx_lock since memcpy_to_msg() may sleep.
> >> +		 */
> >> +		spin_unlock_bh(&vvs->rx_lock);
> >> +
> >> +		err = memcpy_to_msg(msg, skb->data + vsock_metadata(skb)->off, total);
> >> +		if (err)
> >> +			return err;
> >> +
> >> +		spin_lock_bh(&vvs->rx_lock);
> >> +
> >> +		virtio_transport_dec_rx_pkt(vvs, skb);
> >> +		consume_skb(skb);
> >> +	}
> >> +
> >> +	free_space = vvs->buf_alloc - (vvs->fwd_cnt - vvs->last_fwd_cnt);
> >> +
> >> +	spin_unlock_bh(&vvs->rx_lock);
> >> +
> >> +	if (total > 0 && msg->msg_name) {
> >> +		/* Provide the address of the sender. */
> >> +		DECLARE_SOCKADDR(struct sockaddr_vm *, vm_addr, msg->msg_name);
> >> +
> >> +		vsock_addr_init(vm_addr, le64_to_cpu(vsock_hdr(skb)->src_cid),
> >> +				le32_to_cpu(vsock_hdr(skb)->src_port));
> >> +		msg->msg_namelen = sizeof(*vm_addr);
> >> +	}
> >> +	return total;
> >> +}
> >> +
> >> +static s64 virtio_transport_dgram_has_data(struct vsock_sock *vsk)
> >> +{
> >> +	return virtio_transport_stream_has_data(vsk);
> >> +}
> >> +
> >>  int
> >>  virtio_transport_seqpacket_enqueue(struct vsock_sock *vsk,
> >>  				   struct msghdr *msg,
> >> @@ -611,7 +675,66 @@ virtio_transport_dgram_dequeue(struct vsock_sock *vsk,
> >>  			       struct msghdr *msg,
> >>  			       size_t len, int flags)
> >>  {
> >> -	return -EOPNOTSUPP;
> >> +	struct sock *sk;
> >> +	size_t err = 0;
> >> +	long timeout;
> >> +
> >> +	DEFINE_WAIT(wait);
> >> +
> >> +	sk = &vsk->sk;
> >> +	err = 0;
> >> +
> >> +	if (flags & MSG_OOB || flags & MSG_ERRQUEUE || flags & MSG_PEEK)
> >> +		return -EOPNOTSUPP;
> >> +
> >> +	lock_sock(sk);
> >> +
> >> +	if (!len)
> >> +		goto out;
> >> +
> >> +	timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
> >> +
> >> +	while (1) {
> >> +		s64 ready;
> >> +
> >> +		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
> >> +		ready = virtio_transport_dgram_has_data(vsk);
> >> +
> >> +		if (ready == 0) {
> >> +			if (timeout == 0) {
> >> +				err = -EAGAIN;
> >> +				finish_wait(sk_sleep(sk), &wait);
> >> +				break;
> >> +			}
> >> +
> >> +			release_sock(sk);
> >> +			timeout = schedule_timeout(timeout);
> >> +			lock_sock(sk);
> >> +
> >> +			if (signal_pending(current)) {
> >> +				err = sock_intr_errno(timeout);
> >> +				finish_wait(sk_sleep(sk), &wait);
> >> +				break;
> >> +			} else if (timeout == 0) {
> >> +				err = -EAGAIN;
> >> +				finish_wait(sk_sleep(sk), &wait);
> >> +				break;
> >> +			}
> >> +		} else {
> >> +			finish_wait(sk_sleep(sk), &wait);
> >> +
> >> +			if (ready < 0) {
> >> +				err = -ENOMEM;
> >> +				goto out;
> >> +			}
> >> +
> >> +			err = virtio_transport_dgram_do_dequeue(vsk, msg, len);
> >> +			break;
> >> +		}
> >> +	}
> >> +out:
> >> +	release_sock(sk);
> >> +	return err;
> >>  }
> >>  EXPORT_SYMBOL_GPL(virtio_transport_dgram_dequeue);
> >>  
> >> @@ -819,13 +942,13 @@ EXPORT_SYMBOL_GPL(virtio_transport_stream_allow);
> >>  int virtio_transport_dgram_bind(struct vsock_sock *vsk,
> >>  				struct sockaddr_vm *addr)
> >>  {
> >> -	return -EOPNOTSUPP;
> >> +	return vsock_bind_stream(vsk, addr);
> >>  }
> >>  EXPORT_SYMBOL_GPL(virtio_transport_dgram_bind);
> >>  
> >>  bool virtio_transport_dgram_allow(u32 cid, u32 port)
> >>  {
> >> -	return false;
> >> +	return true;
> >>  }
> >>  EXPORT_SYMBOL_GPL(virtio_transport_dgram_allow);
> >>  
> >> @@ -861,7 +984,16 @@ virtio_transport_dgram_enqueue(struct vsock_sock *vsk,
> >>  			       struct msghdr *msg,
> >>  			       size_t dgram_len)
> >>  {
> >> -	return -EOPNOTSUPP;
> >> +	struct virtio_vsock_pkt_info info = {
> >> +		.op = VIRTIO_VSOCK_OP_RW,
> >> +		.msg = msg,
> >> +		.pkt_len = dgram_len,
> >> +		.vsk = vsk,
> >> +		.remote_cid = remote_addr->svm_cid,
> >> +		.remote_port = remote_addr->svm_port,
> >> +	};
> >> +
> >> +	return virtio_transport_send_pkt_info(vsk, &info);
> >>  }
> >>  EXPORT_SYMBOL_GPL(virtio_transport_dgram_enqueue);
> >>  
> >> @@ -1165,6 +1297,12 @@ virtio_transport_recv_connected(struct sock *sk,
> >>  	struct virtio_vsock_hdr *hdr = vsock_hdr(skb);
> >>  	int err = 0;
> >>  
> >> +	if (le16_to_cpu(vsock_hdr(skb)->type) == VIRTIO_VSOCK_TYPE_DGRAM) {
> >> +		virtio_transport_recv_enqueue(vsk, skb);
> >> +		sk->sk_data_ready(sk);
> >> +		return err;
> >> +	}
> >> +
> >>  	switch (le16_to_cpu(hdr->op)) {
> >>  	case VIRTIO_VSOCK_OP_RW:
> >>  		virtio_transport_recv_enqueue(vsk, skb);
> >> @@ -1320,7 +1458,8 @@ virtio_transport_recv_listen(struct sock *sk, struct sk_buff *skb,
> >>  static bool virtio_transport_valid_type(u16 type)
> >>  {
> >>  	return (type == VIRTIO_VSOCK_TYPE_STREAM) ||
> >> -	       (type == VIRTIO_VSOCK_TYPE_SEQPACKET);
> >> +	       (type == VIRTIO_VSOCK_TYPE_SEQPACKET) ||
> >> +	       (type == VIRTIO_VSOCK_TYPE_DGRAM);
> >>  }
> >>  
> >>  /* We are under the virtio-vsock's vsock->rx_lock or vhost-vsock's vq->mutex
> >> @@ -1384,6 +1523,11 @@ void virtio_transport_recv_pkt(struct virtio_transport *t,
> >>  		goto free_pkt;
> >>  	}
> >>  
> >> +	if (sk->sk_type == SOCK_DGRAM) {
> >> +		virtio_transport_recv_connected(sk, skb);
> >> +		goto out;
> >> +	}
> >> +
> >>  	space_available = virtio_transport_space_update(sk, skb);
> >>  
> >>  	/* Update CID in case it has changed after a transport reset event */
> >> @@ -1415,6 +1559,7 @@ void virtio_transport_recv_pkt(struct virtio_transport *t,
> >>  		break;
> >>  	}
> >>  
> >> +out:
> >>  	release_sock(sk);
> >>  
> >>  	/* Release refcnt obtained when we fetched this socket out of the
> >> -- 
> >> 2.35.1
> >>
> > 
> > ---------------------------------------------------------------------
> > To unsubscribe, e-mail: virtio-dev-unsubscribe@lists.oasis-open.org
> > For additional commands, e-mail: virtio-dev-help@lists.oasis-open.org
> > 
>
Bobby Eshleman Aug. 16, 2022, 9:58 a.m. UTC | #4
On Wed, Aug 17, 2022 at 05:42:08AM +0000, Arseniy Krasnov wrote:
> On 17.08.2022 08:01, Arseniy Krasnov wrote:
> > On 16.08.2022 05:32, Bobby Eshleman wrote:
> >> CC'ing virtio-dev@lists.oasis-open.org
> >>
> >> On Mon, Aug 15, 2022 at 10:56:08AM -0700, Bobby Eshleman wrote:
> >>> This patch supports dgram in virtio and on the vhost side.
> > Hello,
> > 
> > sorry, i don't understand, how this maintains message boundaries? Or it
> > is unnecessary for SOCK_DGRAM?
> > 
> > Thanks
> >>>
> >>> Signed-off-by: Jiang Wang <jiang.wang@bytedance.com>
> >>> Signed-off-by: Bobby Eshleman <bobby.eshleman@bytedance.com>
> >>> ---
> >>>  drivers/vhost/vsock.c                   |   2 +-
> >>>  include/net/af_vsock.h                  |   2 +
> >>>  include/uapi/linux/virtio_vsock.h       |   1 +
> >>>  net/vmw_vsock/af_vsock.c                |  26 +++-
> >>>  net/vmw_vsock/virtio_transport.c        |   2 +-
> >>>  net/vmw_vsock/virtio_transport_common.c | 173 ++++++++++++++++++++++--
> >>>  6 files changed, 186 insertions(+), 20 deletions(-)
> >>>
> >>> diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
> >>> index a5d1bdb786fe..3dc72a5647ca 100644
> >>> --- a/drivers/vhost/vsock.c
> >>> +++ b/drivers/vhost/vsock.c
> >>> @@ -925,7 +925,7 @@ static int __init vhost_vsock_init(void)
> >>>  	int ret;
> >>>  
> >>>  	ret = vsock_core_register(&vhost_transport.transport,
> >>> -				  VSOCK_TRANSPORT_F_H2G);
> >>> +				  VSOCK_TRANSPORT_F_H2G | VSOCK_TRANSPORT_F_DGRAM);
> >>>  	if (ret < 0)
> >>>  		return ret;
> >>>  
> >>> diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h
> >>> index 1c53c4c4d88f..37e55c81e4df 100644
> >>> --- a/include/net/af_vsock.h
> >>> +++ b/include/net/af_vsock.h
> >>> @@ -78,6 +78,8 @@ struct vsock_sock {
> >>>  s64 vsock_stream_has_data(struct vsock_sock *vsk);
> >>>  s64 vsock_stream_has_space(struct vsock_sock *vsk);
> >>>  struct sock *vsock_create_connected(struct sock *parent);
> >>> +int vsock_bind_stream(struct vsock_sock *vsk,
> >>> +		      struct sockaddr_vm *addr);
> >>>  
> >>>  /**** TRANSPORT ****/
> >>>  
> >>> diff --git a/include/uapi/linux/virtio_vsock.h b/include/uapi/linux/virtio_vsock.h
> >>> index 857df3a3a70d..0975b9c88292 100644
> >>> --- a/include/uapi/linux/virtio_vsock.h
> >>> +++ b/include/uapi/linux/virtio_vsock.h
> >>> @@ -70,6 +70,7 @@ struct virtio_vsock_hdr {
> >>>  enum virtio_vsock_type {
> >>>  	VIRTIO_VSOCK_TYPE_STREAM = 1,
> >>>  	VIRTIO_VSOCK_TYPE_SEQPACKET = 2,
> >>> +	VIRTIO_VSOCK_TYPE_DGRAM = 3,
> >>>  };
> >>>  
> >>>  enum virtio_vsock_op {
> >>> diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
> >>> index 1893f8aafa48..87e4ae1866d3 100644
> >>> --- a/net/vmw_vsock/af_vsock.c
> >>> +++ b/net/vmw_vsock/af_vsock.c
> >>> @@ -675,6 +675,19 @@ static int __vsock_bind_connectible(struct vsock_sock *vsk,
> >>>  	return 0;
> >>>  }
> >>>  
> >>> +int vsock_bind_stream(struct vsock_sock *vsk,
> >>> +		      struct sockaddr_vm *addr)
> >>> +{
> >>> +	int retval;
> >>> +
> >>> +	spin_lock_bh(&vsock_table_lock);
> >>> +	retval = __vsock_bind_connectible(vsk, addr);
> >>> +	spin_unlock_bh(&vsock_table_lock);
> >>> +
> >>> +	return retval;
> >>> +}
> >>> +EXPORT_SYMBOL(vsock_bind_stream);
> >>> +
> >>>  static int __vsock_bind_dgram(struct vsock_sock *vsk,
> >>>  			      struct sockaddr_vm *addr)
> >>>  {
> >>> @@ -2363,11 +2376,16 @@ int vsock_core_register(const struct vsock_transport *t, int features)
> >>>  	}
> >>>  
> >>>  	if (features & VSOCK_TRANSPORT_F_DGRAM) {
> >>> -		if (t_dgram) {
> >>> -			err = -EBUSY;
> >>> -			goto err_busy;
> >>> +		/* TODO: always chose the G2H variant over others, support nesting later */
> >>> +		if (features & VSOCK_TRANSPORT_F_G2H) {
> >>> +			if (t_dgram)
> >>> +				pr_warn("virtio_vsock: t_dgram already set\n");
> >>> +			t_dgram = t;
> >>> +		}
> >>> +
> >>> +		if (!t_dgram) {
> >>> +			t_dgram = t;
> >>>  		}
> >>> -		t_dgram = t;
> >>>  	}
> >>>  
> >>>  	if (features & VSOCK_TRANSPORT_F_LOCAL) {
> >>> diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
> >>> index 073314312683..d4526ca462d2 100644
> >>> --- a/net/vmw_vsock/virtio_transport.c
> >>> +++ b/net/vmw_vsock/virtio_transport.c
> >>> @@ -850,7 +850,7 @@ static int __init virtio_vsock_init(void)
> >>>  		return -ENOMEM;
> >>>  
> >>>  	ret = vsock_core_register(&virtio_transport.transport,
> >>> -				  VSOCK_TRANSPORT_F_G2H);
> >>> +				  VSOCK_TRANSPORT_F_G2H | VSOCK_TRANSPORT_F_DGRAM);
> >>>  	if (ret)
> >>>  		goto out_wq;
> >>>  
> >>> diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
> >>> index bdf16fff054f..aedb48728677 100644
> >>> --- a/net/vmw_vsock/virtio_transport_common.c
> >>> +++ b/net/vmw_vsock/virtio_transport_common.c
> >>> @@ -229,7 +229,9 @@ EXPORT_SYMBOL_GPL(virtio_transport_deliver_tap_pkt);
> >>>  
> >>>  static u16 virtio_transport_get_type(struct sock *sk)
> >>>  {
> >>> -	if (sk->sk_type == SOCK_STREAM)
> >>> +	if (sk->sk_type == SOCK_DGRAM)
> >>> +		return VIRTIO_VSOCK_TYPE_DGRAM;
> >>> +	else if (sk->sk_type == SOCK_STREAM)
> >>>  		return VIRTIO_VSOCK_TYPE_STREAM;
> >>>  	else
> >>>  		return VIRTIO_VSOCK_TYPE_SEQPACKET;
> >>> @@ -287,22 +289,29 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk,
> >>>  	vvs = vsk->trans;
> >>>  
> >>>  	/* we can send less than pkt_len bytes */
> >>> -	if (pkt_len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE)
> >>> -		pkt_len = VIRTIO_VSOCK_MAX_PKT_BUF_SIZE;
> >>> +	if (pkt_len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE) {
> >>> +		if (info->type != VIRTIO_VSOCK_TYPE_DGRAM)
> >>> +			pkt_len = VIRTIO_VSOCK_MAX_PKT_BUF_SIZE;
> >>> +		else
> >>> +			return 0;
> >>> +	}
> >>>  
> >>> -	/* virtio_transport_get_credit might return less than pkt_len credit */
> >>> -	pkt_len = virtio_transport_get_credit(vvs, pkt_len);
> >>> +	if (info->type != VIRTIO_VSOCK_TYPE_DGRAM) {
> >>> +		/* virtio_transport_get_credit might return less than pkt_len credit */
> >>> +		pkt_len = virtio_transport_get_credit(vvs, pkt_len);
> >>>  
> >>> -	/* Do not send zero length OP_RW pkt */
> >>> -	if (pkt_len == 0 && info->op == VIRTIO_VSOCK_OP_RW)
> >>> -		return pkt_len;
> >>> +		/* Do not send zero length OP_RW pkt */
> >>> +		if (pkt_len == 0 && info->op == VIRTIO_VSOCK_OP_RW)
> >>> +			return pkt_len;
> >>> +	}
> >>>  
> >>>  	skb = virtio_transport_alloc_skb(info, pkt_len,
> >>>  					 src_cid, src_port,
> >>>  					 dst_cid, dst_port,
> >>>  					 &err);
> >>>  	if (!skb) {
> >>> -		virtio_transport_put_credit(vvs, pkt_len);
> >>> +		if (info->type != VIRTIO_VSOCK_TYPE_DGRAM)
> >>> +			virtio_transport_put_credit(vvs, pkt_len);
> >>>  		return err;
> >>>  	}
> >>>  
> >>> @@ -586,6 +595,61 @@ virtio_transport_seqpacket_dequeue(struct vsock_sock *vsk,
> >>>  }
> >>>  EXPORT_SYMBOL_GPL(virtio_transport_seqpacket_dequeue);
> >>>  
> >>> +static ssize_t
> >>> +virtio_transport_dgram_do_dequeue(struct vsock_sock *vsk,
> >>> +				  struct msghdr *msg, size_t len)
> >>> +{
> >>> +	struct virtio_vsock_sock *vvs = vsk->trans;
> >>> +	struct sk_buff *skb;
> >>> +	size_t total = 0;
> >>> +	u32 free_space;
> >>> +	int err = -EFAULT;
> >>> +
> >>> +	spin_lock_bh(&vvs->rx_lock);
> >>> +	if (total < len && !skb_queue_empty_lockless(&vvs->rx_queue)) {
> >>> +		skb = __skb_dequeue(&vvs->rx_queue);
> >>> +
> >>> +		total = len;
> >>> +		if (total > skb->len - vsock_metadata(skb)->off)
> >>> +			total = skb->len - vsock_metadata(skb)->off;
> >>> +		else if (total < skb->len - vsock_metadata(skb)->off)
> >>> +			msg->msg_flags |= MSG_TRUNC;
> >>> +
> >>> +		/* sk_lock is held by caller so no one else can dequeue.
> >>> +		 * Unlock rx_lock since memcpy_to_msg() may sleep.
> >>> +		 */
> >>> +		spin_unlock_bh(&vvs->rx_lock);
> >>> +
> >>> +		err = memcpy_to_msg(msg, skb->data + vsock_metadata(skb)->off, total);
> >>> +		if (err)
> >>> +			return err;
> >>> +
> >>> +		spin_lock_bh(&vvs->rx_lock);
> >>> +
> >>> +		virtio_transport_dec_rx_pkt(vvs, skb);
> >>> +		consume_skb(skb);
> >>> +	}
> >>> +
> >>> +	free_space = vvs->buf_alloc - (vvs->fwd_cnt - vvs->last_fwd_cnt);
> >>> +
> >>> +	spin_unlock_bh(&vvs->rx_lock);
> >>> +
> >>> +	if (total > 0 && msg->msg_name) {
> >>> +		/* Provide the address of the sender. */
> >>> +		DECLARE_SOCKADDR(struct sockaddr_vm *, vm_addr, msg->msg_name);
> >>> +
> >>> +		vsock_addr_init(vm_addr, le64_to_cpu(vsock_hdr(skb)->src_cid),
> >>> +				le32_to_cpu(vsock_hdr(skb)->src_port));
> >>> +		msg->msg_namelen = sizeof(*vm_addr);
> >>> +	}
> >>> +	return total;
> >>> +}
> >>> +
> >>> +static s64 virtio_transport_dgram_has_data(struct vsock_sock *vsk)
> >>> +{
> >>> +	return virtio_transport_stream_has_data(vsk);
> >>> +}
> >>> +
> >>>  int
> >>>  virtio_transport_seqpacket_enqueue(struct vsock_sock *vsk,
> >>>  				   struct msghdr *msg,
> >>> @@ -611,7 +675,66 @@ virtio_transport_dgram_dequeue(struct vsock_sock *vsk,
> >>>  			       struct msghdr *msg,
> >>>  			       size_t len, int flags)
> >>>  {
> >>> -	return -EOPNOTSUPP;
> >>> +	struct sock *sk;
> >>> +	size_t err = 0;
> >>> +	long timeout;
> >>> +
> >>> +	DEFINE_WAIT(wait);
> >>> +
> >>> +	sk = &vsk->sk;
> >>> +	err = 0;
> >>> +
> >>> +	if (flags & MSG_OOB || flags & MSG_ERRQUEUE || flags & MSG_PEEK)
> >>> +		return -EOPNOTSUPP;
> >>> +
> >>> +	lock_sock(sk);
> >>> +
> >>> +	if (!len)
> >>> +		goto out;
> >>> +
> >>> +	timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
> >>> +
> >>> +	while (1) {
> >>> +		s64 ready;
> >>> +
> >>> +		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
> >>> +		ready = virtio_transport_dgram_has_data(vsk);
> >>> +
> >>> +		if (ready == 0) {
> >>> +			if (timeout == 0) {
> >>> +				err = -EAGAIN;
> >>> +				finish_wait(sk_sleep(sk), &wait);
> >>> +				break;
> >>> +			}
> >>> +
> >>> +			release_sock(sk);
> >>> +			timeout = schedule_timeout(timeout);
> >>> +			lock_sock(sk);
> >>> +
> >>> +			if (signal_pending(current)) {
> >>> +				err = sock_intr_errno(timeout);
> >>> +				finish_wait(sk_sleep(sk), &wait);
> >>> +				break;
> >>> +			} else if (timeout == 0) {
> >>> +				err = -EAGAIN;
> >>> +				finish_wait(sk_sleep(sk), &wait);
> >>> +				break;
> >>> +			}
> >>> +		} else {
> >>> +			finish_wait(sk_sleep(sk), &wait);
> >>> +
> >>> +			if (ready < 0) {
> >>> +				err = -ENOMEM;
> >>> +				goto out;
> >>> +			}
> >>> +
> >>> +			err = virtio_transport_dgram_do_dequeue(vsk, msg, len);
> >>> +			break;
> >>> +		}
> >>> +	}
> >>> +out:
> >>> +	release_sock(sk);
> >>> +	return err;
> >>>  }
> >>>  EXPORT_SYMBOL_GPL(virtio_transport_dgram_dequeue);
> ^^^
> May be, this generic data waiting logic should be in af_vsock.c, as for stream/seqpacket?
> In this way, another transport which supports SOCK_DGRAM could reuse it.

I think that is a great idea. I'll test that change for v2.

Thanks.

> >>>  
> >>> @@ -819,13 +942,13 @@ EXPORT_SYMBOL_GPL(virtio_transport_stream_allow);
> >>>  int virtio_transport_dgram_bind(struct vsock_sock *vsk,
> >>>  				struct sockaddr_vm *addr)
> >>>  {
> >>> -	return -EOPNOTSUPP;
> >>> +	return vsock_bind_stream(vsk, addr);
> >>>  }
> >>>  EXPORT_SYMBOL_GPL(virtio_transport_dgram_bind);
> >>>  
> >>>  bool virtio_transport_dgram_allow(u32 cid, u32 port)
> >>>  {
> >>> -	return false;
> >>> +	return true;
> >>>  }
> >>>  EXPORT_SYMBOL_GPL(virtio_transport_dgram_allow);
> >>>  
> >>> @@ -861,7 +984,16 @@ virtio_transport_dgram_enqueue(struct vsock_sock *vsk,
> >>>  			       struct msghdr *msg,
> >>>  			       size_t dgram_len)
> >>>  {
> >>> -	return -EOPNOTSUPP;
> >>> +	struct virtio_vsock_pkt_info info = {
> >>> +		.op = VIRTIO_VSOCK_OP_RW,
> >>> +		.msg = msg,
> >>> +		.pkt_len = dgram_len,
> >>> +		.vsk = vsk,
> >>> +		.remote_cid = remote_addr->svm_cid,
> >>> +		.remote_port = remote_addr->svm_port,
> >>> +	};
> >>> +
> >>> +	return virtio_transport_send_pkt_info(vsk, &info);
> >>>  }
> >>>  EXPORT_SYMBOL_GPL(virtio_transport_dgram_enqueue);
> >>>  
> >>> @@ -1165,6 +1297,12 @@ virtio_transport_recv_connected(struct sock *sk,
> >>>  	struct virtio_vsock_hdr *hdr = vsock_hdr(skb);
> >>>  	int err = 0;
> >>>  
> >>> +	if (le16_to_cpu(vsock_hdr(skb)->type) == VIRTIO_VSOCK_TYPE_DGRAM) {
> >>> +		virtio_transport_recv_enqueue(vsk, skb);
> >>> +		sk->sk_data_ready(sk);
> >>> +		return err;
> >>> +	}
> >>> +
> >>>  	switch (le16_to_cpu(hdr->op)) {
> >>>  	case VIRTIO_VSOCK_OP_RW:
> >>>  		virtio_transport_recv_enqueue(vsk, skb);
> >>> @@ -1320,7 +1458,8 @@ virtio_transport_recv_listen(struct sock *sk, struct sk_buff *skb,
> >>>  static bool virtio_transport_valid_type(u16 type)
> >>>  {
> >>>  	return (type == VIRTIO_VSOCK_TYPE_STREAM) ||
> >>> -	       (type == VIRTIO_VSOCK_TYPE_SEQPACKET);
> >>> +	       (type == VIRTIO_VSOCK_TYPE_SEQPACKET) ||
> >>> +	       (type == VIRTIO_VSOCK_TYPE_DGRAM);
> >>>  }
> >>>  
> >>>  /* We are under the virtio-vsock's vsock->rx_lock or vhost-vsock's vq->mutex
> >>> @@ -1384,6 +1523,11 @@ void virtio_transport_recv_pkt(struct virtio_transport *t,
> >>>  		goto free_pkt;
> >>>  	}
> >>>  
> >>> +	if (sk->sk_type == SOCK_DGRAM) {
> >>> +		virtio_transport_recv_connected(sk, skb);
> >>> +		goto out;
> >>> +	}
> >>> +
> >>>  	space_available = virtio_transport_space_update(sk, skb);
> >>>  
> >>>  	/* Update CID in case it has changed after a transport reset event */
> >>> @@ -1415,6 +1559,7 @@ void virtio_transport_recv_pkt(struct virtio_transport *t,
> >>>  		break;
> >>>  	}
> >>>  
> >>> +out:
> >>>  	release_sock(sk);
> >>>  
> >>>  	/* Release refcnt obtained when we fetched this socket out of the
> >>> -- 
> >>> 2.35.1
> >>>
> >>
> >> ---------------------------------------------------------------------
> >> To unsubscribe, e-mail: virtio-dev-unsubscribe@lists.oasis-open.org
> >> For additional commands, e-mail: virtio-dev-help@lists.oasis-open.org
> >>
> > 
>
Bobby Eshleman Aug. 16, 2022, 8:52 p.m. UTC | #5
On Thu, Aug 18, 2022 at 08:35:48AM +0000, Arseniy Krasnov wrote:
> On Tue, 2022-08-16 at 09:58 +0000, Bobby Eshleman wrote:
> > On Wed, Aug 17, 2022 at 05:42:08AM +0000, Arseniy Krasnov wrote:
> > > On 17.08.2022 08:01, Arseniy Krasnov wrote:
> > > > On 16.08.2022 05:32, Bobby Eshleman wrote:
> > > > > CC'ing virtio-dev@lists.oasis-open.org
> > > > > 
> > > > > On Mon, Aug 15, 2022 at 10:56:08AM -0700, Bobby Eshleman wrote:
> > > > > > This patch supports dgram in virtio and on the vhost side.
> > > > Hello,
> > > > 
> > > > sorry, i don't understand, how this maintains message boundaries?
> > > > Or it
> > > > is unnecessary for SOCK_DGRAM?
> > > > 
> > > > Thanks
> > > > > > Signed-off-by: Jiang Wang <jiang.wang@bytedance.com>
> > > > > > Signed-off-by: Bobby Eshleman <bobby.eshleman@bytedance.com>
> > > > > > ---
> > > > > >  drivers/vhost/vsock.c                   |   2 +-
> > > > > >  include/net/af_vsock.h                  |   2 +
> > > > > >  include/uapi/linux/virtio_vsock.h       |   1 +
> > > > > >  net/vmw_vsock/af_vsock.c                |  26 +++-
> > > > > >  net/vmw_vsock/virtio_transport.c        |   2 +-
> > > > > >  net/vmw_vsock/virtio_transport_common.c | 173
> > > > > > ++++++++++++++++++++++--
> > > > > >  6 files changed, 186 insertions(+), 20 deletions(-)
> > > > > > 
> > > > > > diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
> > > > > > index a5d1bdb786fe..3dc72a5647ca 100644
> > > > > > --- a/drivers/vhost/vsock.c
> > > > > > +++ b/drivers/vhost/vsock.c
> > > > > > @@ -925,7 +925,7 @@ static int __init vhost_vsock_init(void)
> > > > > >  	int ret;
> > > > > >  
> > > > > >  	ret = vsock_core_register(&vhost_transport.transport,
> > > > > > -				  VSOCK_TRANSPORT_F_H2G);
> > > > > > +				  VSOCK_TRANSPORT_F_H2G |
> > > > > > VSOCK_TRANSPORT_F_DGRAM);
> > > > > >  	if (ret < 0)
> > > > > >  		return ret;
> > > > > >  
> > > > > > diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h
> > > > > > index 1c53c4c4d88f..37e55c81e4df 100644
> > > > > > --- a/include/net/af_vsock.h
> > > > > > +++ b/include/net/af_vsock.h
> > > > > > @@ -78,6 +78,8 @@ struct vsock_sock {
> > > > > >  s64 vsock_stream_has_data(struct vsock_sock *vsk);
> > > > > >  s64 vsock_stream_has_space(struct vsock_sock *vsk);
> > > > > >  struct sock *vsock_create_connected(struct sock *parent);
> > > > > > +int vsock_bind_stream(struct vsock_sock *vsk,
> > > > > > +		      struct sockaddr_vm *addr);
> > > > > >  
> > > > > >  /**** TRANSPORT ****/
> > > > > >  
> > > > > > diff --git a/include/uapi/linux/virtio_vsock.h
> > > > > > b/include/uapi/linux/virtio_vsock.h
> > > > > > index 857df3a3a70d..0975b9c88292 100644
> > > > > > --- a/include/uapi/linux/virtio_vsock.h
> > > > > > +++ b/include/uapi/linux/virtio_vsock.h
> > > > > > @@ -70,6 +70,7 @@ struct virtio_vsock_hdr {
> > > > > >  enum virtio_vsock_type {
> > > > > >  	VIRTIO_VSOCK_TYPE_STREAM = 1,
> > > > > >  	VIRTIO_VSOCK_TYPE_SEQPACKET = 2,
> > > > > > +	VIRTIO_VSOCK_TYPE_DGRAM = 3,
> > > > > >  };
> > > > > >  
> > > > > >  enum virtio_vsock_op {
> > > > > > diff --git a/net/vmw_vsock/af_vsock.c
> > > > > > b/net/vmw_vsock/af_vsock.c
> > > > > > index 1893f8aafa48..87e4ae1866d3 100644
> > > > > > --- a/net/vmw_vsock/af_vsock.c
> > > > > > +++ b/net/vmw_vsock/af_vsock.c
> > > > > > @@ -675,6 +675,19 @@ static int
> > > > > > __vsock_bind_connectible(struct vsock_sock *vsk,
> > > > > >  	return 0;
> > > > > >  }
> > > > > >  
> > > > > > +int vsock_bind_stream(struct vsock_sock *vsk,
> > > > > > +		      struct sockaddr_vm *addr)
> > > > > > +{
> > > > > > +	int retval;
> > > > > > +
> > > > > > +	spin_lock_bh(&vsock_table_lock);
> > > > > > +	retval = __vsock_bind_connectible(vsk, addr);
> > > > > > +	spin_unlock_bh(&vsock_table_lock);
> > > > > > +
> > > > > > +	return retval;
> > > > > > +}
> > > > > > +EXPORT_SYMBOL(vsock_bind_stream);
> > > > > > +
> > > > > >  static int __vsock_bind_dgram(struct vsock_sock *vsk,
> > > > > >  			      struct sockaddr_vm *addr)
> > > > > >  {
> > > > > > @@ -2363,11 +2376,16 @@ int vsock_core_register(const struct
> > > > > > vsock_transport *t, int features)
> > > > > >  	}
> > > > > >  
> > > > > >  	if (features & VSOCK_TRANSPORT_F_DGRAM) {
> > > > > > -		if (t_dgram) {
> > > > > > -			err = -EBUSY;
> > > > > > -			goto err_busy;
> > > > > > +		/* TODO: always chose the G2H variant over
> > > > > > others, support nesting later */
> > > > > > +		if (features & VSOCK_TRANSPORT_F_G2H) {
> > > > > > +			if (t_dgram)
> > > > > > +				pr_warn("virtio_vsock: t_dgram
> > > > > > already set\n");
> > > > > > +			t_dgram = t;
> > > > > > +		}
> > > > > > +
> > > > > > +		if (!t_dgram) {
> > > > > > +			t_dgram = t;
> > > > > >  		}
> > > > > > -		t_dgram = t;
> > > > > >  	}
> > > > > >  
> > > > > >  	if (features & VSOCK_TRANSPORT_F_LOCAL) {
> > > > > > diff --git a/net/vmw_vsock/virtio_transport.c
> > > > > > b/net/vmw_vsock/virtio_transport.c
> > > > > > index 073314312683..d4526ca462d2 100644
> > > > > > --- a/net/vmw_vsock/virtio_transport.c
> > > > > > +++ b/net/vmw_vsock/virtio_transport.c
> > > > > > @@ -850,7 +850,7 @@ static int __init virtio_vsock_init(void)
> > > > > >  		return -ENOMEM;
> > > > > >  
> > > > > >  	ret = vsock_core_register(&virtio_transport.transport,
> > > > > > -				  VSOCK_TRANSPORT_F_G2H);
> > > > > > +				  VSOCK_TRANSPORT_F_G2H |
> > > > > > VSOCK_TRANSPORT_F_DGRAM);
> > > > > >  	if (ret)
> > > > > >  		goto out_wq;
> > > > > >  
> > > > > > diff --git a/net/vmw_vsock/virtio_transport_common.c
> > > > > > b/net/vmw_vsock/virtio_transport_common.c
> > > > > > index bdf16fff054f..aedb48728677 100644
> > > > > > --- a/net/vmw_vsock/virtio_transport_common.c
> > > > > > +++ b/net/vmw_vsock/virtio_transport_common.c
> > > > > > @@ -229,7 +229,9 @@
> > > > > > EXPORT_SYMBOL_GPL(virtio_transport_deliver_tap_pkt);
> > > > > >  
> > > > > >  static u16 virtio_transport_get_type(struct sock *sk)
> > > > > >  {
> > > > > > -	if (sk->sk_type == SOCK_STREAM)
> > > > > > +	if (sk->sk_type == SOCK_DGRAM)
> > > > > > +		return VIRTIO_VSOCK_TYPE_DGRAM;
> > > > > > +	else if (sk->sk_type == SOCK_STREAM)
> > > > > >  		return VIRTIO_VSOCK_TYPE_STREAM;
> > > > > >  	else
> > > > > >  		return VIRTIO_VSOCK_TYPE_SEQPACKET;
> > > > > > @@ -287,22 +289,29 @@ static int
> > > > > > virtio_transport_send_pkt_info(struct vsock_sock *vsk,
> > > > > >  	vvs = vsk->trans;
> > > > > >  
> > > > > >  	/* we can send less than pkt_len bytes */
> > > > > > -	if (pkt_len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE)
> > > > > > -		pkt_len = VIRTIO_VSOCK_MAX_PKT_BUF_SIZE;
> > > > > > +	if (pkt_len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE) {
> > > > > > +		if (info->type != VIRTIO_VSOCK_TYPE_DGRAM)
> > > > > > +			pkt_len =
> > > > > > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE;
> > > > > > +		else
> > > > > > +			return 0;
> > > > > > +	}
> > > > > >  
> > > > > > -	/* virtio_transport_get_credit might return less than
> > > > > > pkt_len credit */
> > > > > > -	pkt_len = virtio_transport_get_credit(vvs, pkt_len);
> > > > > > +	if (info->type != VIRTIO_VSOCK_TYPE_DGRAM) {
> > > > > > +		/* virtio_transport_get_credit might return
> > > > > > less than pkt_len credit */
> > > > > > +		pkt_len = virtio_transport_get_credit(vvs,
> > > > > > pkt_len);
> > > > > >  
> > > > > > -	/* Do not send zero length OP_RW pkt */
> > > > > > -	if (pkt_len == 0 && info->op == VIRTIO_VSOCK_OP_RW)
> > > > > > -		return pkt_len;
> > > > > > +		/* Do not send zero length OP_RW pkt */
> > > > > > +		if (pkt_len == 0 && info->op ==
> > > > > > VIRTIO_VSOCK_OP_RW)
> > > > > > +			return pkt_len;
> > > > > > +	}
> > > > > >  
> > > > > >  	skb = virtio_transport_alloc_skb(info, pkt_len,
> > > > > >  					 src_cid, src_port,
> > > > > >  					 dst_cid, dst_port,
> > > > > >  					 &err);
> > > > > >  	if (!skb) {
> > > > > > -		virtio_transport_put_credit(vvs, pkt_len);
> > > > > > +		if (info->type != VIRTIO_VSOCK_TYPE_DGRAM)
> > > > > > +			virtio_transport_put_credit(vvs,
> > > > > > pkt_len);
> > > > > >  		return err;
> > > > > >  	}
> > > > > >  
> > > > > > @@ -586,6 +595,61 @@
> > > > > > virtio_transport_seqpacket_dequeue(struct vsock_sock *vsk,
> > > > > >  }
> > > > > >  EXPORT_SYMBOL_GPL(virtio_transport_seqpacket_dequeue);
> > > > > >  
> > > > > > +static ssize_t
> > > > > > +virtio_transport_dgram_do_dequeue(struct vsock_sock *vsk,
> > > > > > +				  struct msghdr *msg, size_t
> > > > > > len)
> > > > > > +{
> > > > > > +	struct virtio_vsock_sock *vvs = vsk->trans;
> > > > > > +	struct sk_buff *skb;
> > > > > > +	size_t total = 0;
> > > > > > +	u32 free_space;
> > > > > > +	int err = -EFAULT;
> > > > > > +
> > > > > > +	spin_lock_bh(&vvs->rx_lock);
> > > > > > +	if (total < len && !skb_queue_empty_lockless(&vvs-
> > > > > > >rx_queue)) {
> > > > > > +		skb = __skb_dequeue(&vvs->rx_queue);
> > > > > > +
> > > > > > +		total = len;
> > > > > > +		if (total > skb->len - vsock_metadata(skb)-
> > > > > > >off)
> > > > > > +			total = skb->len - vsock_metadata(skb)-
> > > > > > >off;
> > > > > > +		else if (total < skb->len -
> > > > > > vsock_metadata(skb)->off)
> > > > > > +			msg->msg_flags |= MSG_TRUNC;
> > > > > > +
> > > > > > +		/* sk_lock is held by caller so no one else can
> > > > > > dequeue.
> > > > > > +		 * Unlock rx_lock since memcpy_to_msg() may
> > > > > > sleep.
> > > > > > +		 */
> > > > > > +		spin_unlock_bh(&vvs->rx_lock);
> > > > > > +
> > > > > > +		err = memcpy_to_msg(msg, skb->data +
> > > > > > vsock_metadata(skb)->off, total);
> > > > > > +		if (err)
> > > > > > +			return err;
> > > > > > +
> > > > > > +		spin_lock_bh(&vvs->rx_lock);
> > > > > > +
> > > > > > +		virtio_transport_dec_rx_pkt(vvs, skb);
> > > > > > +		consume_skb(skb);
> > > > > > +	}
> > > > > > +
> > > > > > +	free_space = vvs->buf_alloc - (vvs->fwd_cnt - vvs-
> > > > > > >last_fwd_cnt);
> > > > > > +
> > > > > > +	spin_unlock_bh(&vvs->rx_lock);
> > > > > > +
> > > > > > +	if (total > 0 && msg->msg_name) {
> > > > > > +		/* Provide the address of the sender. */
> > > > > > +		DECLARE_SOCKADDR(struct sockaddr_vm *, vm_addr,
> > > > > > msg->msg_name);
> > > > > > +
> > > > > > +		vsock_addr_init(vm_addr,
> > > > > > le64_to_cpu(vsock_hdr(skb)->src_cid),
> > > > > > +				le32_to_cpu(vsock_hdr(skb)-
> > > > > > >src_port));
> > > > > > +		msg->msg_namelen = sizeof(*vm_addr);
> > > > > > +	}
> > > > > > +	return total;
> > > > > > +}
> > > > > > +
> > > > > > +static s64 virtio_transport_dgram_has_data(struct vsock_sock
> > > > > > *vsk)
> > > > > > +{
> > > > > > +	return virtio_transport_stream_has_data(vsk);
> > > > > > +}
> > > > > > +
> > > > > >  int
> > > > > >  virtio_transport_seqpacket_enqueue(struct vsock_sock *vsk,
> > > > > >  				   struct msghdr *msg,
> > > > > > @@ -611,7 +675,66 @@ virtio_transport_dgram_dequeue(struct
> > > > > > vsock_sock *vsk,
> > > > > >  			       struct msghdr *msg,
> > > > > >  			       size_t len, int flags)
> > > > > >  {
> > > > > > -	return -EOPNOTSUPP;
> > > > > > +	struct sock *sk;
> > > > > > +	size_t err = 0;
> > > > > > +	long timeout;
> > > > > > +
> > > > > > +	DEFINE_WAIT(wait);
> > > > > > +
> > > > > > +	sk = &vsk->sk;
> > > > > > +	err = 0;
> > > > > > +
> > > > > > +	if (flags & MSG_OOB || flags & MSG_ERRQUEUE || flags &
> > > > > > MSG_PEEK)
> > > > > > +		return -EOPNOTSUPP;
> > > > > > +
> > > > > > +	lock_sock(sk);
> > > > > > +
> > > > > > +	if (!len)
> > > > > > +		goto out;
> > > > > > +
> > > > > > +	timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
> > > > > > +
> > > > > > +	while (1) {
> > > > > > +		s64 ready;
> > > > > > +
> > > > > > +		prepare_to_wait(sk_sleep(sk), &wait,
> > > > > > TASK_INTERRUPTIBLE);
> > > > > > +		ready = virtio_transport_dgram_has_data(vsk);
> > > > > > +
> > > > > > +		if (ready == 0) {
> > > > > > +			if (timeout == 0) {
> > > > > > +				err = -EAGAIN;
> > > > > > +				finish_wait(sk_sleep(sk),
> > > > > > &wait);
> > > > > > +				break;
> > > > > > +			}
> > > > > > +
> > > > > > +			release_sock(sk);
> > > > > > +			timeout = schedule_timeout(timeout);
> > > > > > +			lock_sock(sk);
> > > > > > +
> > > > > > +			if (signal_pending(current)) {
> > > > > > +				err = sock_intr_errno(timeout);
> > > > > > +				finish_wait(sk_sleep(sk),
> > > > > > &wait);
> > > > > > +				break;
> > > > > > +			} else if (timeout == 0) {
> > > > > > +				err = -EAGAIN;
> > > > > > +				finish_wait(sk_sleep(sk),
> > > > > > &wait);
> > > > > > +				break;
> > > > > > +			}
> > > > > > +		} else {
> > > > > > +			finish_wait(sk_sleep(sk), &wait);
> > > > > > +
> > > > > > +			if (ready < 0) {
> > > > > > +				err = -ENOMEM;
> > > > > > +				goto out;
> > > > > > +			}
> > > > > > +
> > > > > > +			err =
> > > > > > virtio_transport_dgram_do_dequeue(vsk, msg, len);
> > > > > > +			break;
> > > > > > +		}
> > > > > > +	}
> > > > > > +out:
> > > > > > +	release_sock(sk);
> > > > > > +	return err;
> > > > > >  }
> > > > > >  EXPORT_SYMBOL_GPL(virtio_transport_dgram_dequeue);
> > > ^^^
> > > May be, this generic data waiting logic should be in af_vsock.c, as
> > > for stream/seqpacket?
> > > In this way, another transport which supports SOCK_DGRAM could
> > > reuse it.
> > 
> > I think that is a great idea. I'll test that change for v2.
> > 
> > Thanks.
> 
> Also for v2, i tested Your patchset a little bit(write here to not
> spread over all mails):
> 1) seqpacket test in vsock_test.c fails(seems MSG_EOR flag issue)

I will investigate.

> 2) i can't do rmmod with the following config(after testing):
>    CONFIG_VSOCKETS=m
>    CONFIG_VIRTIO_VSOCKETS=m
>    CONFIG_VIRTIO_VSOCKETS_COMMON=m
>    CONFIG_VHOST=m
>    CONFIG_VHOST_VSOCK=m
>    Guest is shutdown, but rmmod fails.
> 3) virtio_transport_init + virtio_transport_exit seems must be
>    under EXPORT_SYMBOL_GPL(), because both used in another module.

Definitely, will fix.

> 4) I tried to send 5kb(or 20kb not matter) piece of data, but got      
>    kernel panic both in guest and later in host.
> 

Thanks for catching that. I can reproduce it intermittently, but only
for seqpacket. Did you happen to see this for other socket types as
well?

Thanks

> Thank You
> > 
> > > > > >  
> > > > > > @@ -819,13 +942,13 @@
> > > > > > EXPORT_SYMBOL_GPL(virtio_transport_stream_allow);
> > > > > >  int virtio_transport_dgram_bind(struct vsock_sock *vsk,
> > > > > >  				struct sockaddr_vm *addr)
> > > > > >  {
> > > > > > -	return -EOPNOTSUPP;
> > > > > > +	return vsock_bind_stream(vsk, addr);
> > > > > >  }
> > > > > >  EXPORT_SYMBOL_GPL(virtio_transport_dgram_bind);
> > > > > >  
> > > > > >  bool virtio_transport_dgram_allow(u32 cid, u32 port)
> > > > > >  {
> > > > > > -	return false;
> > > > > > +	return true;
> > > > > >  }
> > > > > >  EXPORT_SYMBOL_GPL(virtio_transport_dgram_allow);
> > > > > >  
> > > > > > @@ -861,7 +984,16 @@ virtio_transport_dgram_enqueue(struct
> > > > > > vsock_sock *vsk,
> > > > > >  			       struct msghdr *msg,
> > > > > >  			       size_t dgram_len)
> > > > > >  {
> > > > > > -	return -EOPNOTSUPP;
> > > > > > +	struct virtio_vsock_pkt_info info = {
> > > > > > +		.op = VIRTIO_VSOCK_OP_RW,
> > > > > > +		.msg = msg,
> > > > > > +		.pkt_len = dgram_len,
> > > > > > +		.vsk = vsk,
> > > > > > +		.remote_cid = remote_addr->svm_cid,
> > > > > > +		.remote_port = remote_addr->svm_port,
> > > > > > +	};
> > > > > > +
> > > > > > +	return virtio_transport_send_pkt_info(vsk, &info);
> > > > > >  }
> > > > > >  EXPORT_SYMBOL_GPL(virtio_transport_dgram_enqueue);
> > > > > >  
> > > > > > @@ -1165,6 +1297,12 @@ virtio_transport_recv_connected(struct
> > > > > > sock *sk,
> > > > > >  	struct virtio_vsock_hdr *hdr = vsock_hdr(skb);
> > > > > >  	int err = 0;
> > > > > >  
> > > > > > +	if (le16_to_cpu(vsock_hdr(skb)->type) ==
> > > > > > VIRTIO_VSOCK_TYPE_DGRAM) {
> > > > > > +		virtio_transport_recv_enqueue(vsk, skb);
> > > > > > +		sk->sk_data_ready(sk);
> > > > > > +		return err;
> > > > > > +	}
> > > > > > +
> > > > > >  	switch (le16_to_cpu(hdr->op)) {
> > > > > >  	case VIRTIO_VSOCK_OP_RW:
> > > > > >  		virtio_transport_recv_enqueue(vsk, skb);
> > > > > > @@ -1320,7 +1458,8 @@ virtio_transport_recv_listen(struct
> > > > > > sock *sk, struct sk_buff *skb,
> > > > > >  static bool virtio_transport_valid_type(u16 type)
> > > > > >  {
> > > > > >  	return (type == VIRTIO_VSOCK_TYPE_STREAM) ||
> > > > > > -	       (type == VIRTIO_VSOCK_TYPE_SEQPACKET);
> > > > > > +	       (type == VIRTIO_VSOCK_TYPE_SEQPACKET) ||
> > > > > > +	       (type == VIRTIO_VSOCK_TYPE_DGRAM);
> > > > > >  }
> > > > > >  
> > > > > >  /* We are under the virtio-vsock's vsock->rx_lock or vhost-
> > > > > > vsock's vq->mutex
> > > > > > @@ -1384,6 +1523,11 @@ void virtio_transport_recv_pkt(struct
> > > > > > virtio_transport *t,
> > > > > >  		goto free_pkt;
> > > > > >  	}
> > > > > >  
> > > > > > +	if (sk->sk_type == SOCK_DGRAM) {
> > > > > > +		virtio_transport_recv_connected(sk, skb);
> > > > > > +		goto out;
> > > > > > +	}
> > > > > > +
> > > > > >  	space_available = virtio_transport_space_update(sk,
> > > > > > skb);
> > > > > >  
> > > > > >  	/* Update CID in case it has changed after a transport
> > > > > > reset event */
> > > > > > @@ -1415,6 +1559,7 @@ void virtio_transport_recv_pkt(struct
> > > > > > virtio_transport *t,
> > > > > >  		break;
> > > > > >  	}
> > > > > >  
> > > > > > +out:
> > > > > >  	release_sock(sk);
> > > > > >  
> > > > > >  	/* Release refcnt obtained when we fetched this socket
> > > > > > out of the
> > > > > > -- 
> > > > > > 2.35.1
> > > > > > 
> > > > > 
> > > > > -------------------------------------------------------------
> > > > > --------
> > > > > To unsubscribe, e-mail: 
> > > > > virtio-dev-unsubscribe@lists.oasis-open.org
> > > > > For additional commands, e-mail: 
> > > > > virtio-dev-help@lists.oasis-open.org
> > > > > 
> > 
> > ---------------------------------------------------------------------
> > To unsubscribe, e-mail: virtio-dev-unsubscribe@lists.oasis-open.org
> > For additional commands, e-mail: virtio-dev-help@lists.oasis-open.org
> >
Arseniy Krasnov Aug. 17, 2022, 5:01 a.m. UTC | #6
On 16.08.2022 05:32, Bobby Eshleman wrote:
> CC'ing virtio-dev@lists.oasis-open.org
> 
> On Mon, Aug 15, 2022 at 10:56:08AM -0700, Bobby Eshleman wrote:
>> This patch supports dgram in virtio and on the vhost side.
Hello,

sorry, i don't understand, how this maintains message boundaries? Or it
is unnecessary for SOCK_DGRAM?

Thanks
>>
>> Signed-off-by: Jiang Wang <jiang.wang@bytedance.com>
>> Signed-off-by: Bobby Eshleman <bobby.eshleman@bytedance.com>
>> ---
>>  drivers/vhost/vsock.c                   |   2 +-
>>  include/net/af_vsock.h                  |   2 +
>>  include/uapi/linux/virtio_vsock.h       |   1 +
>>  net/vmw_vsock/af_vsock.c                |  26 +++-
>>  net/vmw_vsock/virtio_transport.c        |   2 +-
>>  net/vmw_vsock/virtio_transport_common.c | 173 ++++++++++++++++++++++--
>>  6 files changed, 186 insertions(+), 20 deletions(-)
>>
>> diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
>> index a5d1bdb786fe..3dc72a5647ca 100644
>> --- a/drivers/vhost/vsock.c
>> +++ b/drivers/vhost/vsock.c
>> @@ -925,7 +925,7 @@ static int __init vhost_vsock_init(void)
>>  	int ret;
>>  
>>  	ret = vsock_core_register(&vhost_transport.transport,
>> -				  VSOCK_TRANSPORT_F_H2G);
>> +				  VSOCK_TRANSPORT_F_H2G | VSOCK_TRANSPORT_F_DGRAM);
>>  	if (ret < 0)
>>  		return ret;
>>  
>> diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h
>> index 1c53c4c4d88f..37e55c81e4df 100644
>> --- a/include/net/af_vsock.h
>> +++ b/include/net/af_vsock.h
>> @@ -78,6 +78,8 @@ struct vsock_sock {
>>  s64 vsock_stream_has_data(struct vsock_sock *vsk);
>>  s64 vsock_stream_has_space(struct vsock_sock *vsk);
>>  struct sock *vsock_create_connected(struct sock *parent);
>> +int vsock_bind_stream(struct vsock_sock *vsk,
>> +		      struct sockaddr_vm *addr);
>>  
>>  /**** TRANSPORT ****/
>>  
>> diff --git a/include/uapi/linux/virtio_vsock.h b/include/uapi/linux/virtio_vsock.h
>> index 857df3a3a70d..0975b9c88292 100644
>> --- a/include/uapi/linux/virtio_vsock.h
>> +++ b/include/uapi/linux/virtio_vsock.h
>> @@ -70,6 +70,7 @@ struct virtio_vsock_hdr {
>>  enum virtio_vsock_type {
>>  	VIRTIO_VSOCK_TYPE_STREAM = 1,
>>  	VIRTIO_VSOCK_TYPE_SEQPACKET = 2,
>> +	VIRTIO_VSOCK_TYPE_DGRAM = 3,
>>  };
>>  
>>  enum virtio_vsock_op {
>> diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
>> index 1893f8aafa48..87e4ae1866d3 100644
>> --- a/net/vmw_vsock/af_vsock.c
>> +++ b/net/vmw_vsock/af_vsock.c
>> @@ -675,6 +675,19 @@ static int __vsock_bind_connectible(struct vsock_sock *vsk,
>>  	return 0;
>>  }
>>  
>> +int vsock_bind_stream(struct vsock_sock *vsk,
>> +		      struct sockaddr_vm *addr)
>> +{
>> +	int retval;
>> +
>> +	spin_lock_bh(&vsock_table_lock);
>> +	retval = __vsock_bind_connectible(vsk, addr);
>> +	spin_unlock_bh(&vsock_table_lock);
>> +
>> +	return retval;
>> +}
>> +EXPORT_SYMBOL(vsock_bind_stream);
>> +
>>  static int __vsock_bind_dgram(struct vsock_sock *vsk,
>>  			      struct sockaddr_vm *addr)
>>  {
>> @@ -2363,11 +2376,16 @@ int vsock_core_register(const struct vsock_transport *t, int features)
>>  	}
>>  
>>  	if (features & VSOCK_TRANSPORT_F_DGRAM) {
>> -		if (t_dgram) {
>> -			err = -EBUSY;
>> -			goto err_busy;
>> +		/* TODO: always chose the G2H variant over others, support nesting later */
>> +		if (features & VSOCK_TRANSPORT_F_G2H) {
>> +			if (t_dgram)
>> +				pr_warn("virtio_vsock: t_dgram already set\n");
>> +			t_dgram = t;
>> +		}
>> +
>> +		if (!t_dgram) {
>> +			t_dgram = t;
>>  		}
>> -		t_dgram = t;
>>  	}
>>  
>>  	if (features & VSOCK_TRANSPORT_F_LOCAL) {
>> diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
>> index 073314312683..d4526ca462d2 100644
>> --- a/net/vmw_vsock/virtio_transport.c
>> +++ b/net/vmw_vsock/virtio_transport.c
>> @@ -850,7 +850,7 @@ static int __init virtio_vsock_init(void)
>>  		return -ENOMEM;
>>  
>>  	ret = vsock_core_register(&virtio_transport.transport,
>> -				  VSOCK_TRANSPORT_F_G2H);
>> +				  VSOCK_TRANSPORT_F_G2H | VSOCK_TRANSPORT_F_DGRAM);
>>  	if (ret)
>>  		goto out_wq;
>>  
>> diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
>> index bdf16fff054f..aedb48728677 100644
>> --- a/net/vmw_vsock/virtio_transport_common.c
>> +++ b/net/vmw_vsock/virtio_transport_common.c
>> @@ -229,7 +229,9 @@ EXPORT_SYMBOL_GPL(virtio_transport_deliver_tap_pkt);
>>  
>>  static u16 virtio_transport_get_type(struct sock *sk)
>>  {
>> -	if (sk->sk_type == SOCK_STREAM)
>> +	if (sk->sk_type == SOCK_DGRAM)
>> +		return VIRTIO_VSOCK_TYPE_DGRAM;
>> +	else if (sk->sk_type == SOCK_STREAM)
>>  		return VIRTIO_VSOCK_TYPE_STREAM;
>>  	else
>>  		return VIRTIO_VSOCK_TYPE_SEQPACKET;
>> @@ -287,22 +289,29 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk,
>>  	vvs = vsk->trans;
>>  
>>  	/* we can send less than pkt_len bytes */
>> -	if (pkt_len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE)
>> -		pkt_len = VIRTIO_VSOCK_MAX_PKT_BUF_SIZE;
>> +	if (pkt_len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE) {
>> +		if (info->type != VIRTIO_VSOCK_TYPE_DGRAM)
>> +			pkt_len = VIRTIO_VSOCK_MAX_PKT_BUF_SIZE;
>> +		else
>> +			return 0;
>> +	}
>>  
>> -	/* virtio_transport_get_credit might return less than pkt_len credit */
>> -	pkt_len = virtio_transport_get_credit(vvs, pkt_len);
>> +	if (info->type != VIRTIO_VSOCK_TYPE_DGRAM) {
>> +		/* virtio_transport_get_credit might return less than pkt_len credit */
>> +		pkt_len = virtio_transport_get_credit(vvs, pkt_len);
>>  
>> -	/* Do not send zero length OP_RW pkt */
>> -	if (pkt_len == 0 && info->op == VIRTIO_VSOCK_OP_RW)
>> -		return pkt_len;
>> +		/* Do not send zero length OP_RW pkt */
>> +		if (pkt_len == 0 && info->op == VIRTIO_VSOCK_OP_RW)
>> +			return pkt_len;
>> +	}
>>  
>>  	skb = virtio_transport_alloc_skb(info, pkt_len,
>>  					 src_cid, src_port,
>>  					 dst_cid, dst_port,
>>  					 &err);
>>  	if (!skb) {
>> -		virtio_transport_put_credit(vvs, pkt_len);
>> +		if (info->type != VIRTIO_VSOCK_TYPE_DGRAM)
>> +			virtio_transport_put_credit(vvs, pkt_len);
>>  		return err;
>>  	}
>>  
>> @@ -586,6 +595,61 @@ virtio_transport_seqpacket_dequeue(struct vsock_sock *vsk,
>>  }
>>  EXPORT_SYMBOL_GPL(virtio_transport_seqpacket_dequeue);
>>  
>> +static ssize_t
>> +virtio_transport_dgram_do_dequeue(struct vsock_sock *vsk,
>> +				  struct msghdr *msg, size_t len)
>> +{
>> +	struct virtio_vsock_sock *vvs = vsk->trans;
>> +	struct sk_buff *skb;
>> +	size_t total = 0;
>> +	u32 free_space;
>> +	int err = -EFAULT;
>> +
>> +	spin_lock_bh(&vvs->rx_lock);
>> +	if (total < len && !skb_queue_empty_lockless(&vvs->rx_queue)) {
>> +		skb = __skb_dequeue(&vvs->rx_queue);
>> +
>> +		total = len;
>> +		if (total > skb->len - vsock_metadata(skb)->off)
>> +			total = skb->len - vsock_metadata(skb)->off;
>> +		else if (total < skb->len - vsock_metadata(skb)->off)
>> +			msg->msg_flags |= MSG_TRUNC;
>> +
>> +		/* sk_lock is held by caller so no one else can dequeue.
>> +		 * Unlock rx_lock since memcpy_to_msg() may sleep.
>> +		 */
>> +		spin_unlock_bh(&vvs->rx_lock);
>> +
>> +		err = memcpy_to_msg(msg, skb->data + vsock_metadata(skb)->off, total);
>> +		if (err)
>> +			return err;
>> +
>> +		spin_lock_bh(&vvs->rx_lock);
>> +
>> +		virtio_transport_dec_rx_pkt(vvs, skb);
>> +		consume_skb(skb);
>> +	}
>> +
>> +	free_space = vvs->buf_alloc - (vvs->fwd_cnt - vvs->last_fwd_cnt);
>> +
>> +	spin_unlock_bh(&vvs->rx_lock);
>> +
>> +	if (total > 0 && msg->msg_name) {
>> +		/* Provide the address of the sender. */
>> +		DECLARE_SOCKADDR(struct sockaddr_vm *, vm_addr, msg->msg_name);
>> +
>> +		vsock_addr_init(vm_addr, le64_to_cpu(vsock_hdr(skb)->src_cid),
>> +				le32_to_cpu(vsock_hdr(skb)->src_port));
>> +		msg->msg_namelen = sizeof(*vm_addr);
>> +	}
>> +	return total;
>> +}
>> +
>> +static s64 virtio_transport_dgram_has_data(struct vsock_sock *vsk)
>> +{
>> +	return virtio_transport_stream_has_data(vsk);
>> +}
>> +
>>  int
>>  virtio_transport_seqpacket_enqueue(struct vsock_sock *vsk,
>>  				   struct msghdr *msg,
>> @@ -611,7 +675,66 @@ virtio_transport_dgram_dequeue(struct vsock_sock *vsk,
>>  			       struct msghdr *msg,
>>  			       size_t len, int flags)
>>  {
>> -	return -EOPNOTSUPP;
>> +	struct sock *sk;
>> +	size_t err = 0;
>> +	long timeout;
>> +
>> +	DEFINE_WAIT(wait);
>> +
>> +	sk = &vsk->sk;
>> +	err = 0;
>> +
>> +	if (flags & MSG_OOB || flags & MSG_ERRQUEUE || flags & MSG_PEEK)
>> +		return -EOPNOTSUPP;
>> +
>> +	lock_sock(sk);
>> +
>> +	if (!len)
>> +		goto out;
>> +
>> +	timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
>> +
>> +	while (1) {
>> +		s64 ready;
>> +
>> +		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
>> +		ready = virtio_transport_dgram_has_data(vsk);
>> +
>> +		if (ready == 0) {
>> +			if (timeout == 0) {
>> +				err = -EAGAIN;
>> +				finish_wait(sk_sleep(sk), &wait);
>> +				break;
>> +			}
>> +
>> +			release_sock(sk);
>> +			timeout = schedule_timeout(timeout);
>> +			lock_sock(sk);
>> +
>> +			if (signal_pending(current)) {
>> +				err = sock_intr_errno(timeout);
>> +				finish_wait(sk_sleep(sk), &wait);
>> +				break;
>> +			} else if (timeout == 0) {
>> +				err = -EAGAIN;
>> +				finish_wait(sk_sleep(sk), &wait);
>> +				break;
>> +			}
>> +		} else {
>> +			finish_wait(sk_sleep(sk), &wait);
>> +
>> +			if (ready < 0) {
>> +				err = -ENOMEM;
>> +				goto out;
>> +			}
>> +
>> +			err = virtio_transport_dgram_do_dequeue(vsk, msg, len);
>> +			break;
>> +		}
>> +	}
>> +out:
>> +	release_sock(sk);
>> +	return err;
>>  }
>>  EXPORT_SYMBOL_GPL(virtio_transport_dgram_dequeue);
>>  
>> @@ -819,13 +942,13 @@ EXPORT_SYMBOL_GPL(virtio_transport_stream_allow);
>>  int virtio_transport_dgram_bind(struct vsock_sock *vsk,
>>  				struct sockaddr_vm *addr)
>>  {
>> -	return -EOPNOTSUPP;
>> +	return vsock_bind_stream(vsk, addr);
>>  }
>>  EXPORT_SYMBOL_GPL(virtio_transport_dgram_bind);
>>  
>>  bool virtio_transport_dgram_allow(u32 cid, u32 port)
>>  {
>> -	return false;
>> +	return true;
>>  }
>>  EXPORT_SYMBOL_GPL(virtio_transport_dgram_allow);
>>  
>> @@ -861,7 +984,16 @@ virtio_transport_dgram_enqueue(struct vsock_sock *vsk,
>>  			       struct msghdr *msg,
>>  			       size_t dgram_len)
>>  {
>> -	return -EOPNOTSUPP;
>> +	struct virtio_vsock_pkt_info info = {
>> +		.op = VIRTIO_VSOCK_OP_RW,
>> +		.msg = msg,
>> +		.pkt_len = dgram_len,
>> +		.vsk = vsk,
>> +		.remote_cid = remote_addr->svm_cid,
>> +		.remote_port = remote_addr->svm_port,
>> +	};
>> +
>> +	return virtio_transport_send_pkt_info(vsk, &info);
>>  }
>>  EXPORT_SYMBOL_GPL(virtio_transport_dgram_enqueue);
>>  
>> @@ -1165,6 +1297,12 @@ virtio_transport_recv_connected(struct sock *sk,
>>  	struct virtio_vsock_hdr *hdr = vsock_hdr(skb);
>>  	int err = 0;
>>  
>> +	if (le16_to_cpu(vsock_hdr(skb)->type) == VIRTIO_VSOCK_TYPE_DGRAM) {
>> +		virtio_transport_recv_enqueue(vsk, skb);
>> +		sk->sk_data_ready(sk);
>> +		return err;
>> +	}
>> +
>>  	switch (le16_to_cpu(hdr->op)) {
>>  	case VIRTIO_VSOCK_OP_RW:
>>  		virtio_transport_recv_enqueue(vsk, skb);
>> @@ -1320,7 +1458,8 @@ virtio_transport_recv_listen(struct sock *sk, struct sk_buff *skb,
>>  static bool virtio_transport_valid_type(u16 type)
>>  {
>>  	return (type == VIRTIO_VSOCK_TYPE_STREAM) ||
>> -	       (type == VIRTIO_VSOCK_TYPE_SEQPACKET);
>> +	       (type == VIRTIO_VSOCK_TYPE_SEQPACKET) ||
>> +	       (type == VIRTIO_VSOCK_TYPE_DGRAM);
>>  }
>>  
>>  /* We are under the virtio-vsock's vsock->rx_lock or vhost-vsock's vq->mutex
>> @@ -1384,6 +1523,11 @@ void virtio_transport_recv_pkt(struct virtio_transport *t,
>>  		goto free_pkt;
>>  	}
>>  
>> +	if (sk->sk_type == SOCK_DGRAM) {
>> +		virtio_transport_recv_connected(sk, skb);
>> +		goto out;
>> +	}
>> +
>>  	space_available = virtio_transport_space_update(sk, skb);
>>  
>>  	/* Update CID in case it has changed after a transport reset event */
>> @@ -1415,6 +1559,7 @@ void virtio_transport_recv_pkt(struct virtio_transport *t,
>>  		break;
>>  	}
>>  
>> +out:
>>  	release_sock(sk);
>>  
>>  	/* Release refcnt obtained when we fetched this socket out of the
>> -- 
>> 2.35.1
>>
> 
> ---------------------------------------------------------------------
> To unsubscribe, e-mail: virtio-dev-unsubscribe@lists.oasis-open.org
> For additional commands, e-mail: virtio-dev-help@lists.oasis-open.org
>
Arseniy Krasnov Aug. 17, 2022, 5:42 a.m. UTC | #7
On 17.08.2022 08:01, Arseniy Krasnov wrote:
> On 16.08.2022 05:32, Bobby Eshleman wrote:
>> CC'ing virtio-dev@lists.oasis-open.org
>>
>> On Mon, Aug 15, 2022 at 10:56:08AM -0700, Bobby Eshleman wrote:
>>> This patch supports dgram in virtio and on the vhost side.
> Hello,
> 
> sorry, i don't understand, how this maintains message boundaries? Or it
> is unnecessary for SOCK_DGRAM?
> 
> Thanks
>>>
>>> Signed-off-by: Jiang Wang <jiang.wang@bytedance.com>
>>> Signed-off-by: Bobby Eshleman <bobby.eshleman@bytedance.com>
>>> ---
>>>  drivers/vhost/vsock.c                   |   2 +-
>>>  include/net/af_vsock.h                  |   2 +
>>>  include/uapi/linux/virtio_vsock.h       |   1 +
>>>  net/vmw_vsock/af_vsock.c                |  26 +++-
>>>  net/vmw_vsock/virtio_transport.c        |   2 +-
>>>  net/vmw_vsock/virtio_transport_common.c | 173 ++++++++++++++++++++++--
>>>  6 files changed, 186 insertions(+), 20 deletions(-)
>>>
>>> diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
>>> index a5d1bdb786fe..3dc72a5647ca 100644
>>> --- a/drivers/vhost/vsock.c
>>> +++ b/drivers/vhost/vsock.c
>>> @@ -925,7 +925,7 @@ static int __init vhost_vsock_init(void)
>>>  	int ret;
>>>  
>>>  	ret = vsock_core_register(&vhost_transport.transport,
>>> -				  VSOCK_TRANSPORT_F_H2G);
>>> +				  VSOCK_TRANSPORT_F_H2G | VSOCK_TRANSPORT_F_DGRAM);
>>>  	if (ret < 0)
>>>  		return ret;
>>>  
>>> diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h
>>> index 1c53c4c4d88f..37e55c81e4df 100644
>>> --- a/include/net/af_vsock.h
>>> +++ b/include/net/af_vsock.h
>>> @@ -78,6 +78,8 @@ struct vsock_sock {
>>>  s64 vsock_stream_has_data(struct vsock_sock *vsk);
>>>  s64 vsock_stream_has_space(struct vsock_sock *vsk);
>>>  struct sock *vsock_create_connected(struct sock *parent);
>>> +int vsock_bind_stream(struct vsock_sock *vsk,
>>> +		      struct sockaddr_vm *addr);
>>>  
>>>  /**** TRANSPORT ****/
>>>  
>>> diff --git a/include/uapi/linux/virtio_vsock.h b/include/uapi/linux/virtio_vsock.h
>>> index 857df3a3a70d..0975b9c88292 100644
>>> --- a/include/uapi/linux/virtio_vsock.h
>>> +++ b/include/uapi/linux/virtio_vsock.h
>>> @@ -70,6 +70,7 @@ struct virtio_vsock_hdr {
>>>  enum virtio_vsock_type {
>>>  	VIRTIO_VSOCK_TYPE_STREAM = 1,
>>>  	VIRTIO_VSOCK_TYPE_SEQPACKET = 2,
>>> +	VIRTIO_VSOCK_TYPE_DGRAM = 3,
>>>  };
>>>  
>>>  enum virtio_vsock_op {
>>> diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
>>> index 1893f8aafa48..87e4ae1866d3 100644
>>> --- a/net/vmw_vsock/af_vsock.c
>>> +++ b/net/vmw_vsock/af_vsock.c
>>> @@ -675,6 +675,19 @@ static int __vsock_bind_connectible(struct vsock_sock *vsk,
>>>  	return 0;
>>>  }
>>>  
>>> +int vsock_bind_stream(struct vsock_sock *vsk,
>>> +		      struct sockaddr_vm *addr)
>>> +{
>>> +	int retval;
>>> +
>>> +	spin_lock_bh(&vsock_table_lock);
>>> +	retval = __vsock_bind_connectible(vsk, addr);
>>> +	spin_unlock_bh(&vsock_table_lock);
>>> +
>>> +	return retval;
>>> +}
>>> +EXPORT_SYMBOL(vsock_bind_stream);
>>> +
>>>  static int __vsock_bind_dgram(struct vsock_sock *vsk,
>>>  			      struct sockaddr_vm *addr)
>>>  {
>>> @@ -2363,11 +2376,16 @@ int vsock_core_register(const struct vsock_transport *t, int features)
>>>  	}
>>>  
>>>  	if (features & VSOCK_TRANSPORT_F_DGRAM) {
>>> -		if (t_dgram) {
>>> -			err = -EBUSY;
>>> -			goto err_busy;
>>> +		/* TODO: always chose the G2H variant over others, support nesting later */
>>> +		if (features & VSOCK_TRANSPORT_F_G2H) {
>>> +			if (t_dgram)
>>> +				pr_warn("virtio_vsock: t_dgram already set\n");
>>> +			t_dgram = t;
>>> +		}
>>> +
>>> +		if (!t_dgram) {
>>> +			t_dgram = t;
>>>  		}
>>> -		t_dgram = t;
>>>  	}
>>>  
>>>  	if (features & VSOCK_TRANSPORT_F_LOCAL) {
>>> diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
>>> index 073314312683..d4526ca462d2 100644
>>> --- a/net/vmw_vsock/virtio_transport.c
>>> +++ b/net/vmw_vsock/virtio_transport.c
>>> @@ -850,7 +850,7 @@ static int __init virtio_vsock_init(void)
>>>  		return -ENOMEM;
>>>  
>>>  	ret = vsock_core_register(&virtio_transport.transport,
>>> -				  VSOCK_TRANSPORT_F_G2H);
>>> +				  VSOCK_TRANSPORT_F_G2H | VSOCK_TRANSPORT_F_DGRAM);
>>>  	if (ret)
>>>  		goto out_wq;
>>>  
>>> diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
>>> index bdf16fff054f..aedb48728677 100644
>>> --- a/net/vmw_vsock/virtio_transport_common.c
>>> +++ b/net/vmw_vsock/virtio_transport_common.c
>>> @@ -229,7 +229,9 @@ EXPORT_SYMBOL_GPL(virtio_transport_deliver_tap_pkt);
>>>  
>>>  static u16 virtio_transport_get_type(struct sock *sk)
>>>  {
>>> -	if (sk->sk_type == SOCK_STREAM)
>>> +	if (sk->sk_type == SOCK_DGRAM)
>>> +		return VIRTIO_VSOCK_TYPE_DGRAM;
>>> +	else if (sk->sk_type == SOCK_STREAM)
>>>  		return VIRTIO_VSOCK_TYPE_STREAM;
>>>  	else
>>>  		return VIRTIO_VSOCK_TYPE_SEQPACKET;
>>> @@ -287,22 +289,29 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk,
>>>  	vvs = vsk->trans;
>>>  
>>>  	/* we can send less than pkt_len bytes */
>>> -	if (pkt_len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE)
>>> -		pkt_len = VIRTIO_VSOCK_MAX_PKT_BUF_SIZE;
>>> +	if (pkt_len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE) {
>>> +		if (info->type != VIRTIO_VSOCK_TYPE_DGRAM)
>>> +			pkt_len = VIRTIO_VSOCK_MAX_PKT_BUF_SIZE;
>>> +		else
>>> +			return 0;
>>> +	}
>>>  
>>> -	/* virtio_transport_get_credit might return less than pkt_len credit */
>>> -	pkt_len = virtio_transport_get_credit(vvs, pkt_len);
>>> +	if (info->type != VIRTIO_VSOCK_TYPE_DGRAM) {
>>> +		/* virtio_transport_get_credit might return less than pkt_len credit */
>>> +		pkt_len = virtio_transport_get_credit(vvs, pkt_len);
>>>  
>>> -	/* Do not send zero length OP_RW pkt */
>>> -	if (pkt_len == 0 && info->op == VIRTIO_VSOCK_OP_RW)
>>> -		return pkt_len;
>>> +		/* Do not send zero length OP_RW pkt */
>>> +		if (pkt_len == 0 && info->op == VIRTIO_VSOCK_OP_RW)
>>> +			return pkt_len;
>>> +	}
>>>  
>>>  	skb = virtio_transport_alloc_skb(info, pkt_len,
>>>  					 src_cid, src_port,
>>>  					 dst_cid, dst_port,
>>>  					 &err);
>>>  	if (!skb) {
>>> -		virtio_transport_put_credit(vvs, pkt_len);
>>> +		if (info->type != VIRTIO_VSOCK_TYPE_DGRAM)
>>> +			virtio_transport_put_credit(vvs, pkt_len);
>>>  		return err;
>>>  	}
>>>  
>>> @@ -586,6 +595,61 @@ virtio_transport_seqpacket_dequeue(struct vsock_sock *vsk,
>>>  }
>>>  EXPORT_SYMBOL_GPL(virtio_transport_seqpacket_dequeue);
>>>  
>>> +static ssize_t
>>> +virtio_transport_dgram_do_dequeue(struct vsock_sock *vsk,
>>> +				  struct msghdr *msg, size_t len)
>>> +{
>>> +	struct virtio_vsock_sock *vvs = vsk->trans;
>>> +	struct sk_buff *skb;
>>> +	size_t total = 0;
>>> +	u32 free_space;
>>> +	int err = -EFAULT;
>>> +
>>> +	spin_lock_bh(&vvs->rx_lock);
>>> +	if (total < len && !skb_queue_empty_lockless(&vvs->rx_queue)) {
>>> +		skb = __skb_dequeue(&vvs->rx_queue);
>>> +
>>> +		total = len;
>>> +		if (total > skb->len - vsock_metadata(skb)->off)
>>> +			total = skb->len - vsock_metadata(skb)->off;
>>> +		else if (total < skb->len - vsock_metadata(skb)->off)
>>> +			msg->msg_flags |= MSG_TRUNC;
>>> +
>>> +		/* sk_lock is held by caller so no one else can dequeue.
>>> +		 * Unlock rx_lock since memcpy_to_msg() may sleep.
>>> +		 */
>>> +		spin_unlock_bh(&vvs->rx_lock);
>>> +
>>> +		err = memcpy_to_msg(msg, skb->data + vsock_metadata(skb)->off, total);
>>> +		if (err)
>>> +			return err;
>>> +
>>> +		spin_lock_bh(&vvs->rx_lock);
>>> +
>>> +		virtio_transport_dec_rx_pkt(vvs, skb);
>>> +		consume_skb(skb);
>>> +	}
>>> +
>>> +	free_space = vvs->buf_alloc - (vvs->fwd_cnt - vvs->last_fwd_cnt);
>>> +
>>> +	spin_unlock_bh(&vvs->rx_lock);
>>> +
>>> +	if (total > 0 && msg->msg_name) {
>>> +		/* Provide the address of the sender. */
>>> +		DECLARE_SOCKADDR(struct sockaddr_vm *, vm_addr, msg->msg_name);
>>> +
>>> +		vsock_addr_init(vm_addr, le64_to_cpu(vsock_hdr(skb)->src_cid),
>>> +				le32_to_cpu(vsock_hdr(skb)->src_port));
>>> +		msg->msg_namelen = sizeof(*vm_addr);
>>> +	}
>>> +	return total;
>>> +}
>>> +
>>> +static s64 virtio_transport_dgram_has_data(struct vsock_sock *vsk)
>>> +{
>>> +	return virtio_transport_stream_has_data(vsk);
>>> +}
>>> +
>>>  int
>>>  virtio_transport_seqpacket_enqueue(struct vsock_sock *vsk,
>>>  				   struct msghdr *msg,
>>> @@ -611,7 +675,66 @@ virtio_transport_dgram_dequeue(struct vsock_sock *vsk,
>>>  			       struct msghdr *msg,
>>>  			       size_t len, int flags)
>>>  {
>>> -	return -EOPNOTSUPP;
>>> +	struct sock *sk;
>>> +	size_t err = 0;
>>> +	long timeout;
>>> +
>>> +	DEFINE_WAIT(wait);
>>> +
>>> +	sk = &vsk->sk;
>>> +	err = 0;
>>> +
>>> +	if (flags & MSG_OOB || flags & MSG_ERRQUEUE || flags & MSG_PEEK)
>>> +		return -EOPNOTSUPP;
>>> +
>>> +	lock_sock(sk);
>>> +
>>> +	if (!len)
>>> +		goto out;
>>> +
>>> +	timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
>>> +
>>> +	while (1) {
>>> +		s64 ready;
>>> +
>>> +		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
>>> +		ready = virtio_transport_dgram_has_data(vsk);
>>> +
>>> +		if (ready == 0) {
>>> +			if (timeout == 0) {
>>> +				err = -EAGAIN;
>>> +				finish_wait(sk_sleep(sk), &wait);
>>> +				break;
>>> +			}
>>> +
>>> +			release_sock(sk);
>>> +			timeout = schedule_timeout(timeout);
>>> +			lock_sock(sk);
>>> +
>>> +			if (signal_pending(current)) {
>>> +				err = sock_intr_errno(timeout);
>>> +				finish_wait(sk_sleep(sk), &wait);
>>> +				break;
>>> +			} else if (timeout == 0) {
>>> +				err = -EAGAIN;
>>> +				finish_wait(sk_sleep(sk), &wait);
>>> +				break;
>>> +			}
>>> +		} else {
>>> +			finish_wait(sk_sleep(sk), &wait);
>>> +
>>> +			if (ready < 0) {
>>> +				err = -ENOMEM;
>>> +				goto out;
>>> +			}
>>> +
>>> +			err = virtio_transport_dgram_do_dequeue(vsk, msg, len);
>>> +			break;
>>> +		}
>>> +	}
>>> +out:
>>> +	release_sock(sk);
>>> +	return err;
>>>  }
>>>  EXPORT_SYMBOL_GPL(virtio_transport_dgram_dequeue);
^^^
May be, this generic data waiting logic should be in af_vsock.c, as for stream/seqpacket?
In this way, another transport which supports SOCK_DGRAM could reuse it.
>>>  
>>> @@ -819,13 +942,13 @@ EXPORT_SYMBOL_GPL(virtio_transport_stream_allow);
>>>  int virtio_transport_dgram_bind(struct vsock_sock *vsk,
>>>  				struct sockaddr_vm *addr)
>>>  {
>>> -	return -EOPNOTSUPP;
>>> +	return vsock_bind_stream(vsk, addr);
>>>  }
>>>  EXPORT_SYMBOL_GPL(virtio_transport_dgram_bind);
>>>  
>>>  bool virtio_transport_dgram_allow(u32 cid, u32 port)
>>>  {
>>> -	return false;
>>> +	return true;
>>>  }
>>>  EXPORT_SYMBOL_GPL(virtio_transport_dgram_allow);
>>>  
>>> @@ -861,7 +984,16 @@ virtio_transport_dgram_enqueue(struct vsock_sock *vsk,
>>>  			       struct msghdr *msg,
>>>  			       size_t dgram_len)
>>>  {
>>> -	return -EOPNOTSUPP;
>>> +	struct virtio_vsock_pkt_info info = {
>>> +		.op = VIRTIO_VSOCK_OP_RW,
>>> +		.msg = msg,
>>> +		.pkt_len = dgram_len,
>>> +		.vsk = vsk,
>>> +		.remote_cid = remote_addr->svm_cid,
>>> +		.remote_port = remote_addr->svm_port,
>>> +	};
>>> +
>>> +	return virtio_transport_send_pkt_info(vsk, &info);
>>>  }
>>>  EXPORT_SYMBOL_GPL(virtio_transport_dgram_enqueue);
>>>  
>>> @@ -1165,6 +1297,12 @@ virtio_transport_recv_connected(struct sock *sk,
>>>  	struct virtio_vsock_hdr *hdr = vsock_hdr(skb);
>>>  	int err = 0;
>>>  
>>> +	if (le16_to_cpu(vsock_hdr(skb)->type) == VIRTIO_VSOCK_TYPE_DGRAM) {
>>> +		virtio_transport_recv_enqueue(vsk, skb);
>>> +		sk->sk_data_ready(sk);
>>> +		return err;
>>> +	}
>>> +
>>>  	switch (le16_to_cpu(hdr->op)) {
>>>  	case VIRTIO_VSOCK_OP_RW:
>>>  		virtio_transport_recv_enqueue(vsk, skb);
>>> @@ -1320,7 +1458,8 @@ virtio_transport_recv_listen(struct sock *sk, struct sk_buff *skb,
>>>  static bool virtio_transport_valid_type(u16 type)
>>>  {
>>>  	return (type == VIRTIO_VSOCK_TYPE_STREAM) ||
>>> -	       (type == VIRTIO_VSOCK_TYPE_SEQPACKET);
>>> +	       (type == VIRTIO_VSOCK_TYPE_SEQPACKET) ||
>>> +	       (type == VIRTIO_VSOCK_TYPE_DGRAM);
>>>  }
>>>  
>>>  /* We are under the virtio-vsock's vsock->rx_lock or vhost-vsock's vq->mutex
>>> @@ -1384,6 +1523,11 @@ void virtio_transport_recv_pkt(struct virtio_transport *t,
>>>  		goto free_pkt;
>>>  	}
>>>  
>>> +	if (sk->sk_type == SOCK_DGRAM) {
>>> +		virtio_transport_recv_connected(sk, skb);
>>> +		goto out;
>>> +	}
>>> +
>>>  	space_available = virtio_transport_space_update(sk, skb);
>>>  
>>>  	/* Update CID in case it has changed after a transport reset event */
>>> @@ -1415,6 +1559,7 @@ void virtio_transport_recv_pkt(struct virtio_transport *t,
>>>  		break;
>>>  	}
>>>  
>>> +out:
>>>  	release_sock(sk);
>>>  
>>>  	/* Release refcnt obtained when we fetched this socket out of the
>>> -- 
>>> 2.35.1
>>>
>>
>> ---------------------------------------------------------------------
>> To unsubscribe, e-mail: virtio-dev-unsubscribe@lists.oasis-open.org
>> For additional commands, e-mail: virtio-dev-help@lists.oasis-open.org
>>
>
Arseniy Krasnov Aug. 18, 2022, 8:24 a.m. UTC | #8
On Tue, 2022-08-16 at 09:57 +0000, Bobby Eshleman wrote:
> On Wed, Aug 17, 2022 at 05:01:00AM +0000, Arseniy Krasnov wrote:
> > On 16.08.2022 05:32, Bobby Eshleman wrote:
> > > CC'ing virtio-dev@lists.oasis-open.org
> > > 
> > > On Mon, Aug 15, 2022 at 10:56:08AM -0700, Bobby Eshleman wrote:
> > > > This patch supports dgram in virtio and on the vhost side.
> > Hello,
> > 
> > sorry, i don't understand, how this maintains message boundaries?
> > Or it
> > is unnecessary for SOCK_DGRAM?
> > 
> > Thanks
> 
> If I understand your question, the length is included in the header,
> so
> receivers always know that header start + header length + payload
> length
> marks the message boundary.

I mean, consider the following case: host sends 5kb packet to guest.
Guest uses 4kb virtio rx buffers, so in drivers/vhost/vsock.c this 5kb
packet(e.g. its payload) will be placed to 2 virtio rx buffers - 4kb
to first buffer and rest 1kb to second buffer. Is it implemented, that
receiver gets whole 5kb piece of data during single 'read()/recv()'
system call?

Thanks

> 
> > > > Signed-off-by: Jiang Wang <jiang.wang@bytedance.com>
> > > > Signed-off-by: Bobby Eshleman <bobby.eshleman@bytedance.com>
> > > > ---
> > > >  drivers/vhost/vsock.c                   |   2 +-
> > > >  include/net/af_vsock.h                  |   2 +
> > > >  include/uapi/linux/virtio_vsock.h       |   1 +
> > > >  net/vmw_vsock/af_vsock.c                |  26 +++-
> > > >  net/vmw_vsock/virtio_transport.c        |   2 +-
> > > >  net/vmw_vsock/virtio_transport_common.c | 173
> > > > ++++++++++++++++++++++--
> > > >  6 files changed, 186 insertions(+), 20 deletions(-)
> > > > 
> > > > diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
> > > > index a5d1bdb786fe..3dc72a5647ca 100644
> > > > --- a/drivers/vhost/vsock.c
> > > > +++ b/drivers/vhost/vsock.c
> > > > @@ -925,7 +925,7 @@ static int __init vhost_vsock_init(void)
> > > >  	int ret;
> > > >  
> > > >  	ret = vsock_core_register(&vhost_transport.transport,
> > > > -				  VSOCK_TRANSPORT_F_H2G);
> > > > +				  VSOCK_TRANSPORT_F_H2G |
> > > > VSOCK_TRANSPORT_F_DGRAM);
> > > >  	if (ret < 0)
> > > >  		return ret;
> > > >  
> > > > diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h
> > > > index 1c53c4c4d88f..37e55c81e4df 100644
> > > > --- a/include/net/af_vsock.h
> > > > +++ b/include/net/af_vsock.h
> > > > @@ -78,6 +78,8 @@ struct vsock_sock {
> > > >  s64 vsock_stream_has_data(struct vsock_sock *vsk);
> > > >  s64 vsock_stream_has_space(struct vsock_sock *vsk);
> > > >  struct sock *vsock_create_connected(struct sock *parent);
> > > > +int vsock_bind_stream(struct vsock_sock *vsk,
> > > > +		      struct sockaddr_vm *addr);
> > > >  
> > > >  /**** TRANSPORT ****/
> > > >  
> > > > diff --git a/include/uapi/linux/virtio_vsock.h
> > > > b/include/uapi/linux/virtio_vsock.h
> > > > index 857df3a3a70d..0975b9c88292 100644
> > > > --- a/include/uapi/linux/virtio_vsock.h
> > > > +++ b/include/uapi/linux/virtio_vsock.h
> > > > @@ -70,6 +70,7 @@ struct virtio_vsock_hdr {
> > > >  enum virtio_vsock_type {
> > > >  	VIRTIO_VSOCK_TYPE_STREAM = 1,
> > > >  	VIRTIO_VSOCK_TYPE_SEQPACKET = 2,
> > > > +	VIRTIO_VSOCK_TYPE_DGRAM = 3,
> > > >  };
> > > >  
> > > >  enum virtio_vsock_op {
> > > > diff --git a/net/vmw_vsock/af_vsock.c
> > > > b/net/vmw_vsock/af_vsock.c
> > > > index 1893f8aafa48..87e4ae1866d3 100644
> > > > --- a/net/vmw_vsock/af_vsock.c
> > > > +++ b/net/vmw_vsock/af_vsock.c
> > > > @@ -675,6 +675,19 @@ static int __vsock_bind_connectible(struct
> > > > vsock_sock *vsk,
> > > >  	return 0;
> > > >  }
> > > >  
> > > > +int vsock_bind_stream(struct vsock_sock *vsk,
> > > > +		      struct sockaddr_vm *addr)
> > > > +{
> > > > +	int retval;
> > > > +
> > > > +	spin_lock_bh(&vsock_table_lock);
> > > > +	retval = __vsock_bind_connectible(vsk, addr);
> > > > +	spin_unlock_bh(&vsock_table_lock);
> > > > +
> > > > +	return retval;
> > > > +}
> > > > +EXPORT_SYMBOL(vsock_bind_stream);
> > > > +
> > > >  static int __vsock_bind_dgram(struct vsock_sock *vsk,
> > > >  			      struct sockaddr_vm *addr)
> > > >  {
> > > > @@ -2363,11 +2376,16 @@ int vsock_core_register(const struct
> > > > vsock_transport *t, int features)
> > > >  	}
> > > >  
> > > >  	if (features & VSOCK_TRANSPORT_F_DGRAM) {
> > > > -		if (t_dgram) {
> > > > -			err = -EBUSY;
> > > > -			goto err_busy;
> > > > +		/* TODO: always chose the G2H variant over
> > > > others, support nesting later */
> > > > +		if (features & VSOCK_TRANSPORT_F_G2H) {
> > > > +			if (t_dgram)
> > > > +				pr_warn("virtio_vsock: t_dgram
> > > > already set\n");
> > > > +			t_dgram = t;
> > > > +		}
> > > > +
> > > > +		if (!t_dgram) {
> > > > +			t_dgram = t;
> > > >  		}
> > > > -		t_dgram = t;
> > > >  	}
> > > >  
> > > >  	if (features & VSOCK_TRANSPORT_F_LOCAL) {
> > > > diff --git a/net/vmw_vsock/virtio_transport.c
> > > > b/net/vmw_vsock/virtio_transport.c
> > > > index 073314312683..d4526ca462d2 100644
> > > > --- a/net/vmw_vsock/virtio_transport.c
> > > > +++ b/net/vmw_vsock/virtio_transport.c
> > > > @@ -850,7 +850,7 @@ static int __init virtio_vsock_init(void)
> > > >  		return -ENOMEM;
> > > >  
> > > >  	ret = vsock_core_register(&virtio_transport.transport,
> > > > -				  VSOCK_TRANSPORT_F_G2H);
> > > > +				  VSOCK_TRANSPORT_F_G2H |
> > > > VSOCK_TRANSPORT_F_DGRAM);
> > > >  	if (ret)
> > > >  		goto out_wq;
> > > >  
> > > > diff --git a/net/vmw_vsock/virtio_transport_common.c
> > > > b/net/vmw_vsock/virtio_transport_common.c
> > > > index bdf16fff054f..aedb48728677 100644
> > > > --- a/net/vmw_vsock/virtio_transport_common.c
> > > > +++ b/net/vmw_vsock/virtio_transport_common.c
> > > > @@ -229,7 +229,9 @@
> > > > EXPORT_SYMBOL_GPL(virtio_transport_deliver_tap_pkt);
> > > >  
> > > >  static u16 virtio_transport_get_type(struct sock *sk)
> > > >  {
> > > > -	if (sk->sk_type == SOCK_STREAM)
> > > > +	if (sk->sk_type == SOCK_DGRAM)
> > > > +		return VIRTIO_VSOCK_TYPE_DGRAM;
> > > > +	else if (sk->sk_type == SOCK_STREAM)
> > > >  		return VIRTIO_VSOCK_TYPE_STREAM;
> > > >  	else
> > > >  		return VIRTIO_VSOCK_TYPE_SEQPACKET;
> > > > @@ -287,22 +289,29 @@ static int
> > > > virtio_transport_send_pkt_info(struct vsock_sock *vsk,
> > > >  	vvs = vsk->trans;
> > > >  
> > > >  	/* we can send less than pkt_len bytes */
> > > > -	if (pkt_len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE)
> > > > -		pkt_len = VIRTIO_VSOCK_MAX_PKT_BUF_SIZE;
> > > > +	if (pkt_len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE) {
> > > > +		if (info->type != VIRTIO_VSOCK_TYPE_DGRAM)
> > > > +			pkt_len =
> > > > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE;
> > > > +		else
> > > > +			return 0;
> > > > +	}
> > > >  
> > > > -	/* virtio_transport_get_credit might return less than
> > > > pkt_len credit */
> > > > -	pkt_len = virtio_transport_get_credit(vvs, pkt_len);
> > > > +	if (info->type != VIRTIO_VSOCK_TYPE_DGRAM) {
> > > > +		/* virtio_transport_get_credit might return
> > > > less than pkt_len credit */
> > > > +		pkt_len = virtio_transport_get_credit(vvs,
> > > > pkt_len);
> > > >  
> > > > -	/* Do not send zero length OP_RW pkt */
> > > > -	if (pkt_len == 0 && info->op == VIRTIO_VSOCK_OP_RW)
> > > > -		return pkt_len;
> > > > +		/* Do not send zero length OP_RW pkt */
> > > > +		if (pkt_len == 0 && info->op ==
> > > > VIRTIO_VSOCK_OP_RW)
> > > > +			return pkt_len;
> > > > +	}
> > > >  
> > > >  	skb = virtio_transport_alloc_skb(info, pkt_len,
> > > >  					 src_cid, src_port,
> > > >  					 dst_cid, dst_port,
> > > >  					 &err);
> > > >  	if (!skb) {
> > > > -		virtio_transport_put_credit(vvs, pkt_len);
> > > > +		if (info->type != VIRTIO_VSOCK_TYPE_DGRAM)
> > > > +			virtio_transport_put_credit(vvs,
> > > > pkt_len);
> > > >  		return err;
> > > >  	}
> > > >  
> > > > @@ -586,6 +595,61 @@ virtio_transport_seqpacket_dequeue(struct
> > > > vsock_sock *vsk,
> > > >  }
> > > >  EXPORT_SYMBOL_GPL(virtio_transport_seqpacket_dequeue);
> > > >  
> > > > +static ssize_t
> > > > +virtio_transport_dgram_do_dequeue(struct vsock_sock *vsk,
> > > > +				  struct msghdr *msg, size_t
> > > > len)
> > > > +{
> > > > +	struct virtio_vsock_sock *vvs = vsk->trans;
> > > > +	struct sk_buff *skb;
> > > > +	size_t total = 0;
> > > > +	u32 free_space;
> > > > +	int err = -EFAULT;
> > > > +
> > > > +	spin_lock_bh(&vvs->rx_lock);
> > > > +	if (total < len && !skb_queue_empty_lockless(&vvs-
> > > > >rx_queue)) {
> > > > +		skb = __skb_dequeue(&vvs->rx_queue);
> > > > +
> > > > +		total = len;
> > > > +		if (total > skb->len - vsock_metadata(skb)-
> > > > >off)
> > > > +			total = skb->len - vsock_metadata(skb)-
> > > > >off;
> > > > +		else if (total < skb->len -
> > > > vsock_metadata(skb)->off)
> > > > +			msg->msg_flags |= MSG_TRUNC;
> > > > +
> > > > +		/* sk_lock is held by caller so no one else can
> > > > dequeue.
> > > > +		 * Unlock rx_lock since memcpy_to_msg() may
> > > > sleep.
> > > > +		 */
> > > > +		spin_unlock_bh(&vvs->rx_lock);
> > > > +
> > > > +		err = memcpy_to_msg(msg, skb->data +
> > > > vsock_metadata(skb)->off, total);
> > > > +		if (err)
> > > > +			return err;
> > > > +
> > > > +		spin_lock_bh(&vvs->rx_lock);
> > > > +
> > > > +		virtio_transport_dec_rx_pkt(vvs, skb);
> > > > +		consume_skb(skb);
> > > > +	}
> > > > +
> > > > +	free_space = vvs->buf_alloc - (vvs->fwd_cnt - vvs-
> > > > >last_fwd_cnt);
> > > > +
> > > > +	spin_unlock_bh(&vvs->rx_lock);
> > > > +
> > > > +	if (total > 0 && msg->msg_name) {
> > > > +		/* Provide the address of the sender. */
> > > > +		DECLARE_SOCKADDR(struct sockaddr_vm *, vm_addr,
> > > > msg->msg_name);
> > > > +
> > > > +		vsock_addr_init(vm_addr,
> > > > le64_to_cpu(vsock_hdr(skb)->src_cid),
> > > > +				le32_to_cpu(vsock_hdr(skb)-
> > > > >src_port));
> > > > +		msg->msg_namelen = sizeof(*vm_addr);
> > > > +	}
> > > > +	return total;
> > > > +}
> > > > +
> > > > +static s64 virtio_transport_dgram_has_data(struct vsock_sock
> > > > *vsk)
> > > > +{
> > > > +	return virtio_transport_stream_has_data(vsk);
> > > > +}
> > > > +
> > > >  int
> > > >  virtio_transport_seqpacket_enqueue(struct vsock_sock *vsk,
> > > >  				   struct msghdr *msg,
> > > > @@ -611,7 +675,66 @@ virtio_transport_dgram_dequeue(struct
> > > > vsock_sock *vsk,
> > > >  			       struct msghdr *msg,
> > > >  			       size_t len, int flags)
> > > >  {
> > > > -	return -EOPNOTSUPP;
> > > > +	struct sock *sk;
> > > > +	size_t err = 0;
> > > > +	long timeout;
> > > > +
> > > > +	DEFINE_WAIT(wait);
> > > > +
> > > > +	sk = &vsk->sk;
> > > > +	err = 0;
> > > > +
> > > > +	if (flags & MSG_OOB || flags & MSG_ERRQUEUE || flags &
> > > > MSG_PEEK)
> > > > +		return -EOPNOTSUPP;
> > > > +
> > > > +	lock_sock(sk);
> > > > +
> > > > +	if (!len)
> > > > +		goto out;
> > > > +
> > > > +	timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
> > > > +
> > > > +	while (1) {
> > > > +		s64 ready;
> > > > +
> > > > +		prepare_to_wait(sk_sleep(sk), &wait,
> > > > TASK_INTERRUPTIBLE);
> > > > +		ready = virtio_transport_dgram_has_data(vsk);
> > > > +
> > > > +		if (ready == 0) {
> > > > +			if (timeout == 0) {
> > > > +				err = -EAGAIN;
> > > > +				finish_wait(sk_sleep(sk),
> > > > &wait);
> > > > +				break;
> > > > +			}
> > > > +
> > > > +			release_sock(sk);
> > > > +			timeout = schedule_timeout(timeout);
> > > > +			lock_sock(sk);
> > > > +
> > > > +			if (signal_pending(current)) {
> > > > +				err = sock_intr_errno(timeout);
> > > > +				finish_wait(sk_sleep(sk),
> > > > &wait);
> > > > +				break;
> > > > +			} else if (timeout == 0) {
> > > > +				err = -EAGAIN;
> > > > +				finish_wait(sk_sleep(sk),
> > > > &wait);
> > > > +				break;
> > > > +			}
> > > > +		} else {
> > > > +			finish_wait(sk_sleep(sk), &wait);
> > > > +
> > > > +			if (ready < 0) {
> > > > +				err = -ENOMEM;
> > > > +				goto out;
> > > > +			}
> > > > +
> > > > +			err =
> > > > virtio_transport_dgram_do_dequeue(vsk, msg, len);
> > > > +			break;
> > > > +		}
> > > > +	}
> > > > +out:
> > > > +	release_sock(sk);
> > > > +	return err;
> > > >  }
> > > >  EXPORT_SYMBOL_GPL(virtio_transport_dgram_dequeue);
> > > >  
> > > > @@ -819,13 +942,13 @@
> > > > EXPORT_SYMBOL_GPL(virtio_transport_stream_allow);
> > > >  int virtio_transport_dgram_bind(struct vsock_sock *vsk,
> > > >  				struct sockaddr_vm *addr)
> > > >  {
> > > > -	return -EOPNOTSUPP;
> > > > +	return vsock_bind_stream(vsk, addr);
> > > >  }
> > > >  EXPORT_SYMBOL_GPL(virtio_transport_dgram_bind);
> > > >  
> > > >  bool virtio_transport_dgram_allow(u32 cid, u32 port)
> > > >  {
> > > > -	return false;
> > > > +	return true;
> > > >  }
> > > >  EXPORT_SYMBOL_GPL(virtio_transport_dgram_allow);
> > > >  
> > > > @@ -861,7 +984,16 @@ virtio_transport_dgram_enqueue(struct
> > > > vsock_sock *vsk,
> > > >  			       struct msghdr *msg,
> > > >  			       size_t dgram_len)
> > > >  {
> > > > -	return -EOPNOTSUPP;
> > > > +	struct virtio_vsock_pkt_info info = {
> > > > +		.op = VIRTIO_VSOCK_OP_RW,
> > > > +		.msg = msg,
> > > > +		.pkt_len = dgram_len,
> > > > +		.vsk = vsk,
> > > > +		.remote_cid = remote_addr->svm_cid,
> > > > +		.remote_port = remote_addr->svm_port,
> > > > +	};
> > > > +
> > > > +	return virtio_transport_send_pkt_info(vsk, &info);
> > > >  }
> > > >  EXPORT_SYMBOL_GPL(virtio_transport_dgram_enqueue);
> > > >  
> > > > @@ -1165,6 +1297,12 @@ virtio_transport_recv_connected(struct
> > > > sock *sk,
> > > >  	struct virtio_vsock_hdr *hdr = vsock_hdr(skb);
> > > >  	int err = 0;
> > > >  
> > > > +	if (le16_to_cpu(vsock_hdr(skb)->type) ==
> > > > VIRTIO_VSOCK_TYPE_DGRAM) {
> > > > +		virtio_transport_recv_enqueue(vsk, skb);
> > > > +		sk->sk_data_ready(sk);
> > > > +		return err;
> > > > +	}
> > > > +
> > > >  	switch (le16_to_cpu(hdr->op)) {
> > > >  	case VIRTIO_VSOCK_OP_RW:
> > > >  		virtio_transport_recv_enqueue(vsk, skb);
> > > > @@ -1320,7 +1458,8 @@ virtio_transport_recv_listen(struct sock
> > > > *sk, struct sk_buff *skb,
> > > >  static bool virtio_transport_valid_type(u16 type)
> > > >  {
> > > >  	return (type == VIRTIO_VSOCK_TYPE_STREAM) ||
> > > > -	       (type == VIRTIO_VSOCK_TYPE_SEQPACKET);
> > > > +	       (type == VIRTIO_VSOCK_TYPE_SEQPACKET) ||
> > > > +	       (type == VIRTIO_VSOCK_TYPE_DGRAM);
> > > >  }
> > > >  
> > > >  /* We are under the virtio-vsock's vsock->rx_lock or vhost-
> > > > vsock's vq->mutex
> > > > @@ -1384,6 +1523,11 @@ void virtio_transport_recv_pkt(struct
> > > > virtio_transport *t,
> > > >  		goto free_pkt;
> > > >  	}
> > > >  
> > > > +	if (sk->sk_type == SOCK_DGRAM) {
> > > > +		virtio_transport_recv_connected(sk, skb);
> > > > +		goto out;
> > > > +	}
> > > > +
> > > >  	space_available = virtio_transport_space_update(sk,
> > > > skb);
> > > >  
> > > >  	/* Update CID in case it has changed after a transport
> > > > reset event */
> > > > @@ -1415,6 +1559,7 @@ void virtio_transport_recv_pkt(struct
> > > > virtio_transport *t,
> > > >  		break;
> > > >  	}
> > > >  
> > > > +out:
> > > >  	release_sock(sk);
> > > >  
> > > >  	/* Release refcnt obtained when we fetched this socket
> > > > out of the
> > > > -- 
> > > > 2.35.1
> > > > 
> > > 
> > > ---------------------------------------------------------------
> > > ------
> > > To unsubscribe, e-mail: 
> > > virtio-dev-unsubscribe@lists.oasis-open.org
> > > For additional commands, e-mail: 
> > > virtio-dev-help@lists.oasis-open.org
> > >
Arseniy Krasnov Aug. 18, 2022, 8:35 a.m. UTC | #9
On Tue, 2022-08-16 at 09:58 +0000, Bobby Eshleman wrote:
> On Wed, Aug 17, 2022 at 05:42:08AM +0000, Arseniy Krasnov wrote:
> > On 17.08.2022 08:01, Arseniy Krasnov wrote:
> > > On 16.08.2022 05:32, Bobby Eshleman wrote:
> > > > CC'ing virtio-dev@lists.oasis-open.org
> > > > 
> > > > On Mon, Aug 15, 2022 at 10:56:08AM -0700, Bobby Eshleman wrote:
> > > > > This patch supports dgram in virtio and on the vhost side.
> > > Hello,
> > > 
> > > sorry, i don't understand, how this maintains message boundaries?
> > > Or it
> > > is unnecessary for SOCK_DGRAM?
> > > 
> > > Thanks
> > > > > Signed-off-by: Jiang Wang <jiang.wang@bytedance.com>
> > > > > Signed-off-by: Bobby Eshleman <bobby.eshleman@bytedance.com>
> > > > > ---
> > > > >  drivers/vhost/vsock.c                   |   2 +-
> > > > >  include/net/af_vsock.h                  |   2 +
> > > > >  include/uapi/linux/virtio_vsock.h       |   1 +
> > > > >  net/vmw_vsock/af_vsock.c                |  26 +++-
> > > > >  net/vmw_vsock/virtio_transport.c        |   2 +-
> > > > >  net/vmw_vsock/virtio_transport_common.c | 173
> > > > > ++++++++++++++++++++++--
> > > > >  6 files changed, 186 insertions(+), 20 deletions(-)
> > > > > 
> > > > > diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
> > > > > index a5d1bdb786fe..3dc72a5647ca 100644
> > > > > --- a/drivers/vhost/vsock.c
> > > > > +++ b/drivers/vhost/vsock.c
> > > > > @@ -925,7 +925,7 @@ static int __init vhost_vsock_init(void)
> > > > >  	int ret;
> > > > >  
> > > > >  	ret = vsock_core_register(&vhost_transport.transport,
> > > > > -				  VSOCK_TRANSPORT_F_H2G);
> > > > > +				  VSOCK_TRANSPORT_F_H2G |
> > > > > VSOCK_TRANSPORT_F_DGRAM);
> > > > >  	if (ret < 0)
> > > > >  		return ret;
> > > > >  
> > > > > diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h
> > > > > index 1c53c4c4d88f..37e55c81e4df 100644
> > > > > --- a/include/net/af_vsock.h
> > > > > +++ b/include/net/af_vsock.h
> > > > > @@ -78,6 +78,8 @@ struct vsock_sock {
> > > > >  s64 vsock_stream_has_data(struct vsock_sock *vsk);
> > > > >  s64 vsock_stream_has_space(struct vsock_sock *vsk);
> > > > >  struct sock *vsock_create_connected(struct sock *parent);
> > > > > +int vsock_bind_stream(struct vsock_sock *vsk,
> > > > > +		      struct sockaddr_vm *addr);
> > > > >  
> > > > >  /**** TRANSPORT ****/
> > > > >  
> > > > > diff --git a/include/uapi/linux/virtio_vsock.h
> > > > > b/include/uapi/linux/virtio_vsock.h
> > > > > index 857df3a3a70d..0975b9c88292 100644
> > > > > --- a/include/uapi/linux/virtio_vsock.h
> > > > > +++ b/include/uapi/linux/virtio_vsock.h
> > > > > @@ -70,6 +70,7 @@ struct virtio_vsock_hdr {
> > > > >  enum virtio_vsock_type {
> > > > >  	VIRTIO_VSOCK_TYPE_STREAM = 1,
> > > > >  	VIRTIO_VSOCK_TYPE_SEQPACKET = 2,
> > > > > +	VIRTIO_VSOCK_TYPE_DGRAM = 3,
> > > > >  };
> > > > >  
> > > > >  enum virtio_vsock_op {
> > > > > diff --git a/net/vmw_vsock/af_vsock.c
> > > > > b/net/vmw_vsock/af_vsock.c
> > > > > index 1893f8aafa48..87e4ae1866d3 100644
> > > > > --- a/net/vmw_vsock/af_vsock.c
> > > > > +++ b/net/vmw_vsock/af_vsock.c
> > > > > @@ -675,6 +675,19 @@ static int
> > > > > __vsock_bind_connectible(struct vsock_sock *vsk,
> > > > >  	return 0;
> > > > >  }
> > > > >  
> > > > > +int vsock_bind_stream(struct vsock_sock *vsk,
> > > > > +		      struct sockaddr_vm *addr)
> > > > > +{
> > > > > +	int retval;
> > > > > +
> > > > > +	spin_lock_bh(&vsock_table_lock);
> > > > > +	retval = __vsock_bind_connectible(vsk, addr);
> > > > > +	spin_unlock_bh(&vsock_table_lock);
> > > > > +
> > > > > +	return retval;
> > > > > +}
> > > > > +EXPORT_SYMBOL(vsock_bind_stream);
> > > > > +
> > > > >  static int __vsock_bind_dgram(struct vsock_sock *vsk,
> > > > >  			      struct sockaddr_vm *addr)
> > > > >  {
> > > > > @@ -2363,11 +2376,16 @@ int vsock_core_register(const struct
> > > > > vsock_transport *t, int features)
> > > > >  	}
> > > > >  
> > > > >  	if (features & VSOCK_TRANSPORT_F_DGRAM) {
> > > > > -		if (t_dgram) {
> > > > > -			err = -EBUSY;
> > > > > -			goto err_busy;
> > > > > +		/* TODO: always chose the G2H variant over
> > > > > others, support nesting later */
> > > > > +		if (features & VSOCK_TRANSPORT_F_G2H) {
> > > > > +			if (t_dgram)
> > > > > +				pr_warn("virtio_vsock: t_dgram
> > > > > already set\n");
> > > > > +			t_dgram = t;
> > > > > +		}
> > > > > +
> > > > > +		if (!t_dgram) {
> > > > > +			t_dgram = t;
> > > > >  		}
> > > > > -		t_dgram = t;
> > > > >  	}
> > > > >  
> > > > >  	if (features & VSOCK_TRANSPORT_F_LOCAL) {
> > > > > diff --git a/net/vmw_vsock/virtio_transport.c
> > > > > b/net/vmw_vsock/virtio_transport.c
> > > > > index 073314312683..d4526ca462d2 100644
> > > > > --- a/net/vmw_vsock/virtio_transport.c
> > > > > +++ b/net/vmw_vsock/virtio_transport.c
> > > > > @@ -850,7 +850,7 @@ static int __init virtio_vsock_init(void)
> > > > >  		return -ENOMEM;
> > > > >  
> > > > >  	ret = vsock_core_register(&virtio_transport.transport,
> > > > > -				  VSOCK_TRANSPORT_F_G2H);
> > > > > +				  VSOCK_TRANSPORT_F_G2H |
> > > > > VSOCK_TRANSPORT_F_DGRAM);
> > > > >  	if (ret)
> > > > >  		goto out_wq;
> > > > >  
> > > > > diff --git a/net/vmw_vsock/virtio_transport_common.c
> > > > > b/net/vmw_vsock/virtio_transport_common.c
> > > > > index bdf16fff054f..aedb48728677 100644
> > > > > --- a/net/vmw_vsock/virtio_transport_common.c
> > > > > +++ b/net/vmw_vsock/virtio_transport_common.c
> > > > > @@ -229,7 +229,9 @@
> > > > > EXPORT_SYMBOL_GPL(virtio_transport_deliver_tap_pkt);
> > > > >  
> > > > >  static u16 virtio_transport_get_type(struct sock *sk)
> > > > >  {
> > > > > -	if (sk->sk_type == SOCK_STREAM)
> > > > > +	if (sk->sk_type == SOCK_DGRAM)
> > > > > +		return VIRTIO_VSOCK_TYPE_DGRAM;
> > > > > +	else if (sk->sk_type == SOCK_STREAM)
> > > > >  		return VIRTIO_VSOCK_TYPE_STREAM;
> > > > >  	else
> > > > >  		return VIRTIO_VSOCK_TYPE_SEQPACKET;
> > > > > @@ -287,22 +289,29 @@ static int
> > > > > virtio_transport_send_pkt_info(struct vsock_sock *vsk,
> > > > >  	vvs = vsk->trans;
> > > > >  
> > > > >  	/* we can send less than pkt_len bytes */
> > > > > -	if (pkt_len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE)
> > > > > -		pkt_len = VIRTIO_VSOCK_MAX_PKT_BUF_SIZE;
> > > > > +	if (pkt_len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE) {
> > > > > +		if (info->type != VIRTIO_VSOCK_TYPE_DGRAM)
> > > > > +			pkt_len =
> > > > > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE;
> > > > > +		else
> > > > > +			return 0;
> > > > > +	}
> > > > >  
> > > > > -	/* virtio_transport_get_credit might return less than
> > > > > pkt_len credit */
> > > > > -	pkt_len = virtio_transport_get_credit(vvs, pkt_len);
> > > > > +	if (info->type != VIRTIO_VSOCK_TYPE_DGRAM) {
> > > > > +		/* virtio_transport_get_credit might return
> > > > > less than pkt_len credit */
> > > > > +		pkt_len = virtio_transport_get_credit(vvs,
> > > > > pkt_len);
> > > > >  
> > > > > -	/* Do not send zero length OP_RW pkt */
> > > > > -	if (pkt_len == 0 && info->op == VIRTIO_VSOCK_OP_RW)
> > > > > -		return pkt_len;
> > > > > +		/* Do not send zero length OP_RW pkt */
> > > > > +		if (pkt_len == 0 && info->op ==
> > > > > VIRTIO_VSOCK_OP_RW)
> > > > > +			return pkt_len;
> > > > > +	}
> > > > >  
> > > > >  	skb = virtio_transport_alloc_skb(info, pkt_len,
> > > > >  					 src_cid, src_port,
> > > > >  					 dst_cid, dst_port,
> > > > >  					 &err);
> > > > >  	if (!skb) {
> > > > > -		virtio_transport_put_credit(vvs, pkt_len);
> > > > > +		if (info->type != VIRTIO_VSOCK_TYPE_DGRAM)
> > > > > +			virtio_transport_put_credit(vvs,
> > > > > pkt_len);
> > > > >  		return err;
> > > > >  	}
> > > > >  
> > > > > @@ -586,6 +595,61 @@
> > > > > virtio_transport_seqpacket_dequeue(struct vsock_sock *vsk,
> > > > >  }
> > > > >  EXPORT_SYMBOL_GPL(virtio_transport_seqpacket_dequeue);
> > > > >  
> > > > > +static ssize_t
> > > > > +virtio_transport_dgram_do_dequeue(struct vsock_sock *vsk,
> > > > > +				  struct msghdr *msg, size_t
> > > > > len)
> > > > > +{
> > > > > +	struct virtio_vsock_sock *vvs = vsk->trans;
> > > > > +	struct sk_buff *skb;
> > > > > +	size_t total = 0;
> > > > > +	u32 free_space;
> > > > > +	int err = -EFAULT;
> > > > > +
> > > > > +	spin_lock_bh(&vvs->rx_lock);
> > > > > +	if (total < len && !skb_queue_empty_lockless(&vvs-
> > > > > >rx_queue)) {
> > > > > +		skb = __skb_dequeue(&vvs->rx_queue);
> > > > > +
> > > > > +		total = len;
> > > > > +		if (total > skb->len - vsock_metadata(skb)-
> > > > > >off)
> > > > > +			total = skb->len - vsock_metadata(skb)-
> > > > > >off;
> > > > > +		else if (total < skb->len -
> > > > > vsock_metadata(skb)->off)
> > > > > +			msg->msg_flags |= MSG_TRUNC;
> > > > > +
> > > > > +		/* sk_lock is held by caller so no one else can
> > > > > dequeue.
> > > > > +		 * Unlock rx_lock since memcpy_to_msg() may
> > > > > sleep.
> > > > > +		 */
> > > > > +		spin_unlock_bh(&vvs->rx_lock);
> > > > > +
> > > > > +		err = memcpy_to_msg(msg, skb->data +
> > > > > vsock_metadata(skb)->off, total);
> > > > > +		if (err)
> > > > > +			return err;
> > > > > +
> > > > > +		spin_lock_bh(&vvs->rx_lock);
> > > > > +
> > > > > +		virtio_transport_dec_rx_pkt(vvs, skb);
> > > > > +		consume_skb(skb);
> > > > > +	}
> > > > > +
> > > > > +	free_space = vvs->buf_alloc - (vvs->fwd_cnt - vvs-
> > > > > >last_fwd_cnt);
> > > > > +
> > > > > +	spin_unlock_bh(&vvs->rx_lock);
> > > > > +
> > > > > +	if (total > 0 && msg->msg_name) {
> > > > > +		/* Provide the address of the sender. */
> > > > > +		DECLARE_SOCKADDR(struct sockaddr_vm *, vm_addr,
> > > > > msg->msg_name);
> > > > > +
> > > > > +		vsock_addr_init(vm_addr,
> > > > > le64_to_cpu(vsock_hdr(skb)->src_cid),
> > > > > +				le32_to_cpu(vsock_hdr(skb)-
> > > > > >src_port));
> > > > > +		msg->msg_namelen = sizeof(*vm_addr);
> > > > > +	}
> > > > > +	return total;
> > > > > +}
> > > > > +
> > > > > +static s64 virtio_transport_dgram_has_data(struct vsock_sock
> > > > > *vsk)
> > > > > +{
> > > > > +	return virtio_transport_stream_has_data(vsk);
> > > > > +}
> > > > > +
> > > > >  int
> > > > >  virtio_transport_seqpacket_enqueue(struct vsock_sock *vsk,
> > > > >  				   struct msghdr *msg,
> > > > > @@ -611,7 +675,66 @@ virtio_transport_dgram_dequeue(struct
> > > > > vsock_sock *vsk,
> > > > >  			       struct msghdr *msg,
> > > > >  			       size_t len, int flags)
> > > > >  {
> > > > > -	return -EOPNOTSUPP;
> > > > > +	struct sock *sk;
> > > > > +	size_t err = 0;
> > > > > +	long timeout;
> > > > > +
> > > > > +	DEFINE_WAIT(wait);
> > > > > +
> > > > > +	sk = &vsk->sk;
> > > > > +	err = 0;
> > > > > +
> > > > > +	if (flags & MSG_OOB || flags & MSG_ERRQUEUE || flags &
> > > > > MSG_PEEK)
> > > > > +		return -EOPNOTSUPP;
> > > > > +
> > > > > +	lock_sock(sk);
> > > > > +
> > > > > +	if (!len)
> > > > > +		goto out;
> > > > > +
> > > > > +	timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
> > > > > +
> > > > > +	while (1) {
> > > > > +		s64 ready;
> > > > > +
> > > > > +		prepare_to_wait(sk_sleep(sk), &wait,
> > > > > TASK_INTERRUPTIBLE);
> > > > > +		ready = virtio_transport_dgram_has_data(vsk);
> > > > > +
> > > > > +		if (ready == 0) {
> > > > > +			if (timeout == 0) {
> > > > > +				err = -EAGAIN;
> > > > > +				finish_wait(sk_sleep(sk),
> > > > > &wait);
> > > > > +				break;
> > > > > +			}
> > > > > +
> > > > > +			release_sock(sk);
> > > > > +			timeout = schedule_timeout(timeout);
> > > > > +			lock_sock(sk);
> > > > > +
> > > > > +			if (signal_pending(current)) {
> > > > > +				err = sock_intr_errno(timeout);
> > > > > +				finish_wait(sk_sleep(sk),
> > > > > &wait);
> > > > > +				break;
> > > > > +			} else if (timeout == 0) {
> > > > > +				err = -EAGAIN;
> > > > > +				finish_wait(sk_sleep(sk),
> > > > > &wait);
> > > > > +				break;
> > > > > +			}
> > > > > +		} else {
> > > > > +			finish_wait(sk_sleep(sk), &wait);
> > > > > +
> > > > > +			if (ready < 0) {
> > > > > +				err = -ENOMEM;
> > > > > +				goto out;
> > > > > +			}
> > > > > +
> > > > > +			err =
> > > > > virtio_transport_dgram_do_dequeue(vsk, msg, len);
> > > > > +			break;
> > > > > +		}
> > > > > +	}
> > > > > +out:
> > > > > +	release_sock(sk);
> > > > > +	return err;
> > > > >  }
> > > > >  EXPORT_SYMBOL_GPL(virtio_transport_dgram_dequeue);
> > ^^^
> > May be, this generic data waiting logic should be in af_vsock.c, as
> > for stream/seqpacket?
> > In this way, another transport which supports SOCK_DGRAM could
> > reuse it.
> 
> I think that is a great idea. I'll test that change for v2.
> 
> Thanks.

Also for v2, i tested Your patchset a little bit(write here to not
spread over all mails):
1) seqpacket test in vsock_test.c fails(seems MSG_EOR flag issue)
2) i can't do rmmod with the following config(after testing):
   CONFIG_VSOCKETS=m
   CONFIG_VIRTIO_VSOCKETS=m
   CONFIG_VIRTIO_VSOCKETS_COMMON=m
   CONFIG_VHOST=m
   CONFIG_VHOST_VSOCK=m
   Guest is shutdown, but rmmod fails.
3) virtio_transport_init + virtio_transport_exit seems must be
   under EXPORT_SYMBOL_GPL(), because both used in another module.
4) I tried to send 5kb(or 20kb not matter) piece of data, but got      
   kernel panic both in guest and later in host.

Thank You
> 
> > > > >  
> > > > > @@ -819,13 +942,13 @@
> > > > > EXPORT_SYMBOL_GPL(virtio_transport_stream_allow);
> > > > >  int virtio_transport_dgram_bind(struct vsock_sock *vsk,
> > > > >  				struct sockaddr_vm *addr)
> > > > >  {
> > > > > -	return -EOPNOTSUPP;
> > > > > +	return vsock_bind_stream(vsk, addr);
> > > > >  }
> > > > >  EXPORT_SYMBOL_GPL(virtio_transport_dgram_bind);
> > > > >  
> > > > >  bool virtio_transport_dgram_allow(u32 cid, u32 port)
> > > > >  {
> > > > > -	return false;
> > > > > +	return true;
> > > > >  }
> > > > >  EXPORT_SYMBOL_GPL(virtio_transport_dgram_allow);
> > > > >  
> > > > > @@ -861,7 +984,16 @@ virtio_transport_dgram_enqueue(struct
> > > > > vsock_sock *vsk,
> > > > >  			       struct msghdr *msg,
> > > > >  			       size_t dgram_len)
> > > > >  {
> > > > > -	return -EOPNOTSUPP;
> > > > > +	struct virtio_vsock_pkt_info info = {
> > > > > +		.op = VIRTIO_VSOCK_OP_RW,
> > > > > +		.msg = msg,
> > > > > +		.pkt_len = dgram_len,
> > > > > +		.vsk = vsk,
> > > > > +		.remote_cid = remote_addr->svm_cid,
> > > > > +		.remote_port = remote_addr->svm_port,
> > > > > +	};
> > > > > +
> > > > > +	return virtio_transport_send_pkt_info(vsk, &info);
> > > > >  }
> > > > >  EXPORT_SYMBOL_GPL(virtio_transport_dgram_enqueue);
> > > > >  
> > > > > @@ -1165,6 +1297,12 @@ virtio_transport_recv_connected(struct
> > > > > sock *sk,
> > > > >  	struct virtio_vsock_hdr *hdr = vsock_hdr(skb);
> > > > >  	int err = 0;
> > > > >  
> > > > > +	if (le16_to_cpu(vsock_hdr(skb)->type) ==
> > > > > VIRTIO_VSOCK_TYPE_DGRAM) {
> > > > > +		virtio_transport_recv_enqueue(vsk, skb);
> > > > > +		sk->sk_data_ready(sk);
> > > > > +		return err;
> > > > > +	}
> > > > > +
> > > > >  	switch (le16_to_cpu(hdr->op)) {
> > > > >  	case VIRTIO_VSOCK_OP_RW:
> > > > >  		virtio_transport_recv_enqueue(vsk, skb);
> > > > > @@ -1320,7 +1458,8 @@ virtio_transport_recv_listen(struct
> > > > > sock *sk, struct sk_buff *skb,
> > > > >  static bool virtio_transport_valid_type(u16 type)
> > > > >  {
> > > > >  	return (type == VIRTIO_VSOCK_TYPE_STREAM) ||
> > > > > -	       (type == VIRTIO_VSOCK_TYPE_SEQPACKET);
> > > > > +	       (type == VIRTIO_VSOCK_TYPE_SEQPACKET) ||
> > > > > +	       (type == VIRTIO_VSOCK_TYPE_DGRAM);
> > > > >  }
> > > > >  
> > > > >  /* We are under the virtio-vsock's vsock->rx_lock or vhost-
> > > > > vsock's vq->mutex
> > > > > @@ -1384,6 +1523,11 @@ void virtio_transport_recv_pkt(struct
> > > > > virtio_transport *t,
> > > > >  		goto free_pkt;
> > > > >  	}
> > > > >  
> > > > > +	if (sk->sk_type == SOCK_DGRAM) {
> > > > > +		virtio_transport_recv_connected(sk, skb);
> > > > > +		goto out;
> > > > > +	}
> > > > > +
> > > > >  	space_available = virtio_transport_space_update(sk,
> > > > > skb);
> > > > >  
> > > > >  	/* Update CID in case it has changed after a transport
> > > > > reset event */
> > > > > @@ -1415,6 +1559,7 @@ void virtio_transport_recv_pkt(struct
> > > > > virtio_transport *t,
> > > > >  		break;
> > > > >  	}
> > > > >  
> > > > > +out:
> > > > >  	release_sock(sk);
> > > > >  
> > > > >  	/* Release refcnt obtained when we fetched this socket
> > > > > out of the
> > > > > -- 
> > > > > 2.35.1
> > > > > 
> > > > 
> > > > -------------------------------------------------------------
> > > > --------
> > > > To unsubscribe, e-mail: 
> > > > virtio-dev-unsubscribe@lists.oasis-open.org
> > > > For additional commands, e-mail: 
> > > > virtio-dev-help@lists.oasis-open.org
> > > > 
> 
> ---------------------------------------------------------------------
> To unsubscribe, e-mail: virtio-dev-unsubscribe@lists.oasis-open.org
> For additional commands, e-mail: virtio-dev-help@lists.oasis-open.org
>
Arseniy Krasnov Aug. 19, 2022, 4:30 a.m. UTC | #10
On Tue, 2022-08-16 at 20:52 +0000, Bobby Eshleman wrote:
> On Thu, Aug 18, 2022 at 08:35:48AM +0000, Arseniy Krasnov wrote:
> > On Tue, 2022-08-16 at 09:58 +0000, Bobby Eshleman wrote:
> > > On Wed, Aug 17, 2022 at 05:42:08AM +0000, Arseniy Krasnov wrote:
> > > > On 17.08.2022 08:01, Arseniy Krasnov wrote:
> > > > > On 16.08.2022 05:32, Bobby Eshleman wrote:
> > > > > > CC'ing virtio-dev@lists.oasis-open.org
> > > > > > 
> > > > > > On Mon, Aug 15, 2022 at 10:56:08AM -0700, Bobby Eshleman
> > > > > > wrote:
> > > > > > > This patch supports dgram in virtio and on the vhost
> > > > > > > side.
> > > > > Hello,
> > > > > 
> > > > > sorry, i don't understand, how this maintains message
> > > > > boundaries?
> > > > > Or it
> > > > > is unnecessary for SOCK_DGRAM?
> > > > > 
> > > > > Thanks
> > > > > > > Signed-off-by: Jiang Wang <jiang.wang@bytedance.com>
> > > > > > > Signed-off-by: Bobby Eshleman <
> > > > > > > bobby.eshleman@bytedance.com>
> > > > > > > ---
> > > > > > >  drivers/vhost/vsock.c                   |   2 +-
> > > > > > >  include/net/af_vsock.h                  |   2 +
> > > > > > >  include/uapi/linux/virtio_vsock.h       |   1 +
> > > > > > >  net/vmw_vsock/af_vsock.c                |  26 +++-
> > > > > > >  net/vmw_vsock/virtio_transport.c        |   2 +-
> > > > > > >  net/vmw_vsock/virtio_transport_common.c | 173
> > > > > > > ++++++++++++++++++++++--
> > > > > > >  6 files changed, 186 insertions(+), 20 deletions(-)
> > > > > > > 
> > > > > > > diff --git a/drivers/vhost/vsock.c
> > > > > > > b/drivers/vhost/vsock.c
> > > > > > > index a5d1bdb786fe..3dc72a5647ca 100644
> > > > > > > --- a/drivers/vhost/vsock.c
> > > > > > > +++ b/drivers/vhost/vsock.c
> > > > > > > @@ -925,7 +925,7 @@ static int __init
> > > > > > > vhost_vsock_init(void)
> > > > > > >  	int ret;
> > > > > > >  
> > > > > > >  	ret = vsock_core_register(&vhost_transport.transport,
> > > > > > > -				  VSOCK_TRANSPORT_F_H2G);
> > > > > > > +				  VSOCK_TRANSPORT_F_H2G |
> > > > > > > VSOCK_TRANSPORT_F_DGRAM);
> > > > > > >  	if (ret < 0)
> > > > > > >  		return ret;
> > > > > > >  
> > > > > > > diff --git a/include/net/af_vsock.h
> > > > > > > b/include/net/af_vsock.h
> > > > > > > index 1c53c4c4d88f..37e55c81e4df 100644
> > > > > > > --- a/include/net/af_vsock.h
> > > > > > > +++ b/include/net/af_vsock.h
> > > > > > > @@ -78,6 +78,8 @@ struct vsock_sock {
> > > > > > >  s64 vsock_stream_has_data(struct vsock_sock *vsk);
> > > > > > >  s64 vsock_stream_has_space(struct vsock_sock *vsk);
> > > > > > >  struct sock *vsock_create_connected(struct sock
> > > > > > > *parent);
> > > > > > > +int vsock_bind_stream(struct vsock_sock *vsk,
> > > > > > > +		      struct sockaddr_vm *addr);
> > > > > > >  
> > > > > > >  /**** TRANSPORT ****/
> > > > > > >  
> > > > > > > diff --git a/include/uapi/linux/virtio_vsock.h
> > > > > > > b/include/uapi/linux/virtio_vsock.h
> > > > > > > index 857df3a3a70d..0975b9c88292 100644
> > > > > > > --- a/include/uapi/linux/virtio_vsock.h
> > > > > > > +++ b/include/uapi/linux/virtio_vsock.h
> > > > > > > @@ -70,6 +70,7 @@ struct virtio_vsock_hdr {
> > > > > > >  enum virtio_vsock_type {
> > > > > > >  	VIRTIO_VSOCK_TYPE_STREAM = 1,
> > > > > > >  	VIRTIO_VSOCK_TYPE_SEQPACKET = 2,
> > > > > > > +	VIRTIO_VSOCK_TYPE_DGRAM = 3,
> > > > > > >  };
> > > > > > >  
> > > > > > >  enum virtio_vsock_op {
> > > > > > > diff --git a/net/vmw_vsock/af_vsock.c
> > > > > > > b/net/vmw_vsock/af_vsock.c
> > > > > > > index 1893f8aafa48..87e4ae1866d3 100644
> > > > > > > --- a/net/vmw_vsock/af_vsock.c
> > > > > > > +++ b/net/vmw_vsock/af_vsock.c
> > > > > > > @@ -675,6 +675,19 @@ static int
> > > > > > > __vsock_bind_connectible(struct vsock_sock *vsk,
> > > > > > >  	return 0;
> > > > > > >  }
> > > > > > >  
> > > > > > > +int vsock_bind_stream(struct vsock_sock *vsk,
> > > > > > > +		      struct sockaddr_vm *addr)
> > > > > > > +{
> > > > > > > +	int retval;
> > > > > > > +
> > > > > > > +	spin_lock_bh(&vsock_table_lock);
> > > > > > > +	retval = __vsock_bind_connectible(vsk, addr);
> > > > > > > +	spin_unlock_bh(&vsock_table_lock);
> > > > > > > +
> > > > > > > +	return retval;
> > > > > > > +}
> > > > > > > +EXPORT_SYMBOL(vsock_bind_stream);
> > > > > > > +
> > > > > > >  static int __vsock_bind_dgram(struct vsock_sock *vsk,
> > > > > > >  			      struct sockaddr_vm *addr)
> > > > > > >  {
> > > > > > > @@ -2363,11 +2376,16 @@ int vsock_core_register(const
> > > > > > > struct
> > > > > > > vsock_transport *t, int features)
> > > > > > >  	}
> > > > > > >  
> > > > > > >  	if (features & VSOCK_TRANSPORT_F_DGRAM) {
> > > > > > > -		if (t_dgram) {
> > > > > > > -			err = -EBUSY;
> > > > > > > -			goto err_busy;
> > > > > > > +		/* TODO: always chose the G2H variant over
> > > > > > > others, support nesting later */
> > > > > > > +		if (features & VSOCK_TRANSPORT_F_G2H) {
> > > > > > > +			if (t_dgram)
> > > > > > > +				pr_warn("virtio_vsock: t_dgram
> > > > > > > already set\n");
> > > > > > > +			t_dgram = t;
> > > > > > > +		}
> > > > > > > +
> > > > > > > +		if (!t_dgram) {
> > > > > > > +			t_dgram = t;
> > > > > > >  		}
> > > > > > > -		t_dgram = t;
> > > > > > >  	}
> > > > > > >  
> > > > > > >  	if (features & VSOCK_TRANSPORT_F_LOCAL) {
> > > > > > > diff --git a/net/vmw_vsock/virtio_transport.c
> > > > > > > b/net/vmw_vsock/virtio_transport.c
> > > > > > > index 073314312683..d4526ca462d2 100644
> > > > > > > --- a/net/vmw_vsock/virtio_transport.c
> > > > > > > +++ b/net/vmw_vsock/virtio_transport.c
> > > > > > > @@ -850,7 +850,7 @@ static int __init
> > > > > > > virtio_vsock_init(void)
> > > > > > >  		return -ENOMEM;
> > > > > > >  
> > > > > > >  	ret = vsock_core_register(&virtio_transport.transport,
> > > > > > > -				  VSOCK_TRANSPORT_F_G2H);
> > > > > > > +				  VSOCK_TRANSPORT_F_G2H |
> > > > > > > VSOCK_TRANSPORT_F_DGRAM);
> > > > > > >  	if (ret)
> > > > > > >  		goto out_wq;
> > > > > > >  
> > > > > > > diff --git a/net/vmw_vsock/virtio_transport_common.c
> > > > > > > b/net/vmw_vsock/virtio_transport_common.c
> > > > > > > index bdf16fff054f..aedb48728677 100644
> > > > > > > --- a/net/vmw_vsock/virtio_transport_common.c
> > > > > > > +++ b/net/vmw_vsock/virtio_transport_common.c
> > > > > > > @@ -229,7 +229,9 @@
> > > > > > > EXPORT_SYMBOL_GPL(virtio_transport_deliver_tap_pkt);
> > > > > > >  
> > > > > > >  static u16 virtio_transport_get_type(struct sock *sk)
> > > > > > >  {
> > > > > > > -	if (sk->sk_type == SOCK_STREAM)
> > > > > > > +	if (sk->sk_type == SOCK_DGRAM)
> > > > > > > +		return VIRTIO_VSOCK_TYPE_DGRAM;
> > > > > > > +	else if (sk->sk_type == SOCK_STREAM)
> > > > > > >  		return VIRTIO_VSOCK_TYPE_STREAM;
> > > > > > >  	else
> > > > > > >  		return VIRTIO_VSOCK_TYPE_SEQPACKET;
> > > > > > > @@ -287,22 +289,29 @@ static int
> > > > > > > virtio_transport_send_pkt_info(struct vsock_sock *vsk,
> > > > > > >  	vvs = vsk->trans;
> > > > > > >  
> > > > > > >  	/* we can send less than pkt_len bytes */
> > > > > > > -	if (pkt_len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE)
> > > > > > > -		pkt_len = VIRTIO_VSOCK_MAX_PKT_BUF_SIZE;
> > > > > > > +	if (pkt_len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE) {
> > > > > > > +		if (info->type != VIRTIO_VSOCK_TYPE_DGRAM)
> > > > > > > +			pkt_len =
> > > > > > > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE;
> > > > > > > +		else
> > > > > > > +			return 0;
> > > > > > > +	}
> > > > > > >  
> > > > > > > -	/* virtio_transport_get_credit might return less than
> > > > > > > pkt_len credit */
> > > > > > > -	pkt_len = virtio_transport_get_credit(vvs, pkt_len);
> > > > > > > +	if (info->type != VIRTIO_VSOCK_TYPE_DGRAM) {
> > > > > > > +		/* virtio_transport_get_credit might return
> > > > > > > less than pkt_len credit */
> > > > > > > +		pkt_len = virtio_transport_get_credit(vvs,
> > > > > > > pkt_len);
> > > > > > >  
> > > > > > > -	/* Do not send zero length OP_RW pkt */
> > > > > > > -	if (pkt_len == 0 && info->op == VIRTIO_VSOCK_OP_RW)
> > > > > > > -		return pkt_len;
> > > > > > > +		/* Do not send zero length OP_RW pkt */
> > > > > > > +		if (pkt_len == 0 && info->op ==
> > > > > > > VIRTIO_VSOCK_OP_RW)
> > > > > > > +			return pkt_len;
> > > > > > > +	}
> > > > > > >  
> > > > > > >  	skb = virtio_transport_alloc_skb(info, pkt_len,
> > > > > > >  					 src_cid, src_port,
> > > > > > >  					 dst_cid, dst_port,
> > > > > > >  					 &err);
> > > > > > >  	if (!skb) {
> > > > > > > -		virtio_transport_put_credit(vvs, pkt_len);
> > > > > > > +		if (info->type != VIRTIO_VSOCK_TYPE_DGRAM)
> > > > > > > +			virtio_transport_put_credit(vvs,
> > > > > > > pkt_len);
> > > > > > >  		return err;
> > > > > > >  	}
> > > > > > >  
> > > > > > > @@ -586,6 +595,61 @@
> > > > > > > virtio_transport_seqpacket_dequeue(struct vsock_sock
> > > > > > > *vsk,
> > > > > > >  }
> > > > > > >  EXPORT_SYMBOL_GPL(virtio_transport_seqpacket_dequeue);
> > > > > > >  
> > > > > > > +static ssize_t
> > > > > > > +virtio_transport_dgram_do_dequeue(struct vsock_sock
> > > > > > > *vsk,
> > > > > > > +				  struct msghdr *msg, size_t
> > > > > > > len)
> > > > > > > +{
> > > > > > > +	struct virtio_vsock_sock *vvs = vsk->trans;
> > > > > > > +	struct sk_buff *skb;
> > > > > > > +	size_t total = 0;
> > > > > > > +	u32 free_space;
> > > > > > > +	int err = -EFAULT;
> > > > > > > +
> > > > > > > +	spin_lock_bh(&vvs->rx_lock);
> > > > > > > +	if (total < len && !skb_queue_empty_lockless(&vvs-
> > > > > > > > rx_queue)) {
> > > > > > > +		skb = __skb_dequeue(&vvs->rx_queue);
> > > > > > > +
> > > > > > > +		total = len;
> > > > > > > +		if (total > skb->len - vsock_metadata(skb)-
> > > > > > > > off)
> > > > > > > +			total = skb->len - vsock_metadata(skb)-
> > > > > > > > off;
> > > > > > > +		else if (total < skb->len -
> > > > > > > vsock_metadata(skb)->off)
> > > > > > > +			msg->msg_flags |= MSG_TRUNC;
> > > > > > > +
> > > > > > > +		/* sk_lock is held by caller so no one else can
> > > > > > > dequeue.
> > > > > > > +		 * Unlock rx_lock since memcpy_to_msg() may
> > > > > > > sleep.
> > > > > > > +		 */
> > > > > > > +		spin_unlock_bh(&vvs->rx_lock);
> > > > > > > +
> > > > > > > +		err = memcpy_to_msg(msg, skb->data +
> > > > > > > vsock_metadata(skb)->off, total);
> > > > > > > +		if (err)
> > > > > > > +			return err;
> > > > > > > +
> > > > > > > +		spin_lock_bh(&vvs->rx_lock);
> > > > > > > +
> > > > > > > +		virtio_transport_dec_rx_pkt(vvs, skb);
> > > > > > > +		consume_skb(skb);
> > > > > > > +	}
> > > > > > > +
> > > > > > > +	free_space = vvs->buf_alloc - (vvs->fwd_cnt - vvs-
> > > > > > > > last_fwd_cnt);
> > > > > > > +
> > > > > > > +	spin_unlock_bh(&vvs->rx_lock);
> > > > > > > +
> > > > > > > +	if (total > 0 && msg->msg_name) {
> > > > > > > +		/* Provide the address of the sender. */
> > > > > > > +		DECLARE_SOCKADDR(struct sockaddr_vm *, vm_addr,
> > > > > > > msg->msg_name);
> > > > > > > +
> > > > > > > +		vsock_addr_init(vm_addr,
> > > > > > > le64_to_cpu(vsock_hdr(skb)->src_cid),
> > > > > > > +				le32_to_cpu(vsock_hdr(skb)-
> > > > > > > > src_port));
> > > > > > > +		msg->msg_namelen = sizeof(*vm_addr);
> > > > > > > +	}
> > > > > > > +	return total;
> > > > > > > +}
> > > > > > > +
> > > > > > > +static s64 virtio_transport_dgram_has_data(struct
> > > > > > > vsock_sock
> > > > > > > *vsk)
> > > > > > > +{
> > > > > > > +	return virtio_transport_stream_has_data(vsk);
> > > > > > > +}
> > > > > > > +
> > > > > > >  int
> > > > > > >  virtio_transport_seqpacket_enqueue(struct vsock_sock
> > > > > > > *vsk,
> > > > > > >  				   struct msghdr *msg,
> > > > > > > @@ -611,7 +675,66 @@
> > > > > > > virtio_transport_dgram_dequeue(struct
> > > > > > > vsock_sock *vsk,
> > > > > > >  			       struct msghdr *msg,
> > > > > > >  			       size_t len, int flags)
> > > > > > >  {
> > > > > > > -	return -EOPNOTSUPP;
> > > > > > > +	struct sock *sk;
> > > > > > > +	size_t err = 0;
> > > > > > > +	long timeout;
> > > > > > > +
> > > > > > > +	DEFINE_WAIT(wait);
> > > > > > > +
> > > > > > > +	sk = &vsk->sk;
> > > > > > > +	err = 0;
> > > > > > > +
> > > > > > > +	if (flags & MSG_OOB || flags & MSG_ERRQUEUE || flags &
> > > > > > > MSG_PEEK)
> > > > > > > +		return -EOPNOTSUPP;
> > > > > > > +
> > > > > > > +	lock_sock(sk);
> > > > > > > +
> > > > > > > +	if (!len)
> > > > > > > +		goto out;
> > > > > > > +
> > > > > > > +	timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
> > > > > > > +
> > > > > > > +	while (1) {
> > > > > > > +		s64 ready;
> > > > > > > +
> > > > > > > +		prepare_to_wait(sk_sleep(sk), &wait,
> > > > > > > TASK_INTERRUPTIBLE);
> > > > > > > +		ready = virtio_transport_dgram_has_data(vsk);
> > > > > > > +
> > > > > > > +		if (ready == 0) {
> > > > > > > +			if (timeout == 0) {
> > > > > > > +				err = -EAGAIN;
> > > > > > > +				finish_wait(sk_sleep(sk),
> > > > > > > &wait);
> > > > > > > +				break;
> > > > > > > +			}
> > > > > > > +
> > > > > > > +			release_sock(sk);
> > > > > > > +			timeout = schedule_timeout(timeout);
> > > > > > > +			lock_sock(sk);
> > > > > > > +
> > > > > > > +			if (signal_pending(current)) {
> > > > > > > +				err = sock_intr_errno(timeout);
> > > > > > > +				finish_wait(sk_sleep(sk),
> > > > > > > &wait);
> > > > > > > +				break;
> > > > > > > +			} else if (timeout == 0) {
> > > > > > > +				err = -EAGAIN;
> > > > > > > +				finish_wait(sk_sleep(sk),
> > > > > > > &wait);
> > > > > > > +				break;
> > > > > > > +			}
> > > > > > > +		} else {
> > > > > > > +			finish_wait(sk_sleep(sk), &wait);
> > > > > > > +
> > > > > > > +			if (ready < 0) {
> > > > > > > +				err = -ENOMEM;
> > > > > > > +				goto out;
> > > > > > > +			}
> > > > > > > +
> > > > > > > +			err =
> > > > > > > virtio_transport_dgram_do_dequeue(vsk, msg, len);
> > > > > > > +			break;
> > > > > > > +		}
> > > > > > > +	}
> > > > > > > +out:
> > > > > > > +	release_sock(sk);
> > > > > > > +	return err;
> > > > > > >  }
> > > > > > >  EXPORT_SYMBOL_GPL(virtio_transport_dgram_dequeue);
> > > > ^^^
> > > > May be, this generic data waiting logic should be in
> > > > af_vsock.c, as
> > > > for stream/seqpacket?
> > > > In this way, another transport which supports SOCK_DGRAM could
> > > > reuse it.
> > > 
> > > I think that is a great idea. I'll test that change for v2.
> > > 
> > > Thanks.
> > 
> > Also for v2, i tested Your patchset a little bit(write here to not
> > spread over all mails):
> > 1) seqpacket test in vsock_test.c fails(seems MSG_EOR flag issue)
> 
> I will investigate.
> 
> > 2) i can't do rmmod with the following config(after testing):
> >    CONFIG_VSOCKETS=m
> >    CONFIG_VIRTIO_VSOCKETS=m
> >    CONFIG_VIRTIO_VSOCKETS_COMMON=m
> >    CONFIG_VHOST=m
> >    CONFIG_VHOST_VSOCK=m
> >    Guest is shutdown, but rmmod fails.
> > 3) virtio_transport_init + virtio_transport_exit seems must be
> >    under EXPORT_SYMBOL_GPL(), because both used in another module.
> 
> Definitely, will fix.
> 
> > 4) I tried to send 5kb(or 20kb not matter) piece of data, but
> > got      
> >    kernel panic both in guest and later in host.
> > 
> 
> Thanks for catching that. I can reproduce it intermittently, but only
> for seqpacket. Did you happen to see this for other socket types as
> well?
> 
> Thanks

I got this for SOCK_DGRAM, i didnt test seqpacket or stream.

Thanks, Arseniy

> 
> > Thank You
> > > > > > >  
> > > > > > > @@ -819,13 +942,13 @@
> > > > > > > EXPORT_SYMBOL_GPL(virtio_transport_stream_allow);
> > > > > > >  int virtio_transport_dgram_bind(struct vsock_sock *vsk,
> > > > > > >  				struct sockaddr_vm *addr)
> > > > > > >  {
> > > > > > > -	return -EOPNOTSUPP;
> > > > > > > +	return vsock_bind_stream(vsk, addr);
> > > > > > >  }
> > > > > > >  EXPORT_SYMBOL_GPL(virtio_transport_dgram_bind);
> > > > > > >  
> > > > > > >  bool virtio_transport_dgram_allow(u32 cid, u32 port)
> > > > > > >  {
> > > > > > > -	return false;
> > > > > > > +	return true;
> > > > > > >  }
> > > > > > >  EXPORT_SYMBOL_GPL(virtio_transport_dgram_allow);
> > > > > > >  
> > > > > > > @@ -861,7 +984,16 @@
> > > > > > > virtio_transport_dgram_enqueue(struct
> > > > > > > vsock_sock *vsk,
> > > > > > >  			       struct msghdr *msg,
> > > > > > >  			       size_t dgram_len)
> > > > > > >  {
> > > > > > > -	return -EOPNOTSUPP;
> > > > > > > +	struct virtio_vsock_pkt_info info = {
> > > > > > > +		.op = VIRTIO_VSOCK_OP_RW,
> > > > > > > +		.msg = msg,
> > > > > > > +		.pkt_len = dgram_len,
> > > > > > > +		.vsk = vsk,
> > > > > > > +		.remote_cid = remote_addr->svm_cid,
> > > > > > > +		.remote_port = remote_addr->svm_port,
> > > > > > > +	};
> > > > > > > +
> > > > > > > +	return virtio_transport_send_pkt_info(vsk, &info);
> > > > > > >  }
> > > > > > >  EXPORT_SYMBOL_GPL(virtio_transport_dgram_enqueue);
> > > > > > >  
> > > > > > > @@ -1165,6 +1297,12 @@
> > > > > > > virtio_transport_recv_connected(struct
> > > > > > > sock *sk,
> > > > > > >  	struct virtio_vsock_hdr *hdr = vsock_hdr(skb);
> > > > > > >  	int err = 0;
> > > > > > >  
> > > > > > > +	if (le16_to_cpu(vsock_hdr(skb)->type) ==
> > > > > > > VIRTIO_VSOCK_TYPE_DGRAM) {
> > > > > > > +		virtio_transport_recv_enqueue(vsk, skb);
> > > > > > > +		sk->sk_data_ready(sk);
> > > > > > > +		return err;
> > > > > > > +	}
> > > > > > > +
> > > > > > >  	switch (le16_to_cpu(hdr->op)) {
> > > > > > >  	case VIRTIO_VSOCK_OP_RW:
> > > > > > >  		virtio_transport_recv_enqueue(vsk, skb);
> > > > > > > @@ -1320,7 +1458,8 @@ virtio_transport_recv_listen(struct
> > > > > > > sock *sk, struct sk_buff *skb,
> > > > > > >  static bool virtio_transport_valid_type(u16 type)
> > > > > > >  {
> > > > > > >  	return (type == VIRTIO_VSOCK_TYPE_STREAM) ||
> > > > > > > -	       (type == VIRTIO_VSOCK_TYPE_SEQPACKET);
> > > > > > > +	       (type == VIRTIO_VSOCK_TYPE_SEQPACKET) ||
> > > > > > > +	       (type == VIRTIO_VSOCK_TYPE_DGRAM);
> > > > > > >  }
> > > > > > >  
> > > > > > >  /* We are under the virtio-vsock's vsock->rx_lock or
> > > > > > > vhost-
> > > > > > > vsock's vq->mutex
> > > > > > > @@ -1384,6 +1523,11 @@ void
> > > > > > > virtio_transport_recv_pkt(struct
> > > > > > > virtio_transport *t,
> > > > > > >  		goto free_pkt;
> > > > > > >  	}
> > > > > > >  
> > > > > > > +	if (sk->sk_type == SOCK_DGRAM) {
> > > > > > > +		virtio_transport_recv_connected(sk, skb);
> > > > > > > +		goto out;
> > > > > > > +	}
> > > > > > > +
> > > > > > >  	space_available = virtio_transport_space_update(sk,
> > > > > > > skb);
> > > > > > >  
> > > > > > >  	/* Update CID in case it has changed after a transport
> > > > > > > reset event */
> > > > > > > @@ -1415,6 +1559,7 @@ void
> > > > > > > virtio_transport_recv_pkt(struct
> > > > > > > virtio_transport *t,
> > > > > > >  		break;
> > > > > > >  	}
> > > > > > >  
> > > > > > > +out:
> > > > > > >  	release_sock(sk);
> > > > > > >  
> > > > > > >  	/* Release refcnt obtained when we fetched this socket
> > > > > > > out of the
> > > > > > > -- 
> > > > > > > 2.35.1
> > > > > > > 
> > > > > > 
> > > > > > ---------------------------------------------------------
> > > > > > ----
> > > > > > --------
> > > > > > To unsubscribe, e-mail: 
> > > > > > virtio-dev-unsubscribe@lists.oasis-open.org
> > > > > > For additional commands, e-mail: 
> > > > > > virtio-dev-help@lists.oasis-open.org
> > > > > > 
> > > 
> > > ---------------------------------------------------------------
> > > ------
> > > To unsubscribe, e-mail: 
> > > virtio-dev-unsubscribe@lists.oasis-open.org
> > > For additional commands, e-mail: 
> > > virtio-dev-help@lists.oasis-open.org
> > >
diff mbox series

Patch

diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
index a5d1bdb786fe..3dc72a5647ca 100644
--- a/drivers/vhost/vsock.c
+++ b/drivers/vhost/vsock.c
@@ -925,7 +925,7 @@  static int __init vhost_vsock_init(void)
 	int ret;
 
 	ret = vsock_core_register(&vhost_transport.transport,
-				  VSOCK_TRANSPORT_F_H2G);
+				  VSOCK_TRANSPORT_F_H2G | VSOCK_TRANSPORT_F_DGRAM);
 	if (ret < 0)
 		return ret;
 
diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h
index 1c53c4c4d88f..37e55c81e4df 100644
--- a/include/net/af_vsock.h
+++ b/include/net/af_vsock.h
@@ -78,6 +78,8 @@  struct vsock_sock {
 s64 vsock_stream_has_data(struct vsock_sock *vsk);
 s64 vsock_stream_has_space(struct vsock_sock *vsk);
 struct sock *vsock_create_connected(struct sock *parent);
+int vsock_bind_stream(struct vsock_sock *vsk,
+		      struct sockaddr_vm *addr);
 
 /**** TRANSPORT ****/
 
diff --git a/include/uapi/linux/virtio_vsock.h b/include/uapi/linux/virtio_vsock.h
index 857df3a3a70d..0975b9c88292 100644
--- a/include/uapi/linux/virtio_vsock.h
+++ b/include/uapi/linux/virtio_vsock.h
@@ -70,6 +70,7 @@  struct virtio_vsock_hdr {
 enum virtio_vsock_type {
 	VIRTIO_VSOCK_TYPE_STREAM = 1,
 	VIRTIO_VSOCK_TYPE_SEQPACKET = 2,
+	VIRTIO_VSOCK_TYPE_DGRAM = 3,
 };
 
 enum virtio_vsock_op {
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 1893f8aafa48..87e4ae1866d3 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -675,6 +675,19 @@  static int __vsock_bind_connectible(struct vsock_sock *vsk,
 	return 0;
 }
 
+int vsock_bind_stream(struct vsock_sock *vsk,
+		      struct sockaddr_vm *addr)
+{
+	int retval;
+
+	spin_lock_bh(&vsock_table_lock);
+	retval = __vsock_bind_connectible(vsk, addr);
+	spin_unlock_bh(&vsock_table_lock);
+
+	return retval;
+}
+EXPORT_SYMBOL(vsock_bind_stream);
+
 static int __vsock_bind_dgram(struct vsock_sock *vsk,
 			      struct sockaddr_vm *addr)
 {
@@ -2363,11 +2376,16 @@  int vsock_core_register(const struct vsock_transport *t, int features)
 	}
 
 	if (features & VSOCK_TRANSPORT_F_DGRAM) {
-		if (t_dgram) {
-			err = -EBUSY;
-			goto err_busy;
+		/* TODO: always chose the G2H variant over others, support nesting later */
+		if (features & VSOCK_TRANSPORT_F_G2H) {
+			if (t_dgram)
+				pr_warn("virtio_vsock: t_dgram already set\n");
+			t_dgram = t;
+		}
+
+		if (!t_dgram) {
+			t_dgram = t;
 		}
-		t_dgram = t;
 	}
 
 	if (features & VSOCK_TRANSPORT_F_LOCAL) {
diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
index 073314312683..d4526ca462d2 100644
--- a/net/vmw_vsock/virtio_transport.c
+++ b/net/vmw_vsock/virtio_transport.c
@@ -850,7 +850,7 @@  static int __init virtio_vsock_init(void)
 		return -ENOMEM;
 
 	ret = vsock_core_register(&virtio_transport.transport,
-				  VSOCK_TRANSPORT_F_G2H);
+				  VSOCK_TRANSPORT_F_G2H | VSOCK_TRANSPORT_F_DGRAM);
 	if (ret)
 		goto out_wq;
 
diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
index bdf16fff054f..aedb48728677 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -229,7 +229,9 @@  EXPORT_SYMBOL_GPL(virtio_transport_deliver_tap_pkt);
 
 static u16 virtio_transport_get_type(struct sock *sk)
 {
-	if (sk->sk_type == SOCK_STREAM)
+	if (sk->sk_type == SOCK_DGRAM)
+		return VIRTIO_VSOCK_TYPE_DGRAM;
+	else if (sk->sk_type == SOCK_STREAM)
 		return VIRTIO_VSOCK_TYPE_STREAM;
 	else
 		return VIRTIO_VSOCK_TYPE_SEQPACKET;
@@ -287,22 +289,29 @@  static int virtio_transport_send_pkt_info(struct vsock_sock *vsk,
 	vvs = vsk->trans;
 
 	/* we can send less than pkt_len bytes */
-	if (pkt_len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE)
-		pkt_len = VIRTIO_VSOCK_MAX_PKT_BUF_SIZE;
+	if (pkt_len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE) {
+		if (info->type != VIRTIO_VSOCK_TYPE_DGRAM)
+			pkt_len = VIRTIO_VSOCK_MAX_PKT_BUF_SIZE;
+		else
+			return 0;
+	}
 
-	/* virtio_transport_get_credit might return less than pkt_len credit */
-	pkt_len = virtio_transport_get_credit(vvs, pkt_len);
+	if (info->type != VIRTIO_VSOCK_TYPE_DGRAM) {
+		/* virtio_transport_get_credit might return less than pkt_len credit */
+		pkt_len = virtio_transport_get_credit(vvs, pkt_len);
 
-	/* Do not send zero length OP_RW pkt */
-	if (pkt_len == 0 && info->op == VIRTIO_VSOCK_OP_RW)
-		return pkt_len;
+		/* Do not send zero length OP_RW pkt */
+		if (pkt_len == 0 && info->op == VIRTIO_VSOCK_OP_RW)
+			return pkt_len;
+	}
 
 	skb = virtio_transport_alloc_skb(info, pkt_len,
 					 src_cid, src_port,
 					 dst_cid, dst_port,
 					 &err);
 	if (!skb) {
-		virtio_transport_put_credit(vvs, pkt_len);
+		if (info->type != VIRTIO_VSOCK_TYPE_DGRAM)
+			virtio_transport_put_credit(vvs, pkt_len);
 		return err;
 	}
 
@@ -586,6 +595,61 @@  virtio_transport_seqpacket_dequeue(struct vsock_sock *vsk,
 }
 EXPORT_SYMBOL_GPL(virtio_transport_seqpacket_dequeue);
 
+static ssize_t
+virtio_transport_dgram_do_dequeue(struct vsock_sock *vsk,
+				  struct msghdr *msg, size_t len)
+{
+	struct virtio_vsock_sock *vvs = vsk->trans;
+	struct sk_buff *skb;
+	size_t total = 0;
+	u32 free_space;
+	int err = -EFAULT;
+
+	spin_lock_bh(&vvs->rx_lock);
+	if (total < len && !skb_queue_empty_lockless(&vvs->rx_queue)) {
+		skb = __skb_dequeue(&vvs->rx_queue);
+
+		total = len;
+		if (total > skb->len - vsock_metadata(skb)->off)
+			total = skb->len - vsock_metadata(skb)->off;
+		else if (total < skb->len - vsock_metadata(skb)->off)
+			msg->msg_flags |= MSG_TRUNC;
+
+		/* sk_lock is held by caller so no one else can dequeue.
+		 * Unlock rx_lock since memcpy_to_msg() may sleep.
+		 */
+		spin_unlock_bh(&vvs->rx_lock);
+
+		err = memcpy_to_msg(msg, skb->data + vsock_metadata(skb)->off, total);
+		if (err)
+			return err;
+
+		spin_lock_bh(&vvs->rx_lock);
+
+		virtio_transport_dec_rx_pkt(vvs, skb);
+		consume_skb(skb);
+	}
+
+	free_space = vvs->buf_alloc - (vvs->fwd_cnt - vvs->last_fwd_cnt);
+
+	spin_unlock_bh(&vvs->rx_lock);
+
+	if (total > 0 && msg->msg_name) {
+		/* Provide the address of the sender. */
+		DECLARE_SOCKADDR(struct sockaddr_vm *, vm_addr, msg->msg_name);
+
+		vsock_addr_init(vm_addr, le64_to_cpu(vsock_hdr(skb)->src_cid),
+				le32_to_cpu(vsock_hdr(skb)->src_port));
+		msg->msg_namelen = sizeof(*vm_addr);
+	}
+	return total;
+}
+
+static s64 virtio_transport_dgram_has_data(struct vsock_sock *vsk)
+{
+	return virtio_transport_stream_has_data(vsk);
+}
+
 int
 virtio_transport_seqpacket_enqueue(struct vsock_sock *vsk,
 				   struct msghdr *msg,
@@ -611,7 +675,66 @@  virtio_transport_dgram_dequeue(struct vsock_sock *vsk,
 			       struct msghdr *msg,
 			       size_t len, int flags)
 {
-	return -EOPNOTSUPP;
+	struct sock *sk;
+	size_t err = 0;
+	long timeout;
+
+	DEFINE_WAIT(wait);
+
+	sk = &vsk->sk;
+	err = 0;
+
+	if (flags & MSG_OOB || flags & MSG_ERRQUEUE || flags & MSG_PEEK)
+		return -EOPNOTSUPP;
+
+	lock_sock(sk);
+
+	if (!len)
+		goto out;
+
+	timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
+
+	while (1) {
+		s64 ready;
+
+		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
+		ready = virtio_transport_dgram_has_data(vsk);
+
+		if (ready == 0) {
+			if (timeout == 0) {
+				err = -EAGAIN;
+				finish_wait(sk_sleep(sk), &wait);
+				break;
+			}
+
+			release_sock(sk);
+			timeout = schedule_timeout(timeout);
+			lock_sock(sk);
+
+			if (signal_pending(current)) {
+				err = sock_intr_errno(timeout);
+				finish_wait(sk_sleep(sk), &wait);
+				break;
+			} else if (timeout == 0) {
+				err = -EAGAIN;
+				finish_wait(sk_sleep(sk), &wait);
+				break;
+			}
+		} else {
+			finish_wait(sk_sleep(sk), &wait);
+
+			if (ready < 0) {
+				err = -ENOMEM;
+				goto out;
+			}
+
+			err = virtio_transport_dgram_do_dequeue(vsk, msg, len);
+			break;
+		}
+	}
+out:
+	release_sock(sk);
+	return err;
 }
 EXPORT_SYMBOL_GPL(virtio_transport_dgram_dequeue);
 
@@ -819,13 +942,13 @@  EXPORT_SYMBOL_GPL(virtio_transport_stream_allow);
 int virtio_transport_dgram_bind(struct vsock_sock *vsk,
 				struct sockaddr_vm *addr)
 {
-	return -EOPNOTSUPP;
+	return vsock_bind_stream(vsk, addr);
 }
 EXPORT_SYMBOL_GPL(virtio_transport_dgram_bind);
 
 bool virtio_transport_dgram_allow(u32 cid, u32 port)
 {
-	return false;
+	return true;
 }
 EXPORT_SYMBOL_GPL(virtio_transport_dgram_allow);
 
@@ -861,7 +984,16 @@  virtio_transport_dgram_enqueue(struct vsock_sock *vsk,
 			       struct msghdr *msg,
 			       size_t dgram_len)
 {
-	return -EOPNOTSUPP;
+	struct virtio_vsock_pkt_info info = {
+		.op = VIRTIO_VSOCK_OP_RW,
+		.msg = msg,
+		.pkt_len = dgram_len,
+		.vsk = vsk,
+		.remote_cid = remote_addr->svm_cid,
+		.remote_port = remote_addr->svm_port,
+	};
+
+	return virtio_transport_send_pkt_info(vsk, &info);
 }
 EXPORT_SYMBOL_GPL(virtio_transport_dgram_enqueue);
 
@@ -1165,6 +1297,12 @@  virtio_transport_recv_connected(struct sock *sk,
 	struct virtio_vsock_hdr *hdr = vsock_hdr(skb);
 	int err = 0;
 
+	if (le16_to_cpu(vsock_hdr(skb)->type) == VIRTIO_VSOCK_TYPE_DGRAM) {
+		virtio_transport_recv_enqueue(vsk, skb);
+		sk->sk_data_ready(sk);
+		return err;
+	}
+
 	switch (le16_to_cpu(hdr->op)) {
 	case VIRTIO_VSOCK_OP_RW:
 		virtio_transport_recv_enqueue(vsk, skb);
@@ -1320,7 +1458,8 @@  virtio_transport_recv_listen(struct sock *sk, struct sk_buff *skb,
 static bool virtio_transport_valid_type(u16 type)
 {
 	return (type == VIRTIO_VSOCK_TYPE_STREAM) ||
-	       (type == VIRTIO_VSOCK_TYPE_SEQPACKET);
+	       (type == VIRTIO_VSOCK_TYPE_SEQPACKET) ||
+	       (type == VIRTIO_VSOCK_TYPE_DGRAM);
 }
 
 /* We are under the virtio-vsock's vsock->rx_lock or vhost-vsock's vq->mutex
@@ -1384,6 +1523,11 @@  void virtio_transport_recv_pkt(struct virtio_transport *t,
 		goto free_pkt;
 	}
 
+	if (sk->sk_type == SOCK_DGRAM) {
+		virtio_transport_recv_connected(sk, skb);
+		goto out;
+	}
+
 	space_available = virtio_transport_space_update(sk, skb);
 
 	/* Update CID in case it has changed after a transport reset event */
@@ -1415,6 +1559,7 @@  void virtio_transport_recv_pkt(struct virtio_transport *t,
 		break;
 	}
 
+out:
 	release_sock(sk);
 
 	/* Release refcnt obtained when we fetched this socket out of the