diff mbox series

[bpf-next,v3,02/10] af_unix: implement ->read_sock() for sockmap

Message ID 20210426025001.7899-3-xiyou.wangcong@gmail.com (mailing list archive)
State Changes Requested
Delegated to: BPF
Headers show
Series sockmap: add sockmap support to Unix datagram socket | expand

Checks

Context Check Description
netdev/cover_letter success Link
netdev/fixes_present success Link
netdev/patch_count success Link
netdev/tree_selection success Clearly marked for bpf-next
netdev/subject_prefix success Link
netdev/cc_maintainers warning 13 maintainers not CCed: gustavoars@kernel.org yhs@fb.com kpsingh@kernel.org andrii@kernel.org jingxiangfeng@huawei.com kafai@fb.com jamorris@linux.microsoft.com ast@kernel.org orcohen2006@gmail.com songliubraving@fb.com christian.brauner@ubuntu.com davem@davemloft.net kuba@kernel.org
netdev/source_inline success Was 0 now: 0
netdev/verify_signedoff success Link
netdev/module_param success Was 0 now: 0
netdev/build_32bit success Errors and warnings before: 4 this patch: 4
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/verify_fixes success Link
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 56 lines checked
netdev/build_allmodconfig_warn success Errors and warnings before: 4 this patch: 4
netdev/header_inline success Link

Commit Message

Cong Wang April 26, 2021, 2:49 a.m. UTC
From: Cong Wang <cong.wang@bytedance.com>

Implement ->read_sock() for AF_UNIX datagram socket, it is
pretty much similar to udp_read_sock().

Cc: John Fastabend <john.fastabend@gmail.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Jakub Sitnicki <jakub@cloudflare.com>
Cc: Lorenz Bauer <lmb@cloudflare.com>
Signed-off-by: Cong Wang <cong.wang@bytedance.com>
---
 net/unix/af_unix.c | 38 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 38 insertions(+)

Comments

Jakub Sitnicki May 5, 2021, 5:14 p.m. UTC | #1
On Mon, Apr 26, 2021 at 04:49 AM CEST, Cong Wang wrote:
> From: Cong Wang <cong.wang@bytedance.com>
>
> Implement ->read_sock() for AF_UNIX datagram socket, it is
> pretty much similar to udp_read_sock().
>
> Cc: John Fastabend <john.fastabend@gmail.com>
> Cc: Daniel Borkmann <daniel@iogearbox.net>
> Cc: Jakub Sitnicki <jakub@cloudflare.com>
> Cc: Lorenz Bauer <lmb@cloudflare.com>
> Signed-off-by: Cong Wang <cong.wang@bytedance.com>
> ---
>  net/unix/af_unix.c | 38 ++++++++++++++++++++++++++++++++++++++
>  1 file changed, 38 insertions(+)
>
> diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
> index 5a31307ceb76..f4dc22db371d 100644
> --- a/net/unix/af_unix.c
> +++ b/net/unix/af_unix.c
> @@ -661,6 +661,8 @@ static ssize_t unix_stream_splice_read(struct socket *,  loff_t *ppos,
>  				       unsigned int flags);
>  static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
>  static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
> +static int unix_read_sock(struct sock *sk, read_descriptor_t *desc,
> +			  sk_read_actor_t recv_actor);
>  static int unix_dgram_connect(struct socket *, struct sockaddr *,
>  			      int, int);
>  static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
> @@ -738,6 +740,7 @@ static const struct proto_ops unix_dgram_ops = {
>  	.listen =	sock_no_listen,
>  	.shutdown =	unix_shutdown,
>  	.sendmsg =	unix_dgram_sendmsg,
> +	.read_sock =	unix_read_sock,
>  	.recvmsg =	unix_dgram_recvmsg,
>  	.mmap =		sock_no_mmap,
>  	.sendpage =	sock_no_sendpage,
> @@ -2183,6 +2186,41 @@ static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
>  	return err;
>  }
>
> +static int unix_read_sock(struct sock *sk, read_descriptor_t *desc,
> +			  sk_read_actor_t recv_actor)
> +{
> +	int copied = 0;
> +
> +	while (1) {
> +		struct unix_sock *u = unix_sk(sk);
> +		struct sk_buff *skb;
> +		int used, err;
> +
> +		mutex_lock(&u->iolock);
> +		skb = skb_recv_datagram(sk, 0, 1, &err);
> +		if (!skb) {
> +			mutex_unlock(&u->iolock);
> +			return err;
> +		}
> +
> +		used = recv_actor(desc, skb, 0, skb->len);
> +		if (used <= 0) {
> +			if (!copied)
> +				copied = used;
> +			mutex_unlock(&u->iolock);
> +			break;
> +		} else if (used <= skb->len) {
> +			copied += used;
> +		}
> +		mutex_unlock(&u->iolock);

Do we need hold the mutex for recv_actor to process the skb?

> +
> +		if (!desc->count)
> +			break;
> +	}
> +
> +	return copied;
> +}
> +
>  /*
>   *	Sleep until more data has arrived. But check for races..
>   */
Cong Wang May 7, 2021, 1 a.m. UTC | #2
On Wed, May 5, 2021 at 10:14 AM Jakub Sitnicki <jakub@cloudflare.com> wrote:
>
> On Mon, Apr 26, 2021 at 04:49 AM CEST, Cong Wang wrote:
> > From: Cong Wang <cong.wang@bytedance.com>
> >
> > Implement ->read_sock() for AF_UNIX datagram socket, it is
> > pretty much similar to udp_read_sock().
> >
> > Cc: John Fastabend <john.fastabend@gmail.com>
> > Cc: Daniel Borkmann <daniel@iogearbox.net>
> > Cc: Jakub Sitnicki <jakub@cloudflare.com>
> > Cc: Lorenz Bauer <lmb@cloudflare.com>
> > Signed-off-by: Cong Wang <cong.wang@bytedance.com>
> > ---
> >  net/unix/af_unix.c | 38 ++++++++++++++++++++++++++++++++++++++
> >  1 file changed, 38 insertions(+)
> >
> > diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
> > index 5a31307ceb76..f4dc22db371d 100644
> > --- a/net/unix/af_unix.c
> > +++ b/net/unix/af_unix.c
> > @@ -661,6 +661,8 @@ static ssize_t unix_stream_splice_read(struct socket *,  loff_t *ppos,
> >                                      unsigned int flags);
> >  static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
> >  static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
> > +static int unix_read_sock(struct sock *sk, read_descriptor_t *desc,
> > +                       sk_read_actor_t recv_actor);
> >  static int unix_dgram_connect(struct socket *, struct sockaddr *,
> >                             int, int);
> >  static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
> > @@ -738,6 +740,7 @@ static const struct proto_ops unix_dgram_ops = {
> >       .listen =       sock_no_listen,
> >       .shutdown =     unix_shutdown,
> >       .sendmsg =      unix_dgram_sendmsg,
> > +     .read_sock =    unix_read_sock,
> >       .recvmsg =      unix_dgram_recvmsg,
> >       .mmap =         sock_no_mmap,
> >       .sendpage =     sock_no_sendpage,
> > @@ -2183,6 +2186,41 @@ static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
> >       return err;
> >  }
> >
> > +static int unix_read_sock(struct sock *sk, read_descriptor_t *desc,
> > +                       sk_read_actor_t recv_actor)
> > +{
> > +     int copied = 0;
> > +
> > +     while (1) {
> > +             struct unix_sock *u = unix_sk(sk);
> > +             struct sk_buff *skb;
> > +             int used, err;
> > +
> > +             mutex_lock(&u->iolock);
> > +             skb = skb_recv_datagram(sk, 0, 1, &err);
> > +             if (!skb) {
> > +                     mutex_unlock(&u->iolock);
> > +                     return err;
> > +             }
> > +
> > +             used = recv_actor(desc, skb, 0, skb->len);
> > +             if (used <= 0) {
> > +                     if (!copied)
> > +                             copied = used;
> > +                     mutex_unlock(&u->iolock);
> > +                     break;
> > +             } else if (used <= skb->len) {
> > +                     copied += used;
> > +             }
> > +             mutex_unlock(&u->iolock);
>
> Do we need hold the mutex for recv_actor to process the skb?

Hm, it does look like we can just release it after dequeuing the
skb. Let me double check.

Thanks.
John Fastabend May 11, 2021, 5:34 a.m. UTC | #3
Cong Wang wrote:
> From: Cong Wang <cong.wang@bytedance.com>
> 
> Implement ->read_sock() for AF_UNIX datagram socket, it is
> pretty much similar to udp_read_sock().
> 
> Cc: John Fastabend <john.fastabend@gmail.com>
> Cc: Daniel Borkmann <daniel@iogearbox.net>
> Cc: Jakub Sitnicki <jakub@cloudflare.com>
> Cc: Lorenz Bauer <lmb@cloudflare.com>
> Signed-off-by: Cong Wang <cong.wang@bytedance.com>
> ---
>  net/unix/af_unix.c | 38 ++++++++++++++++++++++++++++++++++++++
>  1 file changed, 38 insertions(+)
> 
> diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
> index 5a31307ceb76..f4dc22db371d 100644
> --- a/net/unix/af_unix.c
> +++ b/net/unix/af_unix.c
> @@ -661,6 +661,8 @@ static ssize_t unix_stream_splice_read(struct socket *,  loff_t *ppos,
>  				       unsigned int flags);
>  static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
>  static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
> +static int unix_read_sock(struct sock *sk, read_descriptor_t *desc,
> +			  sk_read_actor_t recv_actor);
>  static int unix_dgram_connect(struct socket *, struct sockaddr *,
>  			      int, int);
>  static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
> @@ -738,6 +740,7 @@ static const struct proto_ops unix_dgram_ops = {
>  	.listen =	sock_no_listen,
>  	.shutdown =	unix_shutdown,
>  	.sendmsg =	unix_dgram_sendmsg,
> +	.read_sock =	unix_read_sock,
>  	.recvmsg =	unix_dgram_recvmsg,
>  	.mmap =		sock_no_mmap,
>  	.sendpage =	sock_no_sendpage,
> @@ -2183,6 +2186,41 @@ static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
>  	return err;
>  }
>  
> +static int unix_read_sock(struct sock *sk, read_descriptor_t *desc,
> +			  sk_read_actor_t recv_actor)
> +{
> +	int copied = 0;
> +
> +	while (1) {
> +		struct unix_sock *u = unix_sk(sk);
> +		struct sk_buff *skb;
> +		int used, err;
> +
> +		mutex_lock(&u->iolock);
> +		skb = skb_recv_datagram(sk, 0, 1, &err);
> +		if (!skb) {
> +			mutex_unlock(&u->iolock);
> +			return err;

Here we should check copied and break if copied is >0. Sure the caller here
has desc.count = 1 but its still fairly fragile.

> +		}
> +
> +		used = recv_actor(desc, skb, 0, skb->len);
> +		if (used <= 0) {
> +			if (!copied)
> +				copied = used;
> +			mutex_unlock(&u->iolock);
> +			break;
> +		} else if (used <= skb->len) {
> +			copied += used;
> +		}
> +		mutex_unlock(&u->iolock);
> +
> +		if (!desc->count)
> +			break;
> +	}
> +
> +	return copied;
> +}
> +
>  /*
>   *	Sleep until more data has arrived. But check for races..
>   */
> -- 
> 2.25.1
>
Cong Wang May 18, 2021, 4:46 a.m. UTC | #4
On Mon, May 10, 2021 at 10:34 PM John Fastabend
<john.fastabend@gmail.com> wrote:
> > +static int unix_read_sock(struct sock *sk, read_descriptor_t *desc,
> > +                       sk_read_actor_t recv_actor)
> > +{
> > +     int copied = 0;
> > +
> > +     while (1) {
> > +             struct unix_sock *u = unix_sk(sk);
> > +             struct sk_buff *skb;
> > +             int used, err;
> > +
> > +             mutex_lock(&u->iolock);
> > +             skb = skb_recv_datagram(sk, 0, 1, &err);
> > +             if (!skb) {
> > +                     mutex_unlock(&u->iolock);
> > +                     return err;
>
> Here we should check copied and break if copied is >0. Sure the caller here
> has desc.count = 1 but its still fairly fragile.

Technically, sockmap does not even care about what we return
here, so I am sure what you suggest here even makes a difference.
Also, desc->count is always 1 and never changes here.

Thanks.
John Fastabend May 18, 2021, 5:11 a.m. UTC | #5
Cong Wang wrote:
> On Mon, May 10, 2021 at 10:34 PM John Fastabend
> <john.fastabend@gmail.com> wrote:
> > > +static int unix_read_sock(struct sock *sk, read_descriptor_t *desc,
> > > +                       sk_read_actor_t recv_actor)
> > > +{
> > > +     int copied = 0;
> > > +
> > > +     while (1) {
> > > +             struct unix_sock *u = unix_sk(sk);
> > > +             struct sk_buff *skb;
> > > +             int used, err;
> > > +
> > > +             mutex_lock(&u->iolock);
> > > +             skb = skb_recv_datagram(sk, 0, 1, &err);
> > > +             if (!skb) {
> > > +                     mutex_unlock(&u->iolock);
> > > +                     return err;
> >
> > Here we should check copied and break if copied is >0. Sure the caller here
> > has desc.count = 1 but its still fairly fragile.
> 
> Technically, sockmap does not even care about what we return
> here, so I am sure what you suggest here even makes a difference.
> Also, desc->count is always 1 and never changes here.

Right, so either don't wrap it in a while() loop so its obviously
not workable or fix it so that it returns the correct copied value if
we ever did pass it count > 1.. 

> 
> Thanks.
diff mbox series

Patch

diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 5a31307ceb76..f4dc22db371d 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -661,6 +661,8 @@  static ssize_t unix_stream_splice_read(struct socket *,  loff_t *ppos,
 				       unsigned int flags);
 static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
 static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
+static int unix_read_sock(struct sock *sk, read_descriptor_t *desc,
+			  sk_read_actor_t recv_actor);
 static int unix_dgram_connect(struct socket *, struct sockaddr *,
 			      int, int);
 static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
@@ -738,6 +740,7 @@  static const struct proto_ops unix_dgram_ops = {
 	.listen =	sock_no_listen,
 	.shutdown =	unix_shutdown,
 	.sendmsg =	unix_dgram_sendmsg,
+	.read_sock =	unix_read_sock,
 	.recvmsg =	unix_dgram_recvmsg,
 	.mmap =		sock_no_mmap,
 	.sendpage =	sock_no_sendpage,
@@ -2183,6 +2186,41 @@  static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
 	return err;
 }
 
+static int unix_read_sock(struct sock *sk, read_descriptor_t *desc,
+			  sk_read_actor_t recv_actor)
+{
+	int copied = 0;
+
+	while (1) {
+		struct unix_sock *u = unix_sk(sk);
+		struct sk_buff *skb;
+		int used, err;
+
+		mutex_lock(&u->iolock);
+		skb = skb_recv_datagram(sk, 0, 1, &err);
+		if (!skb) {
+			mutex_unlock(&u->iolock);
+			return err;
+		}
+
+		used = recv_actor(desc, skb, 0, skb->len);
+		if (used <= 0) {
+			if (!copied)
+				copied = used;
+			mutex_unlock(&u->iolock);
+			break;
+		} else if (used <= skb->len) {
+			copied += used;
+		}
+		mutex_unlock(&u->iolock);
+
+		if (!desc->count)
+			break;
+	}
+
+	return copied;
+}
+
 /*
  *	Sleep until more data has arrived. But check for races..
  */