diff mbox series

[bpf-next,v3,05/10] af_unix: implement unix_dgram_bpf_recvmsg()

Message ID 20210426025001.7899-6-xiyou.wangcong@gmail.com (mailing list archive)
State Changes Requested
Delegated to: BPF
Headers show
Series sockmap: add sockmap support to Unix datagram socket | expand

Checks

Context Check Description
netdev/cover_letter success Link
netdev/fixes_present success Link
netdev/patch_count success Link
netdev/tree_selection success Clearly marked for bpf-next
netdev/subject_prefix success Link
netdev/cc_maintainers warning 13 maintainers not CCed: gustavoars@kernel.org yhs@fb.com kpsingh@kernel.org andrii@kernel.org jingxiangfeng@huawei.com kafai@fb.com orcohen2006@gmail.com ast@kernel.org jamorris@linux.microsoft.com songliubraving@fb.com christian.brauner@ubuntu.com davem@davemloft.net kuba@kernel.org
netdev/source_inline success Was 0 now: 0
netdev/verify_signedoff success Link
netdev/module_param success Was 0 now: 0
netdev/build_32bit success Errors and warnings before: 73 this patch: 73
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/verify_fixes success Link
netdev/checkpatch warning WARNING: line length of 83 exceeds 80 columns
netdev/build_allmodconfig_warn success Errors and warnings before: 72 this patch: 72
netdev/header_inline success Link

Commit Message

Cong Wang April 26, 2021, 2:49 a.m. UTC
From: Cong Wang <cong.wang@bytedance.com>

We have to implement unix_dgram_bpf_recvmsg() to replace the
original ->recvmsg() to retrieve skmsg from ingress_msg.

AF_UNIX is again special here because of the lack of
sk_prot->recvmsg(). I simply add a special case inside
unix_dgram_recvmsg() to call sk->sk_prot->recvmsg() directly.

Cc: John Fastabend <john.fastabend@gmail.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Jakub Sitnicki <jakub@cloudflare.com>
Cc: Lorenz Bauer <lmb@cloudflare.com>
Signed-off-by: Cong Wang <cong.wang@bytedance.com>
---
 include/net/af_unix.h |  3 +++
 net/unix/af_unix.c    | 21 ++++++++++++++++---
 net/unix/unix_bpf.c   | 49 +++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 70 insertions(+), 3 deletions(-)

Comments

Jakub Sitnicki May 7, 2021, 1:29 p.m. UTC | #1
On Mon, Apr 26, 2021 at 04:49 AM CEST, Cong Wang wrote:
> From: Cong Wang <cong.wang@bytedance.com>
>
> We have to implement unix_dgram_bpf_recvmsg() to replace the
> original ->recvmsg() to retrieve skmsg from ingress_msg.
>
> AF_UNIX is again special here because the lack of
> sk_prot->recvmsg(). I simply add a special case inside
> unix_dgram_recvmsg() to call sk->sk_prot->recvmsg() directly.
>
> Cc: John Fastabend <john.fastabend@gmail.com>
> Cc: Daniel Borkmann <daniel@iogearbox.net>
> Cc: Jakub Sitnicki <jakub@cloudflare.com>
> Cc: Lorenz Bauer <lmb@cloudflare.com>
> Signed-off-by: Cong Wang <cong.wang@bytedance.com>
> ---
>  include/net/af_unix.h |  3 +++
>  net/unix/af_unix.c    | 21 ++++++++++++++++---
>  net/unix/unix_bpf.c   | 49 +++++++++++++++++++++++++++++++++++++++++++
>  3 files changed, 70 insertions(+), 3 deletions(-)
>
> diff --git a/include/net/af_unix.h b/include/net/af_unix.h
> index cca645846af1..e524c82794c9 100644
> --- a/include/net/af_unix.h
> +++ b/include/net/af_unix.h
> @@ -82,6 +82,9 @@ static inline struct unix_sock *unix_sk(const struct sock *sk)
>  long unix_inq_len(struct sock *sk);
>  long unix_outq_len(struct sock *sk);
>  
> +int __unix_dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t size,
> +			 int nonblock, int flags, int *addr_len);
> +
>  #ifdef CONFIG_SYSCTL
>  int unix_sysctl_register(struct net *net);
>  void unix_sysctl_unregister(struct net *net);
> diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
> index c4afc5fbe137..08458fa9f48b 100644
> --- a/net/unix/af_unix.c
> +++ b/net/unix/af_unix.c
> @@ -2088,11 +2088,11 @@ static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
>  	}
>  }
>  
> -static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
> -			      size_t size, int flags)
> +int __unix_dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t size,
> +			 int nonblock, int flags, int *addr_len)
>  {
>  	struct scm_cookie scm;
> -	struct sock *sk = sock->sk;
> +	struct socket *sock = sk->sk_socket;
>  	struct unix_sock *u = unix_sk(sk);
>  	struct sk_buff *skb, *last;
>  	long timeo;
> @@ -2195,6 +2195,21 @@ static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
>  	return err;
>  }
>  
> +static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
> +			      int flags)
> +{
> +	struct sock *sk = sock->sk;
> +	int addr_len = 0;
> +
> +#ifdef CONFIG_BPF_SYSCALL
> +	if (sk->sk_prot != &unix_proto)
> +		return sk->sk_prot->recvmsg(sk, msg, size, flags & MSG_DONTWAIT,
> +					    flags & ~MSG_DONTWAIT, &addr_len);
> +#endif
> +	return __unix_dgram_recvmsg(sk, msg, size, flags & MSG_DONTWAIT,
> +				    flags, &addr_len);
> +}
> +

Nit: We can just pass NULL instead of &addr_len here it seems.

[...]
Cong Wang May 8, 2021, 8:43 p.m. UTC | #2
On Fri, May 7, 2021 at 6:29 AM Jakub Sitnicki <jakub@cloudflare.com> wrote:
>
> On Mon, Apr 26, 2021 at 04:49 AM CEST, Cong Wang wrote:
> > From: Cong Wang <cong.wang@bytedance.com>
> >
> > We have to implement unix_dgram_bpf_recvmsg() to replace the
> > original ->recvmsg() to retrieve skmsg from ingress_msg.
> >
> > AF_UNIX is again special here because the lack of
> > sk_prot->recvmsg(). I simply add a special case inside
> > unix_dgram_recvmsg() to call sk->sk_prot->recvmsg() directly.
> >
> > Cc: John Fastabend <john.fastabend@gmail.com>
> > Cc: Daniel Borkmann <daniel@iogearbox.net>
> > Cc: Jakub Sitnicki <jakub@cloudflare.com>
> > Cc: Lorenz Bauer <lmb@cloudflare.com>
> > Signed-off-by: Cong Wang <cong.wang@bytedance.com>
> > ---
> >  include/net/af_unix.h |  3 +++
> >  net/unix/af_unix.c    | 21 ++++++++++++++++---
> >  net/unix/unix_bpf.c   | 49 +++++++++++++++++++++++++++++++++++++++++++
> >  3 files changed, 70 insertions(+), 3 deletions(-)
> >
> > diff --git a/include/net/af_unix.h b/include/net/af_unix.h
> > index cca645846af1..e524c82794c9 100644
> > --- a/include/net/af_unix.h
> > +++ b/include/net/af_unix.h
> > @@ -82,6 +82,9 @@ static inline struct unix_sock *unix_sk(const struct sock *sk)
> >  long unix_inq_len(struct sock *sk);
> >  long unix_outq_len(struct sock *sk);
> >
> > +int __unix_dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t size,
> > +                      int nonblock, int flags, int *addr_len);
> > +
> >  #ifdef CONFIG_SYSCTL
> >  int unix_sysctl_register(struct net *net);
> >  void unix_sysctl_unregister(struct net *net);
> > diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
> > index c4afc5fbe137..08458fa9f48b 100644
> > --- a/net/unix/af_unix.c
> > +++ b/net/unix/af_unix.c
> > @@ -2088,11 +2088,11 @@ static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
> >       }
> >  }
> >
> > -static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
> > -                           size_t size, int flags)
> > +int __unix_dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t size,
> > +                      int nonblock, int flags, int *addr_len)
> >  {
> >       struct scm_cookie scm;
> > -     struct sock *sk = sock->sk;
> > +     struct socket *sock = sk->sk_socket;
> >       struct unix_sock *u = unix_sk(sk);
> >       struct sk_buff *skb, *last;
> >       long timeo;
> > @@ -2195,6 +2195,21 @@ static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
> >       return err;
> >  }
> >
> > +static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
> > +                           int flags)
> > +{
> > +     struct sock *sk = sock->sk;
> > +     int addr_len = 0;
> > +
> > +#ifdef CONFIG_BPF_SYSCALL
> > +     if (sk->sk_prot != &unix_proto)
> > +             return sk->sk_prot->recvmsg(sk, msg, size, flags & MSG_DONTWAIT,
> > +                                         flags & ~MSG_DONTWAIT, &addr_len);
> > +#endif
> > +     return __unix_dgram_recvmsg(sk, msg, size, flags & MSG_DONTWAIT,
> > +                                 flags, &addr_len);
> > +}
> > +
>
> Nit: We can just pass NULL instead of &addr_len here it seems.

Yeah, we can actually remove this parameter from __unix_dgram_recvmsg().
Only unix_dgram_bpf_recvmsg() needs it, as the sk_prot->recvmsg() signature requires it.

Thanks.
diff mbox series

Patch

diff --git a/include/net/af_unix.h b/include/net/af_unix.h
index cca645846af1..e524c82794c9 100644
--- a/include/net/af_unix.h
+++ b/include/net/af_unix.h
@@ -82,6 +82,9 @@  static inline struct unix_sock *unix_sk(const struct sock *sk)
 long unix_inq_len(struct sock *sk);
 long unix_outq_len(struct sock *sk);
 
+int __unix_dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t size,
+			 int nonblock, int flags, int *addr_len);
+
 #ifdef CONFIG_SYSCTL
 int unix_sysctl_register(struct net *net);
 void unix_sysctl_unregister(struct net *net);
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index c4afc5fbe137..08458fa9f48b 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -2088,11 +2088,11 @@  static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
 	}
 }
 
-static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
-			      size_t size, int flags)
+int __unix_dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t size,
+			 int nonblock, int flags, int *addr_len)
 {
 	struct scm_cookie scm;
-	struct sock *sk = sock->sk;
+	struct socket *sock = sk->sk_socket;
 	struct unix_sock *u = unix_sk(sk);
 	struct sk_buff *skb, *last;
 	long timeo;
@@ -2195,6 +2195,21 @@  static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
 	return err;
 }
 
+static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
+			      int flags)
+{
+	struct sock *sk = sock->sk;
+	int addr_len = 0;
+
+#ifdef CONFIG_BPF_SYSCALL
+	if (sk->sk_prot != &unix_proto)
+		return sk->sk_prot->recvmsg(sk, msg, size, flags & MSG_DONTWAIT,
+					    flags & ~MSG_DONTWAIT, &addr_len);
+#endif
+	return __unix_dgram_recvmsg(sk, msg, size, flags & MSG_DONTWAIT,
+				    flags, &addr_len);
+}
+
 static int unix_read_sock(struct sock *sk, read_descriptor_t *desc,
 			  sk_read_actor_t recv_actor)
 {
diff --git a/net/unix/unix_bpf.c b/net/unix/unix_bpf.c
index b1582a659427..b2c34aeb848f 100644
--- a/net/unix/unix_bpf.c
+++ b/net/unix/unix_bpf.c
@@ -6,6 +6,54 @@ 
 #include <net/sock.h>
 #include <net/af_unix.h>
 
+static int unix_dgram_bpf_recvmsg(struct sock *sk, struct msghdr *msg,
+				  size_t len, int nonblock, int flags,
+				  int *addr_len)
+{
+	struct sk_psock *psock;
+	int copied, ret;
+
+	psock = sk_psock_get(sk);
+	if (unlikely(!psock))
+		return __unix_dgram_recvmsg(sk, msg, len, nonblock, flags,
+					    addr_len);
+
+	lock_sock(sk);
+	if (!skb_queue_empty(&sk->sk_receive_queue) &&
+	    sk_psock_queue_empty(psock)) {
+		ret = __unix_dgram_recvmsg(sk, msg, len, nonblock, flags,
+					   addr_len);
+		goto out;
+	}
+
+msg_bytes_ready:
+	copied = sk_msg_recvmsg(sk, psock, msg, len, flags);
+	if (!copied) {
+		int data, err = 0;
+		long timeo;
+
+		timeo = sock_rcvtimeo(sk, nonblock);
+		data = sk_msg_wait_data(sk, psock, flags, timeo, &err);
+		if (data) {
+			if (!sk_psock_queue_empty(psock))
+				goto msg_bytes_ready;
+			ret = __unix_dgram_recvmsg(sk, msg, len, nonblock,
+						   flags, addr_len);
+			goto out;
+		}
+		if (err) {
+			ret = err;
+			goto out;
+		}
+		copied = -EAGAIN;
+	}
+	ret = copied;
+out:
+	release_sock(sk);
+	sk_psock_put(sk, psock);
+	return ret;
+}
+
 static struct proto *unix_prot_saved __read_mostly;
 static DEFINE_SPINLOCK(unix_prot_lock);
 static struct proto unix_bpf_prot;
@@ -14,6 +62,7 @@  static void unix_bpf_rebuild_protos(struct proto *prot, const struct proto *base
 {
 	*prot        = *base;
 	prot->close  = sock_map_close;
+	prot->recvmsg = unix_dgram_bpf_recvmsg;
 }
 
 static void unix_bpf_check_needs_rebuild(struct proto *ops)