diff mbox series

[bpf-next,v2,5/9] bpf: Implement cgroup sockaddr hooks for unix sockets

Message ID 20221210193559.371515-6-daan.j.demeyer@gmail.com (mailing list archive)
State Changes Requested
Delegated to: BPF
Headers show
Series Add cgroup sockaddr hooks for unix sockets | expand

Checks

Context Check Description
bpf/vmtest-bpf-next-PR success PR summary
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-7 success Logs for llvm-toolchain
bpf/vmtest-bpf-next-VM_Test-8 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-2 success Logs for build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-3 success Logs for build for aarch64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-5 success Logs for build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-6 success Logs for build for x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-4 success Logs for build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-9 fail Logs for test_maps on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-10 fail Logs for test_maps on aarch64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-12 fail Logs for test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-13 fail Logs for test_maps on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-14 fail Logs for test_progs on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-15 fail Logs for test_progs on aarch64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-17 fail Logs for test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-18 fail Logs for test_progs on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-19 fail Logs for test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-20 fail Logs for test_progs_no_alu32 on aarch64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-22 fail Logs for test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-23 fail Logs for test_progs_no_alu32 on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-24 success Logs for test_progs_no_alu32_parallel on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-25 success Logs for test_progs_no_alu32_parallel on aarch64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-27 success Logs for test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-28 success Logs for test_progs_no_alu32_parallel on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-29 success Logs for test_progs_parallel on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-30 success Logs for test_progs_parallel on aarch64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-32 success Logs for test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-33 success Logs for test_progs_parallel on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-34 fail Logs for test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-35 fail Logs for test_verifier on aarch64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-36 fail Logs for test_verifier on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-37 fail Logs for test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-38 fail Logs for test_verifier on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-21 fail Logs for test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-26 success Logs for test_progs_no_alu32_parallel on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-16 fail Logs for test_progs on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-31 success Logs for test_progs_parallel on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-11 fail Logs for test_maps on s390x with gcc
netdev/tree_selection success Clearly marked for bpf-next, async
netdev/fixes_present success Fixes tag not required for -next series
netdev/subject_prefix success Link
netdev/cover_letter success Series has a cover letter
netdev/patch_count success Link
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 12349 this patch: 12349
netdev/cc_maintainers warning 16 maintainers not CCed: edumazet@google.com netdev@vger.kernel.org ast@kernel.org kpsingh@kernel.org haoluo@google.com davem@davemloft.net song@kernel.org yhs@fb.com daniel@iogearbox.net kuba@kernel.org kuniyu@amazon.com andrii@kernel.org sdf@google.com pabeni@redhat.com john.fastabend@gmail.com jolsa@kernel.org
netdev/build_clang success Errors and warnings before: 3184 this patch: 3184
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff fail author Signed-off-by missing
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 13028 this patch: 13028
netdev/checkpatch warning CHECK: Alignment should match open parenthesis CHECK: Lines should not end with a '(' WARNING: line length of 81 exceeds 80 columns WARNING: line length of 82 exceeds 80 columns WARNING: line length of 83 exceeds 80 columns WARNING: line length of 84 exceeds 80 columns WARNING: line length of 85 exceeds 80 columns WARNING: line length of 86 exceeds 80 columns WARNING: please, no space before tabs
netdev/kdoc success Errors and warnings before: 12 this patch: 12
netdev/source_inline success Was 0 now: 0

Commit Message

Daan De Meyer Dec. 10, 2022, 7:35 p.m. UTC
These hooks allow intercepting bind(), connect(), getsockname(),
getpeername(), sendmsg() and recvmsg() for unix sockets. The unix
socket hooks get write access to the address length because the
address length is not fixed when dealing with unix sockets and
needs to be modified when a unix socket address is modified by
the hook. Because abstract unix socket addresses start with a
NUL byte, we cannot recalculate the socket address length in
kernelspace after running the hook by calculating the length of
the unix socket path using strlen().

These hooks can be used when users want to multiplex syscalls on a
single unix socket across multiple different processes behind the
scenes by redirecting connect() and other syscalls to
process-specific sockets.
---
 include/linux/bpf-cgroup-defs.h |  6 +++
 include/linux/bpf-cgroup.h      | 29 ++++++++++-
 include/uapi/linux/bpf.h        | 14 ++++--
 kernel/bpf/cgroup.c             | 11 ++++-
 kernel/bpf/syscall.c            | 18 +++++++
 kernel/bpf/verifier.c           |  7 ++-
 net/core/filter.c               | 45 +++++++++++++++--
 net/unix/af_unix.c              | 85 +++++++++++++++++++++++++++++----
 tools/include/uapi/linux/bpf.h  | 14 ++++--
 9 files changed, 204 insertions(+), 25 deletions(-)

Comments

Yonghong Song Dec. 13, 2022, 6:20 a.m. UTC | #1
On 12/10/22 11:35 AM, Daan De Meyer wrote:
> These hooks allows intercepting bind(), connect(), getsockname(),
> getpeername(), sendmsg() and recvmsg() for unix sockets. The unix
> socket hooks get write access to the address length because the
> address length is not fixed when dealing with unix sockets and
> needs to be modified when a unix socket address is modified by
> the hook. Because abstract socket unix addresses start with a
> NUL byte, we cannot recalculate the socket address in kernelspace
> after running the hook by calculating the length of the unix socket
> path using strlen().

Yes, we cannot calculate the socket path length with strlen().
But we still have a method to find the path. In
unix_seq_show(), the unix socket path is calculated as below,

                 if (u->addr) {  // under a hash table lock here
                         int i, len;
                         seq_putc(seq, ' ');

                         i = 0;
                         len = u->addr->len -
                                 offsetof(struct sockaddr_un, sun_path);
                         if (u->addr->name->sun_path[0]) {
                                 len--;
                         } else {
                                 seq_putc(seq, '@');
                                 i++;
                         }
                         for ( ; i < len; i++)
                                 seq_putc(seq, u->addr->name->sun_path[i] ?:
                                          '@');
                 }

Is it possible that we can use the above method to find the
address length so we won't need to pass uaddr_len to bpf program?

Since none of the other hooks need uaddr_len, you could add some
new hooks for unix sockets which specially calculate uaddr_len
after the bpf program runs.

> 
> This hook can be used when users want to multiplex syscall to a
> single unix socket to multiple different processes behind the scenes
> by redirecting the connect() and other syscalls to process specific
> sockets.
> ---
>   include/linux/bpf-cgroup-defs.h |  6 +++
>   include/linux/bpf-cgroup.h      | 29 ++++++++++-
>   include/uapi/linux/bpf.h        | 14 ++++--
>   kernel/bpf/cgroup.c             | 11 ++++-
>   kernel/bpf/syscall.c            | 18 +++++++
>   kernel/bpf/verifier.c           |  7 ++-
>   net/core/filter.c               | 45 +++++++++++++++--
>   net/unix/af_unix.c              | 85 +++++++++++++++++++++++++++++----
>   tools/include/uapi/linux/bpf.h  | 14 ++++--
>   9 files changed, 204 insertions(+), 25 deletions(-)
> 
> diff --git a/include/linux/bpf-cgroup-defs.h b/include/linux/bpf-cgroup-defs.h
> index 7b121bd780eb..8196ccb81915 100644
> --- a/include/linux/bpf-cgroup-defs.h
> +++ b/include/linux/bpf-cgroup-defs.h
> @@ -26,21 +26,27 @@ enum cgroup_bpf_attach_type {
>   	CGROUP_DEVICE,
>   	CGROUP_INET4_BIND,
>   	CGROUP_INET6_BIND,
> +	CGROUP_UNIX_BIND,
>   	CGROUP_INET4_CONNECT,
>   	CGROUP_INET6_CONNECT,
> +	CGROUP_UNIX_CONNECT,
>   	CGROUP_INET4_POST_BIND,
>   	CGROUP_INET6_POST_BIND,
>   	CGROUP_UDP4_SENDMSG,
>   	CGROUP_UDP6_SENDMSG,
> +	CGROUP_UNIX_SENDMSG,
>   	CGROUP_SYSCTL,
>   	CGROUP_UDP4_RECVMSG,
>   	CGROUP_UDP6_RECVMSG,
> +	CGROUP_UNIX_RECVMSG,
>   	CGROUP_GETSOCKOPT,
>   	CGROUP_SETSOCKOPT,
>   	CGROUP_INET4_GETPEERNAME,
>   	CGROUP_INET6_GETPEERNAME,
> +	CGROUP_UNIX_GETPEERNAME,
>   	CGROUP_INET4_GETSOCKNAME,
>   	CGROUP_INET6_GETSOCKNAME,
> +	CGROUP_UNIX_GETSOCKNAME,
>   	CGROUP_INET_SOCK_RELEASE,
>   	CGROUP_LSM_START,
>   	CGROUP_LSM_END = CGROUP_LSM_START + CGROUP_LSM_NUM - 1,
> diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
> index 3ab2f06ddc8a..4de3016f01e4 100644
> --- a/include/linux/bpf-cgroup.h
> +++ b/include/linux/bpf-cgroup.h
> @@ -46,21 +46,27 @@ to_cgroup_bpf_attach_type(enum bpf_attach_type attach_type)
>   	CGROUP_ATYPE(CGROUP_DEVICE);
>   	CGROUP_ATYPE(CGROUP_INET4_BIND);
>   	CGROUP_ATYPE(CGROUP_INET6_BIND);
> +	CGROUP_ATYPE(CGROUP_UNIX_BIND);
>   	CGROUP_ATYPE(CGROUP_INET4_CONNECT);
>   	CGROUP_ATYPE(CGROUP_INET6_CONNECT);
> +	CGROUP_ATYPE(CGROUP_UNIX_CONNECT);
>   	CGROUP_ATYPE(CGROUP_INET4_POST_BIND);
>   	CGROUP_ATYPE(CGROUP_INET6_POST_BIND);
>   	CGROUP_ATYPE(CGROUP_UDP4_SENDMSG);
>   	CGROUP_ATYPE(CGROUP_UDP6_SENDMSG);
> +	CGROUP_ATYPE(CGROUP_UNIX_SENDMSG);
>   	CGROUP_ATYPE(CGROUP_SYSCTL);
>   	CGROUP_ATYPE(CGROUP_UDP4_RECVMSG);
>   	CGROUP_ATYPE(CGROUP_UDP6_RECVMSG);
> +	CGROUP_ATYPE(CGROUP_UNIX_RECVMSG);
>   	CGROUP_ATYPE(CGROUP_GETSOCKOPT);
>   	CGROUP_ATYPE(CGROUP_SETSOCKOPT);
>   	CGROUP_ATYPE(CGROUP_INET4_GETPEERNAME);
>   	CGROUP_ATYPE(CGROUP_INET6_GETPEERNAME);
> +	CGROUP_ATYPE(CGROUP_UNIX_GETPEERNAME);
>   	CGROUP_ATYPE(CGROUP_INET4_GETSOCKNAME);
>   	CGROUP_ATYPE(CGROUP_INET6_GETSOCKNAME);
> +	CGROUP_ATYPE(CGROUP_UNIX_GETSOCKNAME);
>   	CGROUP_ATYPE(CGROUP_INET_SOCK_RELEASE);
>   	default:
>   		return CGROUP_BPF_ATTACH_TYPE_INVALID;
> @@ -273,9 +279,13 @@ static inline bool cgroup_bpf_sock_enabled(struct sock *sk,
>   		__ret;                                                       \
>   	})
>   
> +#define BPF_CGROUP_RUN_PROG_UNIX_BIND_LOCK(sk, uaddr, uaddrlen)			\
> +	BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UNIX_BIND, NULL)
> +
>   #define BPF_CGROUP_PRE_CONNECT_ENABLED(sk)				       \
>   	((cgroup_bpf_enabled(CGROUP_INET4_CONNECT) ||		       \
> -	  cgroup_bpf_enabled(CGROUP_INET6_CONNECT)) &&		       \
> +	  cgroup_bpf_enabled(CGROUP_INET6_CONNECT) ||		       \
> +	  cgroup_bpf_enabled(CGROUP_UNIX_CONNECT)) &&		       \
>   	 (sk)->sk_prot->pre_connect)
>   
>   #define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr, uaddrlen)		       \
> @@ -284,24 +294,36 @@ static inline bool cgroup_bpf_sock_enabled(struct sock *sk,
>   #define BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr, uaddrlen)		       \
>   	BPF_CGROUP_RUN_SA_PROG(sk, uaddr, uaddrlen, CGROUP_INET6_CONNECT)
>   
> +#define BPF_CGROUP_RUN_PROG_UNIX_CONNECT(sk, uaddr, uaddrlen)	               \
> +	BPF_CGROUP_RUN_SA_PROG(sk, uaddr, uaddrlen, CGROUP_UNIX_CONNECT)
> +
>   #define BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr, uaddrlen)	       \
>   	BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_INET4_CONNECT, NULL)
>   
>   #define BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr, uaddrlen)	       \
>   	BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_INET6_CONNECT, NULL)
>   
> +#define BPF_CGROUP_RUN_PROG_UNIX_CONNECT_LOCK(sk, uaddr, uaddrlen)	       \
> +	BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UNIX_CONNECT, NULL)
> +
>   #define BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx)       \
>   	BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UDP4_SENDMSG, t_ctx)
>   
>   #define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx)       \
>   	BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UDP6_SENDMSG, t_ctx)
>   
> +#define BPF_CGROUP_RUN_PROG_UNIX_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx)	\
> +	BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UNIX_SENDMSG, t_ctx)
> +
>   #define BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk, uaddr, uaddrlen)		\
>   	BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UDP4_RECVMSG, NULL)
>   
>   #define BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk, uaddr, uaddrlen)		\
>   	BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UDP6_RECVMSG, NULL)
>   
> +#define BPF_CGROUP_RUN_PROG_UNIX_RECVMSG_LOCK(sk, uaddr, uaddrlen)		\
> +	BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UNIX_RECVMSG, NULL)
> +
>   /* The SOCK_OPS"_SK" macro should be used when sock_ops->sk is not a
>    * fullsock and its parent fullsock cannot be traced by
>    * sk_to_full_sk().
> @@ -487,16 +509,21 @@ static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map,
>   #define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) ({ 0; })
>   #define BPF_CGROUP_RUN_PROG_INET_SOCK_RELEASE(sk) ({ 0; })
>   #define BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, uaddrlen, atype, flags) ({ 0; })
> +#define BPF_CGROUP_RUN_PROG_UNIX_BIND_LOCK(sk, uaddr, uaddrlen) ({ 0; })
>   #define BPF_CGROUP_RUN_PROG_INET4_POST_BIND(sk) ({ 0; })
>   #define BPF_CGROUP_RUN_PROG_INET6_POST_BIND(sk) ({ 0; })
>   #define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr, uaddrlen) ({ 0; })
>   #define BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr, uaddrlen) ({ 0; })
>   #define BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr, uaddrlen) ({ 0; })
>   #define BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr, uaddrlen) ({ 0; })
> +#define BPF_CGROUP_RUN_PROG_UNIX_CONNECT(sk, uaddr, uaddrlen) ({ 0; })
> +#define BPF_CGROUP_RUN_PROG_UNIX_CONNECT_LOCK(sk, uaddr, uaddrlen) ({ 0; })
>   #define BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx) ({ 0; })
>   #define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx) ({ 0; })
> +#define BPF_CGROUP_RUN_PROG_UNIX_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx) ({ 0; })
>   #define BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk, uaddr, uaddrlen) ({ 0; })
>   #define BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk, uaddr, uaddrlen) ({ 0; })
> +#define BPF_CGROUP_RUN_PROG_UNIX_RECVMSG_LOCK(sk, uaddr, uaddrlen) ({ 0; })
>   #define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) ({ 0; })
>   #define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(atype, major, minor, access) ({ 0; })
>   #define BPF_CGROUP_RUN_PROG_SYSCTL(head,table,write,buf,count,pos) ({ 0; })
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index 9e3c33f83bba..b73e4da458fd 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -999,17 +999,21 @@ enum bpf_attach_type {
>   	BPF_SK_MSG_VERDICT,
>   	BPF_CGROUP_INET4_BIND,
>   	BPF_CGROUP_INET6_BIND,
> +	BPF_CGROUP_UNIX_BIND,
>   	BPF_CGROUP_INET4_CONNECT,
>   	BPF_CGROUP_INET6_CONNECT,
> +	BPF_CGROUP_UNIX_CONNECT,
>   	BPF_CGROUP_INET4_POST_BIND,
>   	BPF_CGROUP_INET6_POST_BIND,
>   	BPF_CGROUP_UDP4_SENDMSG,
>   	BPF_CGROUP_UDP6_SENDMSG,
> +	BPF_CGROUP_UNIX_SENDMSG,
>   	BPF_LIRC_MODE2,
>   	BPF_FLOW_DISSECTOR,
>   	BPF_CGROUP_SYSCTL,
>   	BPF_CGROUP_UDP4_RECVMSG,
>   	BPF_CGROUP_UDP6_RECVMSG,
> +	BPF_CGROUP_UNIX_RECVMSG,
>   	BPF_CGROUP_GETSOCKOPT,
>   	BPF_CGROUP_SETSOCKOPT,
>   	BPF_TRACE_RAW_TP,
> @@ -1020,8 +1024,10 @@ enum bpf_attach_type {
>   	BPF_TRACE_ITER,
>   	BPF_CGROUP_INET4_GETPEERNAME,
>   	BPF_CGROUP_INET6_GETPEERNAME,
> +	BPF_CGROUP_UNIX_GETPEERNAME,
>   	BPF_CGROUP_INET4_GETSOCKNAME,
>   	BPF_CGROUP_INET6_GETSOCKNAME,
> +	BPF_CGROUP_UNIX_GETSOCKNAME,
>   	BPF_XDP_DEVMAP,
>   	BPF_CGROUP_INET_SOCK_RELEASE,
>   	BPF_XDP_CPUMAP,

This is uapi. Please add new attach types to the end of the enum.

> @@ -2575,8 +2581,8 @@ union bpf_attr {
>    * 		*bpf_socket* should be one of the following:
>    *
>    * 		* **struct bpf_sock_ops** for **BPF_PROG_TYPE_SOCK_OPS**.
> - * 		* **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT**
> - * 		  and **BPF_CGROUP_INET6_CONNECT**.
> + * 		* **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT**,
> + * 		  **BPF_CGROUP_INET6_CONNECT** and **BPF_CGROUP_UNIX_CONNECT**.
>    *
>    * 		This helper actually implements a subset of **setsockopt()**.
>    * 		It supports the following *level*\ s:
> @@ -2809,8 +2815,8 @@ union bpf_attr {
>    * 		*bpf_socket* should be one of the following:
>    *
>    * 		* **struct bpf_sock_ops** for **BPF_PROG_TYPE_SOCK_OPS**.
> - * 		* **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT**
> - * 		  and **BPF_CGROUP_INET6_CONNECT**.
> + * 		* **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT**,
> + * 		  **BPF_CGROUP_INET6_CONNECT** and **BPF_CGROUP_UNIX_CONNECT**.
>    *
>    * 		This helper actually implements a subset of **getsockopt()**.
>    * 		It supports the same set of *optname*\ s that is supported by
[...]
Daan De Meyer Dec. 13, 2022, 11:36 a.m. UTC | #2
> On 12/10/22 11:35 AM, Daan De Meyer wrote:
> > These hooks allows intercepting bind(), connect(), getsockname(),
> > getpeername(), sendmsg() and recvmsg() for unix sockets. The unix
> > socket hooks get write access to the address length because the
> > address length is not fixed when dealing with unix sockets and
> > needs to be modified when a unix socket address is modified by
> > the hook. Because abstract socket unix addresses start with a
> > NUL byte, we cannot recalculate the socket address in kernelspace
> > after running the hook by calculating the length of the unix socket
> > path using strlen().
>
> Yes, although we cannot calculate the socket path length with
> strlen(). But we still have a method to find the path. In
> unix_seq_show(), the unix socket path is calculated as below,
>
>                  if (u->addr) {  // under a hash table lock here
>                          int i, len;
>                          seq_putc(seq, ' ');
>
>                          i = 0;
>                          len = u->addr->len -
>                                  offsetof(struct sockaddr_un, sun_path);
>                          if (u->addr->name->sun_path[0]) {
>                                  len--;
>                          } else {
>                                  seq_putc(seq, '@');
>                                  i++;
>                          }
>                          for ( ; i < len; i++)
>                                  seq_putc(seq, u->addr->name->sun_path[i] ?:
>                                           '@');
>                  }
>
> Is it possible that we can use the above method to find the
> address length so we won't need to pass uaddr_len to bpf program?
>
> Since all other hooks do not need to uaddr_len, you could add some
> new hooks for unix socket which can specially calculate uaddr_len
> after the bpf program run.

I don't think we can. If we look at the definition of abstract unix
socket in the official man page:

> abstract: an abstract socket address is distinguished (from a
> pathname socket) by the fact that sun_path[0] is a null byte ('\0').
> The socket's address in this namespace is given by the additional
> bytes in sun_path that are covered by the specified length of the
> address structure. (Null bytes in the name have no special
> significance.) The name has no connection with filesystem pathnames.
> When the address of an abstract socket is returned, the returned
> addrlen is greater than sizeof(sa_family_t) (i.e., greater than 2),
> and the name of the socket is contained in the first
> (addrlen - sizeof(sa_family_t)) bytes of sun_path.

This specifically says that the address in the abstract namespace is
given by the additional bytes in sun_path that are covered by the
length of the address structure. If I understand correctly, that means
there's no way to derive the length from just the contents of the
sockaddr structure. We need the actual length as specified by the
caller to know which bytes belong to the address. Note that it's valid
for the abstract name to contain null bytes, so we cannot use those in
any way to detect whether further bytes belong to the address or not.
It seems valid to have an abstract name consisting of 107 null bytes
in sun_path.


On Tue, 13 Dec 2022 at 06:20, Yonghong Song <yhs@meta.com> wrote:
>
>
>
> On 12/10/22 11:35 AM, Daan De Meyer wrote:
> > These hooks allows intercepting bind(), connect(), getsockname(),
> > getpeername(), sendmsg() and recvmsg() for unix sockets. The unix
> > socket hooks get write access to the address length because the
> > address length is not fixed when dealing with unix sockets and
> > needs to be modified when a unix socket address is modified by
> > the hook. Because abstract socket unix addresses start with a
> > NUL byte, we cannot recalculate the socket address in kernelspace
> > after running the hook by calculating the length of the unix socket
> > path using strlen().
>
> Yes, although we cannot calculate the socket path length with
> strlen(). But we still have a method to find the path. In
> unix_seq_show(), the unix socket path is calculated as below,
>
>                  if (u->addr) {  // under a hash table lock here
>                          int i, len;
>                          seq_putc(seq, ' ');
>
>                          i = 0;
>                          len = u->addr->len -
>                                  offsetof(struct sockaddr_un, sun_path);
>                          if (u->addr->name->sun_path[0]) {
>                                  len--;
>                          } else {
>                                  seq_putc(seq, '@');
>                                  i++;
>                          }
>                          for ( ; i < len; i++)
>                                  seq_putc(seq, u->addr->name->sun_path[i] ?:
>                                           '@');
>                  }
>
> Is it possible that we can use the above method to find the
> address length so we won't need to pass uaddr_len to bpf program?
>
> Since all other hooks do not need to uaddr_len, you could add some
> new hooks for unix socket which can specially calculate uaddr_len
> after the bpf program run.
>
> >
> > This hook can be used when users want to multiplex syscall to a
> > single unix socket to multiple different processes behind the scenes
> > by redirecting the connect() and other syscalls to process specific
> > sockets.
> > ---
> >   include/linux/bpf-cgroup-defs.h |  6 +++
> >   include/linux/bpf-cgroup.h      | 29 ++++++++++-
> >   include/uapi/linux/bpf.h        | 14 ++++--
> >   kernel/bpf/cgroup.c             | 11 ++++-
> >   kernel/bpf/syscall.c            | 18 +++++++
> >   kernel/bpf/verifier.c           |  7 ++-
> >   net/core/filter.c               | 45 +++++++++++++++--
> >   net/unix/af_unix.c              | 85 +++++++++++++++++++++++++++++----
> >   tools/include/uapi/linux/bpf.h  | 14 ++++--
> >   9 files changed, 204 insertions(+), 25 deletions(-)
> >
> > diff --git a/include/linux/bpf-cgroup-defs.h b/include/linux/bpf-cgroup-defs.h
> > index 7b121bd780eb..8196ccb81915 100644
> > --- a/include/linux/bpf-cgroup-defs.h
> > +++ b/include/linux/bpf-cgroup-defs.h
> > @@ -26,21 +26,27 @@ enum cgroup_bpf_attach_type {
> >       CGROUP_DEVICE,
> >       CGROUP_INET4_BIND,
> >       CGROUP_INET6_BIND,
> > +     CGROUP_UNIX_BIND,
> >       CGROUP_INET4_CONNECT,
> >       CGROUP_INET6_CONNECT,
> > +     CGROUP_UNIX_CONNECT,
> >       CGROUP_INET4_POST_BIND,
> >       CGROUP_INET6_POST_BIND,
> >       CGROUP_UDP4_SENDMSG,
> >       CGROUP_UDP6_SENDMSG,
> > +     CGROUP_UNIX_SENDMSG,
> >       CGROUP_SYSCTL,
> >       CGROUP_UDP4_RECVMSG,
> >       CGROUP_UDP6_RECVMSG,
> > +     CGROUP_UNIX_RECVMSG,
> >       CGROUP_GETSOCKOPT,
> >       CGROUP_SETSOCKOPT,
> >       CGROUP_INET4_GETPEERNAME,
> >       CGROUP_INET6_GETPEERNAME,
> > +     CGROUP_UNIX_GETPEERNAME,
> >       CGROUP_INET4_GETSOCKNAME,
> >       CGROUP_INET6_GETSOCKNAME,
> > +     CGROUP_UNIX_GETSOCKNAME,
> >       CGROUP_INET_SOCK_RELEASE,
> >       CGROUP_LSM_START,
> >       CGROUP_LSM_END = CGROUP_LSM_START + CGROUP_LSM_NUM - 1,
> > diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
> > index 3ab2f06ddc8a..4de3016f01e4 100644
> > --- a/include/linux/bpf-cgroup.h
> > +++ b/include/linux/bpf-cgroup.h
> > @@ -46,21 +46,27 @@ to_cgroup_bpf_attach_type(enum bpf_attach_type attach_type)
> >       CGROUP_ATYPE(CGROUP_DEVICE);
> >       CGROUP_ATYPE(CGROUP_INET4_BIND);
> >       CGROUP_ATYPE(CGROUP_INET6_BIND);
> > +     CGROUP_ATYPE(CGROUP_UNIX_BIND);
> >       CGROUP_ATYPE(CGROUP_INET4_CONNECT);
> >       CGROUP_ATYPE(CGROUP_INET6_CONNECT);
> > +     CGROUP_ATYPE(CGROUP_UNIX_CONNECT);
> >       CGROUP_ATYPE(CGROUP_INET4_POST_BIND);
> >       CGROUP_ATYPE(CGROUP_INET6_POST_BIND);
> >       CGROUP_ATYPE(CGROUP_UDP4_SENDMSG);
> >       CGROUP_ATYPE(CGROUP_UDP6_SENDMSG);
> > +     CGROUP_ATYPE(CGROUP_UNIX_SENDMSG);
> >       CGROUP_ATYPE(CGROUP_SYSCTL);
> >       CGROUP_ATYPE(CGROUP_UDP4_RECVMSG);
> >       CGROUP_ATYPE(CGROUP_UDP6_RECVMSG);
> > +     CGROUP_ATYPE(CGROUP_UNIX_RECVMSG);
> >       CGROUP_ATYPE(CGROUP_GETSOCKOPT);
> >       CGROUP_ATYPE(CGROUP_SETSOCKOPT);
> >       CGROUP_ATYPE(CGROUP_INET4_GETPEERNAME);
> >       CGROUP_ATYPE(CGROUP_INET6_GETPEERNAME);
> > +     CGROUP_ATYPE(CGROUP_UNIX_GETPEERNAME);
> >       CGROUP_ATYPE(CGROUP_INET4_GETSOCKNAME);
> >       CGROUP_ATYPE(CGROUP_INET6_GETSOCKNAME);
> > +     CGROUP_ATYPE(CGROUP_UNIX_GETSOCKNAME);
> >       CGROUP_ATYPE(CGROUP_INET_SOCK_RELEASE);
> >       default:
> >               return CGROUP_BPF_ATTACH_TYPE_INVALID;
> > @@ -273,9 +279,13 @@ static inline bool cgroup_bpf_sock_enabled(struct sock *sk,
> >               __ret;                                                       \
> >       })
> >
> > +#define BPF_CGROUP_RUN_PROG_UNIX_BIND_LOCK(sk, uaddr, uaddrlen)                      \
> > +     BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UNIX_BIND, NULL)
> > +
> >   #define BPF_CGROUP_PRE_CONNECT_ENABLED(sk)                                 \
> >       ((cgroup_bpf_enabled(CGROUP_INET4_CONNECT) ||                  \
> > -       cgroup_bpf_enabled(CGROUP_INET6_CONNECT)) &&                 \
> > +       cgroup_bpf_enabled(CGROUP_INET6_CONNECT) ||                  \
> > +       cgroup_bpf_enabled(CGROUP_UNIX_CONNECT)) &&                  \
> >        (sk)->sk_prot->pre_connect)
> >
> >   #define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr, uaddrlen)                     \
> > @@ -284,24 +294,36 @@ static inline bool cgroup_bpf_sock_enabled(struct sock *sk,
> >   #define BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr, uaddrlen)                     \
> >       BPF_CGROUP_RUN_SA_PROG(sk, uaddr, uaddrlen, CGROUP_INET6_CONNECT)
> >
> > +#define BPF_CGROUP_RUN_PROG_UNIX_CONNECT(sk, uaddr, uaddrlen)                       \
> > +     BPF_CGROUP_RUN_SA_PROG(sk, uaddr, uaddrlen, CGROUP_UNIX_CONNECT)
> > +
> >   #define BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr, uaddrlen)        \
> >       BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_INET4_CONNECT, NULL)
> >
> >   #define BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr, uaddrlen)        \
> >       BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_INET6_CONNECT, NULL)
> >
> > +#define BPF_CGROUP_RUN_PROG_UNIX_CONNECT_LOCK(sk, uaddr, uaddrlen)          \
> > +     BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UNIX_CONNECT, NULL)
> > +
> >   #define BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx)       \
> >       BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UDP4_SENDMSG, t_ctx)
> >
> >   #define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx)       \
> >       BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UDP6_SENDMSG, t_ctx)
> >
> > +#define BPF_CGROUP_RUN_PROG_UNIX_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx)    \
> > +     BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UNIX_SENDMSG, t_ctx)
> > +
> >   #define BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk, uaddr, uaddrlen)          \
> >       BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UDP4_RECVMSG, NULL)
> >
> >   #define BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk, uaddr, uaddrlen)          \
> >       BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UDP6_RECVMSG, NULL)
> >
> > +#define BPF_CGROUP_RUN_PROG_UNIX_RECVMSG_LOCK(sk, uaddr, uaddrlen)           \
> > +     BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UNIX_RECVMSG, NULL)
> > +
> >   /* The SOCK_OPS"_SK" macro should be used when sock_ops->sk is not a
> >    * fullsock and its parent fullsock cannot be traced by
> >    * sk_to_full_sk().
> > @@ -487,16 +509,21 @@ static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map,
> >   #define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) ({ 0; })
> >   #define BPF_CGROUP_RUN_PROG_INET_SOCK_RELEASE(sk) ({ 0; })
> >   #define BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, uaddrlen, atype, flags) ({ 0; })
> > +#define BPF_CGROUP_RUN_PROG_UNIX_BIND_LOCK(sk, uaddr, uaddrlen) ({ 0; })
> >   #define BPF_CGROUP_RUN_PROG_INET4_POST_BIND(sk) ({ 0; })
> >   #define BPF_CGROUP_RUN_PROG_INET6_POST_BIND(sk) ({ 0; })
> >   #define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr, uaddrlen) ({ 0; })
> >   #define BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr, uaddrlen) ({ 0; })
> >   #define BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr, uaddrlen) ({ 0; })
> >   #define BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr, uaddrlen) ({ 0; })
> > +#define BPF_CGROUP_RUN_PROG_UNIX_CONNECT(sk, uaddr, uaddrlen) ({ 0; })
> > +#define BPF_CGROUP_RUN_PROG_UNIX_CONNECT_LOCK(sk, uaddr, uaddrlen) ({ 0; })
> >   #define BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx) ({ 0; })
> >   #define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx) ({ 0; })
> > +#define BPF_CGROUP_RUN_PROG_UNIX_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx) ({ 0; })
> >   #define BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk, uaddr, uaddrlen) ({ 0; })
> >   #define BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk, uaddr, uaddrlen) ({ 0; })
> > +#define BPF_CGROUP_RUN_PROG_UNIX_RECVMSG_LOCK(sk, uaddr, uaddrlen) ({ 0; })
> >   #define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) ({ 0; })
> >   #define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(atype, major, minor, access) ({ 0; })
> >   #define BPF_CGROUP_RUN_PROG_SYSCTL(head,table,write,buf,count,pos) ({ 0; })
> > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> > index 9e3c33f83bba..b73e4da458fd 100644
> > --- a/include/uapi/linux/bpf.h
> > +++ b/include/uapi/linux/bpf.h
> > @@ -999,17 +999,21 @@ enum bpf_attach_type {
> >       BPF_SK_MSG_VERDICT,
> >       BPF_CGROUP_INET4_BIND,
> >       BPF_CGROUP_INET6_BIND,
> > +     BPF_CGROUP_UNIX_BIND,
> >       BPF_CGROUP_INET4_CONNECT,
> >       BPF_CGROUP_INET6_CONNECT,
> > +     BPF_CGROUP_UNIX_CONNECT,
> >       BPF_CGROUP_INET4_POST_BIND,
> >       BPF_CGROUP_INET6_POST_BIND,
> >       BPF_CGROUP_UDP4_SENDMSG,
> >       BPF_CGROUP_UDP6_SENDMSG,
> > +     BPF_CGROUP_UNIX_SENDMSG,
> >       BPF_LIRC_MODE2,
> >       BPF_FLOW_DISSECTOR,
> >       BPF_CGROUP_SYSCTL,
> >       BPF_CGROUP_UDP4_RECVMSG,
> >       BPF_CGROUP_UDP6_RECVMSG,
> > +     BPF_CGROUP_UNIX_RECVMSG,
> >       BPF_CGROUP_GETSOCKOPT,
> >       BPF_CGROUP_SETSOCKOPT,
> >       BPF_TRACE_RAW_TP,
> > @@ -1020,8 +1024,10 @@ enum bpf_attach_type {
> >       BPF_TRACE_ITER,
> >       BPF_CGROUP_INET4_GETPEERNAME,
> >       BPF_CGROUP_INET6_GETPEERNAME,
> > +     BPF_CGROUP_UNIX_GETPEERNAME,
> >       BPF_CGROUP_INET4_GETSOCKNAME,
> >       BPF_CGROUP_INET6_GETSOCKNAME,
> > +     BPF_CGROUP_UNIX_GETSOCKNAME,
> >       BPF_XDP_DEVMAP,
> >       BPF_CGROUP_INET_SOCK_RELEASE,
> >       BPF_XDP_CPUMAP,
>
> This is uapi. Please add new attach type to the end of enum type.
>
> > @@ -2575,8 +2581,8 @@ union bpf_attr {
> >    *          *bpf_socket* should be one of the following:
> >    *
> >    *          * **struct bpf_sock_ops** for **BPF_PROG_TYPE_SOCK_OPS**.
> > - *           * **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT**
> > - *             and **BPF_CGROUP_INET6_CONNECT**.
> > + *           * **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT**,
> > + *             **BPF_CGROUP_INET6_CONNECT** and **BPF_CGROUP_UNIX_CONNECT**.
> >    *
> >    *          This helper actually implements a subset of **setsockopt()**.
> >    *          It supports the following *level*\ s:
> > @@ -2809,8 +2815,8 @@ union bpf_attr {
> >    *          *bpf_socket* should be one of the following:
> >    *
> >    *          * **struct bpf_sock_ops** for **BPF_PROG_TYPE_SOCK_OPS**.
> > - *           * **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT**
> > - *             and **BPF_CGROUP_INET6_CONNECT**.
> > + *           * **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT**,
> > + *             **BPF_CGROUP_INET6_CONNECT** and **BPF_CGROUP_UNIX_CONNECT**.
> >    *
> >    *          This helper actually implements a subset of **getsockopt()**.
> >    *          It supports the same set of *optname*\ s that is supported by
> [...]
Yonghong Song Dec. 13, 2022, 9:54 p.m. UTC | #3
On 12/13/22 3:36 AM, Daan De Meyer wrote:
>> On 12/10/22 11:35 AM, Daan De Meyer wrote:
>>> These hooks allows intercepting bind(), connect(), getsockname(),
>>> getpeername(), sendmsg() and recvmsg() for unix sockets. The unix
>>> socket hooks get write access to the address length because the
>>> address length is not fixed when dealing with unix sockets and
>>> needs to be modified when a unix socket address is modified by
>>> the hook. Because abstract socket unix addresses start with a
>>> NUL byte, we cannot recalculate the socket address in kernelspace
>>> after running the hook by calculating the length of the unix socket
>>> path using strlen().
>>
>> Yes, although we cannot calculate the socket path length with
>> strlen(). But we still have a method to find the path. In
>> unix_seq_show(), the unix socket path is calculated as below,
>>
>>                   if (u->addr) {  // under a hash table lock here
>>                           int i, len;
>>                           seq_putc(seq, ' ');
>>
>>                           i = 0;
>>                           len = u->addr->len -
>>                                   offsetof(struct sockaddr_un, sun_path);
>>                           if (u->addr->name->sun_path[0]) {
>>                                   len--;
>>                           } else {
>>                                   seq_putc(seq, '@');
>>                                   i++;
>>                           }
>>                           for ( ; i < len; i++)
>>                                   seq_putc(seq, u->addr->name->sun_path[i] ?:
>>                                            '@');
>>                   }
>>
>> Is it possible that we can use the above method to find the
>> address length so we won't need to pass uaddr_len to bpf program?
>>
>> Since all other hooks do not need to uaddr_len, you could add some
>> new hooks for unix socket which can specially calculate uaddr_len
>> after the bpf program run.
> 
> I don't think we can. If we look at the definition of abstract unix
> socket in the official man page:
> 
>> abstract: an abstract socket address is distinguished (from a pathname socket) by the fact that sun_path[0] is a null byte ('\0').  The socket's address in this namespace is given by the additional bytes in sun_path that are covered by the specified length of the address structure.  (Null bytes in
>> the  name  have  no  special  significance.)   The name has no connection with filesystem pathnames.  When the address of an abstract socket is returned, the returned addrlen is greater than sizeof(sa_family_t) (i.e., greater than 2), and the name of the socket is contained in the first (addrlen -
>> sizeof(sa_family_t)) bytes of sun_path.
> 
> This specifically says that the address in the abstract namespace is
> given by the additional bytes in sun_path that are covered by the
> length of the address structure. If I understand correctly, that means
> there's no way to derive the length from just the contents of the
> sockaddr structure. We need
> the actual length as specified by the caller to know which bytes
> belong to the address. Note that it's valid for the abstract name to
> contain Null bytes, so we cannot use those in any way or form to
> detect whether further bytes belong to the address or not. It seems
> valid to have an abstract name
> consisting of 107 Null bytes in sun_path.

Okay, it looks like a bpf program is able to set an abstract name as well.
It would be good to have an example of this in the selftests.

With the abstract address settable by a bpf program, I guess you are right:
we have to let the user explicitly tell us the address length.

I assume it is possible for the user to write an address like the one below:
"a\0b\0"
addr_len = offsetof(struct sockaddr_un, sun_path) + 4
but it is actually illegal, right? We have to validate the
legality of sun_path/addr_len beyond unix_validate_addr(), right?

> 
> 
> On Tue, 13 Dec 2022 at 06:20, Yonghong Song <yhs@meta.com> wrote:
>>
>>
>>
>> On 12/10/22 11:35 AM, Daan De Meyer wrote:
>>> These hooks allows intercepting bind(), connect(), getsockname(),
>>> getpeername(), sendmsg() and recvmsg() for unix sockets. The unix
>>> socket hooks get write access to the address length because the
>>> address length is not fixed when dealing with unix sockets and
>>> needs to be modified when a unix socket address is modified by
>>> the hook. Because abstract socket unix addresses start with a
>>> NUL byte, we cannot recalculate the socket address in kernelspace
>>> after running the hook by calculating the length of the unix socket
>>> path using strlen().
>>
>> Yes, although we cannot calculate the socket path length with
>> strlen(). But we still have a method to find the path. In
>> unix_seq_show(), the unix socket path is calculated as below,
>>
>>                   if (u->addr) {  // under a hash table lock here
>>                           int i, len;
>>                           seq_putc(seq, ' ');
>>
>>                           i = 0;
>>                           len = u->addr->len -
>>                                   offsetof(struct sockaddr_un, sun_path);
>>                           if (u->addr->name->sun_path[0]) {
>>                                   len--;
>>                           } else {
>>                                   seq_putc(seq, '@');
>>                                   i++;
>>                           }
>>                           for ( ; i < len; i++)
>>                                   seq_putc(seq, u->addr->name->sun_path[i] ?:
>>                                            '@');
>>                   }
>>
>> Is it possible that we can use the above method to find the
>> address length so we won't need to pass uaddr_len to bpf program?
>>
>> Since all other hooks do not need to uaddr_len, you could add some
>> new hooks for unix socket which can specially calculate uaddr_len
>> after the bpf program run.
>>
>>>
>>> This hook can be used when users want to multiplex syscall to a
>>> single unix socket to multiple different processes behind the scenes
>>> by redirecting the connect() and other syscalls to process specific
>>> sockets.
>>> ---
>>>    include/linux/bpf-cgroup-defs.h |  6 +++
>>>    include/linux/bpf-cgroup.h      | 29 ++++++++++-
>>>    include/uapi/linux/bpf.h        | 14 ++++--
>>>    kernel/bpf/cgroup.c             | 11 ++++-
>>>    kernel/bpf/syscall.c            | 18 +++++++
>>>    kernel/bpf/verifier.c           |  7 ++-
>>>    net/core/filter.c               | 45 +++++++++++++++--
>>>    net/unix/af_unix.c              | 85 +++++++++++++++++++++++++++++----
>>>    tools/include/uapi/linux/bpf.h  | 14 ++++--
>>>    9 files changed, 204 insertions(+), 25 deletions(-)
>>>
[...]
Daan De Meyer Dec. 15, 2022, 2:34 p.m. UTC | #4
> >> On 12/10/22 11:35 AM, Daan De Meyer wrote:
> >>> These hooks allows intercepting bind(), connect(), getsockname(),
> >>> getpeername(), sendmsg() and recvmsg() for unix sockets. The unix
> >>> socket hooks get write access to the address length because the
> >>> address length is not fixed when dealing with unix sockets and
> >>> needs to be modified when a unix socket address is modified by
> >>> the hook. Because abstract socket unix addresses start with a
> >>> NUL byte, we cannot recalculate the socket address in kernelspace
> >>> after running the hook by calculating the length of the unix socket
> >>> path using strlen().
> >>
> >> Yes, although we cannot calculate the socket path length with
> >> strlen(). But we still have a method to find the path. In
> >> unix_seq_show(), the unix socket path is calculated as below,
> >>
> >>                   if (u->addr) {  // under a hash table lock here
> >>                           int i, len;
> >>                           seq_putc(seq, ' ');
> >>
> >>                           i = 0;
> >>                           len = u->addr->len -
> >>                                   offsetof(struct sockaddr_un, sun_path);
> >>                           if (u->addr->name->sun_path[0]) {
> >>                                   len--;
> >>                           } else {
> >>                                   seq_putc(seq, '@');
> >>                                   i++;
> >>                           }
> >>                           for ( ; i < len; i++)
> >>                                   seq_putc(seq, u->addr->name->sun_path[i] ?:
> >>                                            '@');
> >>                   }
> >>
> >> Is it possible that we can use the above method to find the
> >> address length so we won't need to pass uaddr_len to bpf program?
> >>
> >> Since all other hooks do not need to uaddr_len, you could add some
> >> new hooks for unix socket which can specially calculate uaddr_len
> >> after the bpf program run.
> >
> > I don't think we can. If we look at the definition of abstract unix
> > socket in the official man page:
> >
> >> abstract: an abstract socket address is distinguished (from a pathname socket) by the fact that sun_path[0] is a null byte ('\0').  The socket's address in this namespace is given by the additional bytes in sun_path that are covered by the specified length of the address structure.  (Null bytes in
> >> the  name  have  no  special  significance.)   The name has no connection with filesystem pathnames.  When the address of an abstract socket is returned, the returned addrlen is greater than sizeof(sa_family_t) (i.e., greater than 2), and the name of the socket is contained in the first (addrlen -
> >> sizeof(sa_family_t)) bytes of sun_path.
> >
> > This specifically says that the address in the abstract namespace is
> > given by the additional bytes in sun_path that are covered by the
> > length of the address structure. If I understand correctly, that means
> > there's no way to derive the length from just the contents of the
> > sockaddr structure. We need
> > the actual length as specified by the caller to know which bytes
> > belong to the address. Note that it's valid for the abstract name to
> > contain Null bytes, so we cannot use those in any way or form to
> > detect whether further bytes belong to the address or not. It seems
> > valid to have an abstract name
> > consisting of 107 Null bytes in sun_path.
>
> Okay, it looks like bpf program is able to set abstract name as well.
> It would be good we have an example for this in selftest.
>
> With abstract address setable by bpf program, I guess you are right,
> we have to let user to explicitly tell us the address length.
>
> I assume it is possible for user to write an address like below:
> "a\0b\0"
> addr_len = offsetof(struct sockaddr_un, sun_path) + 4
> but actually it is illegal, right? We have to validate the
> legality of sun_path/addr_len beyond unix_validate_addr(), right?

I don't think this is actually illegal according to the man page. Let's
look at the following quote from the man page:

>  Pathname sockets
>      When binding a socket to a pathname, a few rules should be observed for maximum portability and ease of coding:
>
>      *  The pathname in sun_path should be null-terminated.
>
>      *  The length of the pathname, including the terminating null byte, should not exceed the size of sun_path.
>
>      *  The addrlen argument that describes the enclosing sockaddr_un structure should have a value of at least:
>
>             offsetof(struct sockaddr_un, sun_path)+strlen(addr.sun_path)+1
>
>         or, more simply, addrlen can be specified as sizeof(struct sockaddr_un).

So for a pathname socket, the address length is allowed to be bigger
than the actual path. I therefore don't think we need to do any more
validation than what is already done by unix_validate_addr(). The
selftests already use abstract unix sockets because they don't need
any cleanup.


On Tue, 13 Dec 2022 at 21:54, Yonghong Song <yhs@meta.com> wrote:
>
>
>
> On 12/13/22 3:36 AM, Daan De Meyer wrote:
> >> On 12/10/22 11:35 AM, Daan De Meyer wrote:
> >>> These hooks allows intercepting bind(), connect(), getsockname(),
> >>> getpeername(), sendmsg() and recvmsg() for unix sockets. The unix
> >>> socket hooks get write access to the address length because the
> >>> address length is not fixed when dealing with unix sockets and
> >>> needs to be modified when a unix socket address is modified by
> >>> the hook. Because abstract socket unix addresses start with a
> >>> NUL byte, we cannot recalculate the socket address in kernelspace
> >>> after running the hook by calculating the length of the unix socket
> >>> path using strlen().
> >>
> >> Yes, although we cannot calculate the socket path length with
> >> strlen(). But we still have a method to find the path. In
> >> unix_seq_show(), the unix socket path is calculated as below,
> >>
> >>                   if (u->addr) {  // under a hash table lock here
> >>                           int i, len;
> >>                           seq_putc(seq, ' ');
> >>
> >>                           i = 0;
> >>                           len = u->addr->len -
> >>                                   offsetof(struct sockaddr_un, sun_path);
> >>                           if (u->addr->name->sun_path[0]) {
> >>                                   len--;
> >>                           } else {
> >>                                   seq_putc(seq, '@');
> >>                                   i++;
> >>                           }
> >>                           for ( ; i < len; i++)
> >>                                   seq_putc(seq, u->addr->name->sun_path[i] ?:
> >>                                            '@');
> >>                   }
> >>
> >> Is it possible that we can use the above method to find the
> >> address length so we won't need to pass uaddr_len to bpf program?
> >>
> >> Since all other hooks do not need to uaddr_len, you could add some
> >> new hooks for unix socket which can specially calculate uaddr_len
> >> after the bpf program run.
> >
> > I don't think we can. If we look at the definition of abstract unix
> > socket in the official man page:
> >
> >> abstract: an abstract socket address is distinguished (from a pathname socket) by the fact that sun_path[0] is a null byte ('\0').  The socket's address in this namespace is given by the additional bytes in sun_path that are covered by the specified length of the address structure.  (Null bytes in
> >> the  name  have  no  special  significance.)   The name has no connection with filesystem pathnames.  When the address of an abstract socket is returned, the returned addrlen is greater than sizeof(sa_family_t) (i.e., greater than 2), and the name of the socket is contained in the first (addrlen -
> >> sizeof(sa_family_t)) bytes of sun_path.
> >
> > This specifically says that the address in the abstract namespace is
> > given by the additional bytes in sun_path that are covered by the
> > length of the address structure. If I understand correctly, that means
> > there's no way to derive the length from just the contents of the
> > sockaddr structure. We need
> > the actual length as specified by the caller to know which bytes
> > belong to the address. Note that it's valid for the abstract name to
> > contain Null bytes, so we cannot use those in any way or form to
> > detect whether further bytes belong to the address or not. It seems
> > valid to have an abstract name
> > consisting of 107 Null bytes in sun_path.
>
> Okay, it looks like bpf program is able to set abstract name as well.
> It would be good we have an example for this in selftest.
>
> With abstract address setable by bpf program, I guess you are right,
> we have to let user to explicitly tell us the address length.
>
> I assume it is possible for user to write an address like below:
> "a\0b\0"
> addr_len = offsetof(struct sockaddr_un, sun_path) + 4
> but actually it is illegal, right? We have to validate the
> legality of sun_path/addr_len beyond unix_validate_addr(), right?
>
> >
> >
> > On Tue, 13 Dec 2022 at 06:20, Yonghong Song <yhs@meta.com> wrote:
> >>
> >>
> >>
> >> On 12/10/22 11:35 AM, Daan De Meyer wrote:
> >>> These hooks allows intercepting bind(), connect(), getsockname(),
> >>> getpeername(), sendmsg() and recvmsg() for unix sockets. The unix
> >>> socket hooks get write access to the address length because the
> >>> address length is not fixed when dealing with unix sockets and
> >>> needs to be modified when a unix socket address is modified by
> >>> the hook. Because abstract socket unix addresses start with a
> >>> NUL byte, we cannot recalculate the socket address in kernelspace
> >>> after running the hook by calculating the length of the unix socket
> >>> path using strlen().
> >>
> >> Yes, although we cannot calculate the socket path length with
> >> strlen(). But we still have a method to find the path. In
> >> unix_seq_show(), the unix socket path is calculated as below,
> >>
> >>                   if (u->addr) {  // under a hash table lock here
> >>                           int i, len;
> >>                           seq_putc(seq, ' ');
> >>
> >>                           i = 0;
> >>                           len = u->addr->len -
> >>                                   offsetof(struct sockaddr_un, sun_path);
> >>                           if (u->addr->name->sun_path[0]) {
> >>                                   len--;
> >>                           } else {
> >>                                   seq_putc(seq, '@');
> >>                                   i++;
> >>                           }
> >>                           for ( ; i < len; i++)
> >>                                   seq_putc(seq, u->addr->name->sun_path[i] ?:
> >>                                            '@');
> >>                   }
> >>
> >> Is it possible that we can use the above method to find the
> >> address length so we won't need to pass uaddr_len to bpf program?
> >>
> >> Since all other hooks do not need to uaddr_len, you could add some
> >> new hooks for unix socket which can specially calculate uaddr_len
> >> after the bpf program run.
> >>
> >>>
> >>> This hook can be used when users want to multiplex syscall to a
> >>> single unix socket to multiple different processes behind the scenes
> >>> by redirecting the connect() and other syscalls to process specific
> >>> sockets.
> >>> ---
> >>>    include/linux/bpf-cgroup-defs.h |  6 +++
> >>>    include/linux/bpf-cgroup.h      | 29 ++++++++++-
> >>>    include/uapi/linux/bpf.h        | 14 ++++--
> >>>    kernel/bpf/cgroup.c             | 11 ++++-
> >>>    kernel/bpf/syscall.c            | 18 +++++++
> >>>    kernel/bpf/verifier.c           |  7 ++-
> >>>    net/core/filter.c               | 45 +++++++++++++++--
> >>>    net/unix/af_unix.c              | 85 +++++++++++++++++++++++++++++----
> >>>    tools/include/uapi/linux/bpf.h  | 14 ++++--
> >>>    9 files changed, 204 insertions(+), 25 deletions(-)
> >>>
> [...]
Yonghong Song Dec. 15, 2022, 6:32 p.m. UTC | #5
On 12/15/22 6:34 AM, Daan De Meyer wrote:
>>>> On 12/10/22 11:35 AM, Daan De Meyer wrote:
>>>>> These hooks allows intercepting bind(), connect(), getsockname(),
>>>>> getpeername(), sendmsg() and recvmsg() for unix sockets. The unix
>>>>> socket hooks get write access to the address length because the
>>>>> address length is not fixed when dealing with unix sockets and
>>>>> needs to be modified when a unix socket address is modified by
>>>>> the hook. Because abstract socket unix addresses start with a
>>>>> NUL byte, we cannot recalculate the socket address in kernelspace
>>>>> after running the hook by calculating the length of the unix socket
>>>>> path using strlen().
>>>>
>>>> Yes, although we cannot calculate the socket path length with
>>>> strlen(). But we still have a method to find the path. In
>>>> unix_seq_show(), the unix socket path is calculated as below,
>>>>
>>>>                    if (u->addr) {  // under a hash table lock here
>>>>                            int i, len;
>>>>                            seq_putc(seq, ' ');
>>>>
>>>>                            i = 0;
>>>>                            len = u->addr->len -
>>>>                                    offsetof(struct sockaddr_un, sun_path);
>>>>                            if (u->addr->name->sun_path[0]) {
>>>>                                    len--;
>>>>                            } else {
>>>>                                    seq_putc(seq, '@');
>>>>                                    i++;
>>>>                            }
>>>>                            for ( ; i < len; i++)
>>>>                                    seq_putc(seq, u->addr->name->sun_path[i] ?:
>>>>                                             '@');
>>>>                    }
>>>>
>>>> Is it possible that we can use the above method to find the
>>>> address length so we won't need to pass uaddr_len to bpf program?
>>>>
>>>> Since all other hooks do not need to uaddr_len, you could add some
>>>> new hooks for unix socket which can specially calculate uaddr_len
>>>> after the bpf program run.
>>>
>>> I don't think we can. If we look at the definition of abstract unix
>>> socket in the official man page:
>>>
>>>> abstract: an abstract socket address is distinguished (from a pathname socket) by the fact that sun_path[0] is a null byte ('\0').  The socket's address in this namespace is given by the additional bytes in sun_path that are covered by the specified length of the address structure.  (Null bytes in
>>>> the  name  have  no  special  significance.)   The name has no connection with filesystem pathnames.  When the address of an abstract socket is returned, the returned addrlen is greater than sizeof(sa_family_t) (i.e., greater than 2), and the name of the socket is contained in the first (addrlen -
>>>> sizeof(sa_family_t)) bytes of sun_path.
>>>
>>> This specifically says that the address in the abstract namespace is
>>> given by the additional bytes in sun_path that are covered by the
>>> length of the address structure. If I understand correctly, that means
>>> there's no way to derive the length from just the contents of the
>>> sockaddr structure. We need
>>> the actual length as specified by the caller to know which bytes
>>> belong to the address. Note that it's valid for the abstract name to
>>> contain Null bytes, so we cannot use those in any way or form to
>>> detect whether further bytes belong to the address or not. It seems
>>> valid to have an abstract name
>>> consisting of 107 Null bytes in sun_path.
>>
>> Okay, it looks like bpf program is able to set abstract name as well.
>> It would be good we have an example for this in selftest.
>>
>> With abstract address setable by bpf program, I guess you are right,
>> we have to let user to explicitly tell us the address length.
>>
>> I assume it is possible for user to write an address like below:
>> "a\0b\0"
>> addr_len = offsetof(struct sockaddr_un, sun_path) + 4
>> but actually it is illegal, right? We have to validate the
>> legality of sun_path/addr_len beyond unix_validate_addr(), right?
> 
> This is not actually illegal according to the man page I think, let's
> look at the following quote from the man page:
> 
>>   Pathname sockets
>>       When binding a socket to a pathname, a few rules should be observed for maximum portability and ease of coding:
>>
>>       *  The pathname in sun_path should be null-terminated.
>>
>>       *  The length of the pathname, including the terminating null byte, should not exceed the size of sun_path.
>>
>>       *  The addrlen argument that describes the enclosing sockaddr_un structure should have a value of at least:
>>
>>              offsetof(struct sockaddr_un, sun_path)+strlen(addr.sun_path)+1
>>
>>          or, more simply, addrlen can be specified as sizeof(struct sockaddr_un).
> 
> So when doing a pathname based path, the address length is allowed to
> be bigger than the actual path. So I don't think
> we need to do any more validation than what is done by
> unix_validate_addr(). The selftests are already using abstract
> unix sockets because they don't need any cleanup.

What about a smaller length, e.g. the address is "abc" and the length is
   offsetof(struct sockaddr_un, sun_path) + 2?

> 
> 
> On Tue, 13 Dec 2022 at 21:54, Yonghong Song <yhs@meta.com> wrote:
>>
>>
>>
>> On 12/13/22 3:36 AM, Daan De Meyer wrote:
>>>> On 12/10/22 11:35 AM, Daan De Meyer wrote:
>>>>> These hooks allows intercepting bind(), connect(), getsockname(),
>>>>> getpeername(), sendmsg() and recvmsg() for unix sockets. The unix
>>>>> socket hooks get write access to the address length because the
>>>>> address length is not fixed when dealing with unix sockets and
>>>>> needs to be modified when a unix socket address is modified by
>>>>> the hook. Because abstract socket unix addresses start with a
>>>>> NUL byte, we cannot recalculate the socket address in kernelspace
>>>>> after running the hook by calculating the length of the unix socket
>>>>> path using strlen().
>>>>
>>>> Yes, although we cannot calculate the socket path length with
>>>> strlen(). But we still have a method to find the path. In
>>>> unix_seq_show(), the unix socket path is calculated as below,
>>>>
>>>>                    if (u->addr) {  // under a hash table lock here
>>>>                            int i, len;
>>>>                            seq_putc(seq, ' ');
>>>>
>>>>                            i = 0;
>>>>                            len = u->addr->len -
>>>>                                    offsetof(struct sockaddr_un, sun_path);
>>>>                            if (u->addr->name->sun_path[0]) {
>>>>                                    len--;
>>>>                            } else {
>>>>                                    seq_putc(seq, '@');
>>>>                                    i++;
>>>>                            }
>>>>                            for ( ; i < len; i++)
>>>>                                    seq_putc(seq, u->addr->name->sun_path[i] ?:
>>>>                                             '@');
>>>>                    }
>>>>
>>>> Is it possible that we can use the above method to find the
>>>> address length so we won't need to pass uaddr_len to bpf program?
>>>>
>>>> Since all other hooks do not need to uaddr_len, you could add some
>>>> new hooks for unix socket which can specially calculate uaddr_len
>>>> after the bpf program run.
>>>
>>> I don't think we can. If we look at the definition of abstract unix
>>> socket in the official man page:
>>>
>>>> abstract: an abstract socket address is distinguished (from a pathname socket) by the fact that sun_path[0] is a null byte ('\0').  The socket's address in this namespace is given by the additional bytes in sun_path that are covered by the specified length of the address structure.  (Null bytes in
>>>> the  name  have  no  special  significance.)   The name has no connection with filesystem pathnames.  When the address of an abstract socket is returned, the returned addrlen is greater than sizeof(sa_family_t) (i.e., greater than 2), and the name of the socket is contained in the first (addrlen -
>>>> sizeof(sa_family_t)) bytes of sun_path.
>>>
>>> This specifically says that the address in the abstract namespace is
>>> given by the additional bytes in sun_path that are covered by the
>>> length of the address structure. If I understand correctly, that means
>>> there's no way to derive the length from just the contents of the
>>> sockaddr structure. We need
>>> the actual length as specified by the caller to know which bytes
>>> belong to the address. Note that it's valid for the abstract name to
>>> contain Null bytes, so we cannot use those in any way or form to
>>> detect whether further bytes belong to the address or not. It seems
>>> valid to have an abstract name
>>> consisting of 107 Null bytes in sun_path.
>>
>> Okay, it looks like bpf program is able to set abstract name as well.
>> It would be good we have an example for this in selftest.
>>
>> With abstract address setable by bpf program, I guess you are right,
>> we have to let user to explicitly tell us the address length.
>>
>> I assume it is possible for user to write an address like below:
>> "a\0b\0"
>> addr_len = offsetof(struct sockaddr_un, sun_path) + 4
>> but actually it is illegal, right? We have to validate the
>> legality of sun_path/addr_len beyond unix_validate_addr(), right?
>>
>>>
>>>
>>> On Tue, 13 Dec 2022 at 06:20, Yonghong Song <yhs@meta.com> wrote:
>>>>
>>>>
>>>>
>>>> On 12/10/22 11:35 AM, Daan De Meyer wrote:
>>>>> These hooks allows intercepting bind(), connect(), getsockname(),
>>>>> getpeername(), sendmsg() and recvmsg() for unix sockets. The unix
>>>>> socket hooks get write access to the address length because the
>>>>> address length is not fixed when dealing with unix sockets and
>>>>> needs to be modified when a unix socket address is modified by
>>>>> the hook. Because abstract socket unix addresses start with a
>>>>> NUL byte, we cannot recalculate the socket address in kernelspace
>>>>> after running the hook by calculating the length of the unix socket
>>>>> path using strlen().
>>>>
>>>> Yes, although we cannot calculate the socket path length with
>>>> strlen(). But we still have a method to find the path. In
>>>> unix_seq_show(), the unix socket path is calculated as below,
>>>>
>>>>                    if (u->addr) {  // under a hash table lock here
>>>>                            int i, len;
>>>>                            seq_putc(seq, ' ');
>>>>
>>>>                            i = 0;
>>>>                            len = u->addr->len -
>>>>                                    offsetof(struct sockaddr_un, sun_path);
>>>>                            if (u->addr->name->sun_path[0]) {
>>>>                                    len--;
>>>>                            } else {
>>>>                                    seq_putc(seq, '@');
>>>>                                    i++;
>>>>                            }
>>>>                            for ( ; i < len; i++)
>>>>                                    seq_putc(seq, u->addr->name->sun_path[i] ?:
>>>>                                             '@');
>>>>                    }
>>>>
>>>> Is it possible that we can use the above method to find the
>>>> address length so we won't need to pass uaddr_len to bpf program?
>>>>
>>>> Since all other hooks do not need to uaddr_len, you could add some
>>>> new hooks for unix socket which can specially calculate uaddr_len
>>>> after the bpf program run.
>>>>
>>>>>
>>>>> This hook can be used when users want to multiplex syscall to a
>>>>> single unix socket to multiple different processes behind the scenes
>>>>> by redirecting the connect() and other syscalls to process specific
>>>>> sockets.
>>>>> ---
>>>>>     include/linux/bpf-cgroup-defs.h |  6 +++
>>>>>     include/linux/bpf-cgroup.h      | 29 ++++++++++-
>>>>>     include/uapi/linux/bpf.h        | 14 ++++--
>>>>>     kernel/bpf/cgroup.c             | 11 ++++-
>>>>>     kernel/bpf/syscall.c            | 18 +++++++
>>>>>     kernel/bpf/verifier.c           |  7 ++-
>>>>>     net/core/filter.c               | 45 +++++++++++++++--
>>>>>     net/unix/af_unix.c              | 85 +++++++++++++++++++++++++++++----
>>>>>     tools/include/uapi/linux/bpf.h  | 14 ++++--
>>>>>     9 files changed, 204 insertions(+), 25 deletions(-)
>>>>>
>> [...]
diff mbox series

Patch

diff --git a/include/linux/bpf-cgroup-defs.h b/include/linux/bpf-cgroup-defs.h
index 7b121bd780eb..8196ccb81915 100644
--- a/include/linux/bpf-cgroup-defs.h
+++ b/include/linux/bpf-cgroup-defs.h
@@ -26,21 +26,27 @@  enum cgroup_bpf_attach_type {
 	CGROUP_DEVICE,
 	CGROUP_INET4_BIND,
 	CGROUP_INET6_BIND,
+	CGROUP_UNIX_BIND,
 	CGROUP_INET4_CONNECT,
 	CGROUP_INET6_CONNECT,
+	CGROUP_UNIX_CONNECT,
 	CGROUP_INET4_POST_BIND,
 	CGROUP_INET6_POST_BIND,
 	CGROUP_UDP4_SENDMSG,
 	CGROUP_UDP6_SENDMSG,
+	CGROUP_UNIX_SENDMSG,
 	CGROUP_SYSCTL,
 	CGROUP_UDP4_RECVMSG,
 	CGROUP_UDP6_RECVMSG,
+	CGROUP_UNIX_RECVMSG,
 	CGROUP_GETSOCKOPT,
 	CGROUP_SETSOCKOPT,
 	CGROUP_INET4_GETPEERNAME,
 	CGROUP_INET6_GETPEERNAME,
+	CGROUP_UNIX_GETPEERNAME,
 	CGROUP_INET4_GETSOCKNAME,
 	CGROUP_INET6_GETSOCKNAME,
+	CGROUP_UNIX_GETSOCKNAME,
 	CGROUP_INET_SOCK_RELEASE,
 	CGROUP_LSM_START,
 	CGROUP_LSM_END = CGROUP_LSM_START + CGROUP_LSM_NUM - 1,
diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index 3ab2f06ddc8a..4de3016f01e4 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -46,21 +46,27 @@  to_cgroup_bpf_attach_type(enum bpf_attach_type attach_type)
 	CGROUP_ATYPE(CGROUP_DEVICE);
 	CGROUP_ATYPE(CGROUP_INET4_BIND);
 	CGROUP_ATYPE(CGROUP_INET6_BIND);
+	CGROUP_ATYPE(CGROUP_UNIX_BIND);
 	CGROUP_ATYPE(CGROUP_INET4_CONNECT);
 	CGROUP_ATYPE(CGROUP_INET6_CONNECT);
+	CGROUP_ATYPE(CGROUP_UNIX_CONNECT);
 	CGROUP_ATYPE(CGROUP_INET4_POST_BIND);
 	CGROUP_ATYPE(CGROUP_INET6_POST_BIND);
 	CGROUP_ATYPE(CGROUP_UDP4_SENDMSG);
 	CGROUP_ATYPE(CGROUP_UDP6_SENDMSG);
+	CGROUP_ATYPE(CGROUP_UNIX_SENDMSG);
 	CGROUP_ATYPE(CGROUP_SYSCTL);
 	CGROUP_ATYPE(CGROUP_UDP4_RECVMSG);
 	CGROUP_ATYPE(CGROUP_UDP6_RECVMSG);
+	CGROUP_ATYPE(CGROUP_UNIX_RECVMSG);
 	CGROUP_ATYPE(CGROUP_GETSOCKOPT);
 	CGROUP_ATYPE(CGROUP_SETSOCKOPT);
 	CGROUP_ATYPE(CGROUP_INET4_GETPEERNAME);
 	CGROUP_ATYPE(CGROUP_INET6_GETPEERNAME);
+	CGROUP_ATYPE(CGROUP_UNIX_GETPEERNAME);
 	CGROUP_ATYPE(CGROUP_INET4_GETSOCKNAME);
 	CGROUP_ATYPE(CGROUP_INET6_GETSOCKNAME);
+	CGROUP_ATYPE(CGROUP_UNIX_GETSOCKNAME);
 	CGROUP_ATYPE(CGROUP_INET_SOCK_RELEASE);
 	default:
 		return CGROUP_BPF_ATTACH_TYPE_INVALID;
@@ -273,9 +279,13 @@  static inline bool cgroup_bpf_sock_enabled(struct sock *sk,
 		__ret;                                                       \
 	})
 
+#define BPF_CGROUP_RUN_PROG_UNIX_BIND_LOCK(sk, uaddr, uaddrlen)			\
+	BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UNIX_BIND, NULL)
+
 #define BPF_CGROUP_PRE_CONNECT_ENABLED(sk)				       \
 	((cgroup_bpf_enabled(CGROUP_INET4_CONNECT) ||		       \
-	  cgroup_bpf_enabled(CGROUP_INET6_CONNECT)) &&		       \
+	  cgroup_bpf_enabled(CGROUP_INET6_CONNECT) ||		       \
+	  cgroup_bpf_enabled(CGROUP_UNIX_CONNECT)) &&		       \
 	 (sk)->sk_prot->pre_connect)
 
 #define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr, uaddrlen)		       \
@@ -284,24 +294,36 @@  static inline bool cgroup_bpf_sock_enabled(struct sock *sk,
 #define BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr, uaddrlen)		       \
 	BPF_CGROUP_RUN_SA_PROG(sk, uaddr, uaddrlen, CGROUP_INET6_CONNECT)
 
+#define BPF_CGROUP_RUN_PROG_UNIX_CONNECT(sk, uaddr, uaddrlen)	               \
+	BPF_CGROUP_RUN_SA_PROG(sk, uaddr, uaddrlen, CGROUP_UNIX_CONNECT)
+
 #define BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr, uaddrlen)	       \
 	BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_INET4_CONNECT, NULL)
 
 #define BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr, uaddrlen)	       \
 	BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_INET6_CONNECT, NULL)
 
+#define BPF_CGROUP_RUN_PROG_UNIX_CONNECT_LOCK(sk, uaddr, uaddrlen)	       \
+	BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UNIX_CONNECT, NULL)
+
 #define BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx)       \
 	BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UDP4_SENDMSG, t_ctx)
 
 #define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx)       \
 	BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UDP6_SENDMSG, t_ctx)
 
+#define BPF_CGROUP_RUN_PROG_UNIX_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx)	\
+	BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UNIX_SENDMSG, t_ctx)
+
 #define BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk, uaddr, uaddrlen)		\
 	BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UDP4_RECVMSG, NULL)
 
 #define BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk, uaddr, uaddrlen)		\
 	BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UDP6_RECVMSG, NULL)
 
+#define BPF_CGROUP_RUN_PROG_UNIX_RECVMSG_LOCK(sk, uaddr, uaddrlen)		\
+	BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UNIX_RECVMSG, NULL)
+
 /* The SOCK_OPS"_SK" macro should be used when sock_ops->sk is not a
  * fullsock and its parent fullsock cannot be traced by
  * sk_to_full_sk().
@@ -487,16 +509,21 @@  static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map,
 #define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) ({ 0; })
 #define BPF_CGROUP_RUN_PROG_INET_SOCK_RELEASE(sk) ({ 0; })
 #define BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, uaddrlen, atype, flags) ({ 0; })
+#define BPF_CGROUP_RUN_PROG_UNIX_BIND_LOCK(sk, uaddr, uaddrlen) ({ 0; })
 #define BPF_CGROUP_RUN_PROG_INET4_POST_BIND(sk) ({ 0; })
 #define BPF_CGROUP_RUN_PROG_INET6_POST_BIND(sk) ({ 0; })
 #define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr, uaddrlen) ({ 0; })
 #define BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr, uaddrlen) ({ 0; })
 #define BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr, uaddrlen) ({ 0; })
 #define BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr, uaddrlen) ({ 0; })
+#define BPF_CGROUP_RUN_PROG_UNIX_CONNECT(sk, uaddr, uaddrlen) ({ 0; })
+#define BPF_CGROUP_RUN_PROG_UNIX_CONNECT_LOCK(sk, uaddr, uaddrlen) ({ 0; })
 #define BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx) ({ 0; })
 #define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx) ({ 0; })
+#define BPF_CGROUP_RUN_PROG_UNIX_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx) ({ 0; })
 #define BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk, uaddr, uaddrlen) ({ 0; })
 #define BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk, uaddr, uaddrlen) ({ 0; })
+#define BPF_CGROUP_RUN_PROG_UNIX_RECVMSG_LOCK(sk, uaddr, uaddrlen) ({ 0; })
 #define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) ({ 0; })
 #define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(atype, major, minor, access) ({ 0; })
 #define BPF_CGROUP_RUN_PROG_SYSCTL(head,table,write,buf,count,pos) ({ 0; })
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 9e3c33f83bba..b73e4da458fd 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -999,17 +999,21 @@  enum bpf_attach_type {
 	BPF_SK_MSG_VERDICT,
 	BPF_CGROUP_INET4_BIND,
 	BPF_CGROUP_INET6_BIND,
+	BPF_CGROUP_UNIX_BIND,
 	BPF_CGROUP_INET4_CONNECT,
 	BPF_CGROUP_INET6_CONNECT,
+	BPF_CGROUP_UNIX_CONNECT,
 	BPF_CGROUP_INET4_POST_BIND,
 	BPF_CGROUP_INET6_POST_BIND,
 	BPF_CGROUP_UDP4_SENDMSG,
 	BPF_CGROUP_UDP6_SENDMSG,
+	BPF_CGROUP_UNIX_SENDMSG,
 	BPF_LIRC_MODE2,
 	BPF_FLOW_DISSECTOR,
 	BPF_CGROUP_SYSCTL,
 	BPF_CGROUP_UDP4_RECVMSG,
 	BPF_CGROUP_UDP6_RECVMSG,
+	BPF_CGROUP_UNIX_RECVMSG,
 	BPF_CGROUP_GETSOCKOPT,
 	BPF_CGROUP_SETSOCKOPT,
 	BPF_TRACE_RAW_TP,
@@ -1020,8 +1024,10 @@  enum bpf_attach_type {
 	BPF_TRACE_ITER,
 	BPF_CGROUP_INET4_GETPEERNAME,
 	BPF_CGROUP_INET6_GETPEERNAME,
+	BPF_CGROUP_UNIX_GETPEERNAME,
 	BPF_CGROUP_INET4_GETSOCKNAME,
 	BPF_CGROUP_INET6_GETSOCKNAME,
+	BPF_CGROUP_UNIX_GETSOCKNAME,
 	BPF_XDP_DEVMAP,
 	BPF_CGROUP_INET_SOCK_RELEASE,
 	BPF_XDP_CPUMAP,
@@ -2575,8 +2581,8 @@  union bpf_attr {
  * 		*bpf_socket* should be one of the following:
  *
  * 		* **struct bpf_sock_ops** for **BPF_PROG_TYPE_SOCK_OPS**.
- * 		* **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT**
- * 		  and **BPF_CGROUP_INET6_CONNECT**.
+ * 		* **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT**,
+ * 		  **BPF_CGROUP_INET6_CONNECT** and **BPF_CGROUP_UNIX_CONNECT**.
  *
  * 		This helper actually implements a subset of **setsockopt()**.
  * 		It supports the following *level*\ s:
@@ -2809,8 +2815,8 @@  union bpf_attr {
  * 		*bpf_socket* should be one of the following:
  *
  * 		* **struct bpf_sock_ops** for **BPF_PROG_TYPE_SOCK_OPS**.
- * 		* **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT**
- * 		  and **BPF_CGROUP_INET6_CONNECT**.
+ * 		* **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT**,
+ * 		  **BPF_CGROUP_INET6_CONNECT** and **BPF_CGROUP_UNIX_CONNECT**.
  *
  * 		This helper actually implements a subset of **getsockopt()**.
  * 		It supports the same set of *optname*\ s that is supported by
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index f97afed8a115..eeb349cef624 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -1455,7 +1455,7 @@  EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk);
  * @flags: Pointer to u32 which contains higher bits of BPF program
  *         return value (OR'ed together).
  *
- * socket is expected to be of type INET or INET6.
+ * socket is expected to be of type INET, INET6 or UNIX.
  *
  * This function will return %-EPERM if an attached program is found and
  * returned value != 1 during execution. In all other cases, 0 is returned.
@@ -1479,7 +1479,8 @@  int __cgroup_bpf_run_filter_sock_addr(struct sock *sk,
 	/* Check socket family since not all sockets represent network
 	 * endpoint (e.g. AF_UNIX).
 	 */
-	if (sk->sk_family != AF_INET && sk->sk_family != AF_INET6)
+	if (sk->sk_family != AF_INET && sk->sk_family != AF_INET6 &&
+		sk->sk_family != AF_UNIX)
 		return 0;
 
 	if (!ctx.uaddr) {
@@ -2493,10 +2494,13 @@  cgroup_common_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		case BPF_CGROUP_SOCK_OPS:
 		case BPF_CGROUP_UDP4_RECVMSG:
 		case BPF_CGROUP_UDP6_RECVMSG:
+		case BPF_CGROUP_UNIX_RECVMSG:
 		case BPF_CGROUP_INET4_GETPEERNAME:
 		case BPF_CGROUP_INET6_GETPEERNAME:
+		case BPF_CGROUP_UNIX_GETPEERNAME:
 		case BPF_CGROUP_INET4_GETSOCKNAME:
 		case BPF_CGROUP_INET6_GETSOCKNAME:
+		case BPF_CGROUP_UNIX_GETSOCKNAME:
 			return NULL;
 		default:
 			return &bpf_get_retval_proto;
@@ -2508,10 +2512,13 @@  cgroup_common_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		case BPF_CGROUP_SOCK_OPS:
 		case BPF_CGROUP_UDP4_RECVMSG:
 		case BPF_CGROUP_UDP6_RECVMSG:
+		case BPF_CGROUP_UNIX_RECVMSG:
 		case BPF_CGROUP_INET4_GETPEERNAME:
 		case BPF_CGROUP_INET6_GETPEERNAME:
+		case BPF_CGROUP_UNIX_GETPEERNAME:
 		case BPF_CGROUP_INET4_GETSOCKNAME:
 		case BPF_CGROUP_INET6_GETSOCKNAME:
+		case BPF_CGROUP_UNIX_GETSOCKNAME:
 			return NULL;
 		default:
 			return &bpf_set_retval_proto;
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 35972afb6850..142b5ece735f 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -2370,16 +2370,22 @@  bpf_prog_load_check_attach(enum bpf_prog_type prog_type,
 		switch (expected_attach_type) {
 		case BPF_CGROUP_INET4_BIND:
 		case BPF_CGROUP_INET6_BIND:
+		case BPF_CGROUP_UNIX_BIND:
 		case BPF_CGROUP_INET4_CONNECT:
 		case BPF_CGROUP_INET6_CONNECT:
+		case BPF_CGROUP_UNIX_CONNECT:
 		case BPF_CGROUP_INET4_GETPEERNAME:
 		case BPF_CGROUP_INET6_GETPEERNAME:
+		case BPF_CGROUP_UNIX_GETPEERNAME:
 		case BPF_CGROUP_INET4_GETSOCKNAME:
 		case BPF_CGROUP_INET6_GETSOCKNAME:
+		case BPF_CGROUP_UNIX_GETSOCKNAME:
 		case BPF_CGROUP_UDP4_SENDMSG:
 		case BPF_CGROUP_UDP6_SENDMSG:
+		case BPF_CGROUP_UNIX_SENDMSG:
 		case BPF_CGROUP_UDP4_RECVMSG:
 		case BPF_CGROUP_UDP6_RECVMSG:
+		case BPF_CGROUP_UNIX_RECVMSG:
 			return 0;
 		default:
 			return -EINVAL;
@@ -3418,16 +3424,22 @@  attach_type_to_prog_type(enum bpf_attach_type attach_type)
 		return BPF_PROG_TYPE_CGROUP_SOCK;
 	case BPF_CGROUP_INET4_BIND:
 	case BPF_CGROUP_INET6_BIND:
+	case BPF_CGROUP_UNIX_BIND:
 	case BPF_CGROUP_INET4_CONNECT:
 	case BPF_CGROUP_INET6_CONNECT:
+	case BPF_CGROUP_UNIX_CONNECT:
 	case BPF_CGROUP_INET4_GETPEERNAME:
 	case BPF_CGROUP_INET6_GETPEERNAME:
+	case BPF_CGROUP_UNIX_GETPEERNAME:
 	case BPF_CGROUP_INET4_GETSOCKNAME:
 	case BPF_CGROUP_INET6_GETSOCKNAME:
+	case BPF_CGROUP_UNIX_GETSOCKNAME:
 	case BPF_CGROUP_UDP4_SENDMSG:
 	case BPF_CGROUP_UDP6_SENDMSG:
+	case BPF_CGROUP_UNIX_SENDMSG:
 	case BPF_CGROUP_UDP4_RECVMSG:
 	case BPF_CGROUP_UDP6_RECVMSG:
+	case BPF_CGROUP_UNIX_RECVMSG:
 		return BPF_PROG_TYPE_CGROUP_SOCK_ADDR;
 	case BPF_CGROUP_SOCK_OPS:
 		return BPF_PROG_TYPE_SOCK_OPS;
@@ -3583,18 +3595,24 @@  static int bpf_prog_query(const union bpf_attr *attr,
 	case BPF_CGROUP_INET_SOCK_RELEASE:
 	case BPF_CGROUP_INET4_BIND:
 	case BPF_CGROUP_INET6_BIND:
+	case BPF_CGROUP_UNIX_BIND:
 	case BPF_CGROUP_INET4_POST_BIND:
 	case BPF_CGROUP_INET6_POST_BIND:
 	case BPF_CGROUP_INET4_CONNECT:
 	case BPF_CGROUP_INET6_CONNECT:
+	case BPF_CGROUP_UNIX_CONNECT:
 	case BPF_CGROUP_INET4_GETPEERNAME:
 	case BPF_CGROUP_INET6_GETPEERNAME:
+	case BPF_CGROUP_UNIX_GETPEERNAME:
 	case BPF_CGROUP_INET4_GETSOCKNAME:
 	case BPF_CGROUP_INET6_GETSOCKNAME:
+	case BPF_CGROUP_UNIX_GETSOCKNAME:
 	case BPF_CGROUP_UDP4_SENDMSG:
 	case BPF_CGROUP_UDP6_SENDMSG:
+	case BPF_CGROUP_UNIX_SENDMSG:
 	case BPF_CGROUP_UDP4_RECVMSG:
 	case BPF_CGROUP_UDP6_RECVMSG:
+	case BPF_CGROUP_UNIX_RECVMSG:
 	case BPF_CGROUP_SOCK_OPS:
 	case BPF_CGROUP_DEVICE:
 	case BPF_CGROUP_SYSCTL:
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 1d51bd9596da..c06a6e43676c 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -11991,14 +11991,19 @@  static int check_return_code(struct bpf_verifier_env *env)
 	case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
 		if (env->prog->expected_attach_type == BPF_CGROUP_UDP4_RECVMSG ||
 		    env->prog->expected_attach_type == BPF_CGROUP_UDP6_RECVMSG ||
+		    env->prog->expected_attach_type == BPF_CGROUP_UNIX_RECVMSG ||
 		    env->prog->expected_attach_type == BPF_CGROUP_INET4_GETPEERNAME ||
 		    env->prog->expected_attach_type == BPF_CGROUP_INET6_GETPEERNAME ||
+		    env->prog->expected_attach_type == BPF_CGROUP_UNIX_GETPEERNAME ||
 		    env->prog->expected_attach_type == BPF_CGROUP_INET4_GETSOCKNAME ||
-		    env->prog->expected_attach_type == BPF_CGROUP_INET6_GETSOCKNAME)
+		    env->prog->expected_attach_type == BPF_CGROUP_INET6_GETSOCKNAME ||
+		    env->prog->expected_attach_type == BPF_CGROUP_UNIX_GETSOCKNAME)
 			range = tnum_range(1, 1);
 		if (env->prog->expected_attach_type == BPF_CGROUP_INET4_BIND ||
 		    env->prog->expected_attach_type == BPF_CGROUP_INET6_BIND)
 			range = tnum_range(0, 3);
+		if (env->prog->expected_attach_type == BPF_CGROUP_UNIX_BIND)
+			range = tnum_range(0, 1);
 		break;
 	case BPF_PROG_TYPE_CGROUP_SKB:
 		if (env->prog->expected_attach_type == BPF_CGROUP_INET_EGRESS) {
diff --git a/net/core/filter.c b/net/core/filter.c
index cc86b38fc764..0c8427305009 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -7666,6 +7666,7 @@  sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		switch (prog->expected_attach_type) {
 		case BPF_CGROUP_INET4_CONNECT:
 		case BPF_CGROUP_INET6_CONNECT:
+		case BPF_CGROUP_UNIX_CONNECT:
 			return &bpf_bind_proto;
 		default:
 			return NULL;
@@ -7694,16 +7695,22 @@  sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		switch (prog->expected_attach_type) {
 		case BPF_CGROUP_INET4_BIND:
 		case BPF_CGROUP_INET6_BIND:
+		case BPF_CGROUP_UNIX_BIND:
 		case BPF_CGROUP_INET4_CONNECT:
 		case BPF_CGROUP_INET6_CONNECT:
+		case BPF_CGROUP_UNIX_CONNECT:
 		case BPF_CGROUP_UDP4_RECVMSG:
 		case BPF_CGROUP_UDP6_RECVMSG:
+		case BPF_CGROUP_UNIX_RECVMSG:
 		case BPF_CGROUP_UDP4_SENDMSG:
 		case BPF_CGROUP_UDP6_SENDMSG:
+		case BPF_CGROUP_UNIX_SENDMSG:
 		case BPF_CGROUP_INET4_GETPEERNAME:
 		case BPF_CGROUP_INET6_GETPEERNAME:
+		case BPF_CGROUP_UNIX_GETPEERNAME:
 		case BPF_CGROUP_INET4_GETSOCKNAME:
 		case BPF_CGROUP_INET6_GETSOCKNAME:
+		case BPF_CGROUP_UNIX_GETSOCKNAME:
 			return &bpf_sock_addr_setsockopt_proto;
 		default:
 			return NULL;
@@ -7712,16 +7719,22 @@  sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		switch (prog->expected_attach_type) {
 		case BPF_CGROUP_INET4_BIND:
 		case BPF_CGROUP_INET6_BIND:
+		case BPF_CGROUP_UNIX_BIND:
 		case BPF_CGROUP_INET4_CONNECT:
 		case BPF_CGROUP_INET6_CONNECT:
+		case BPF_CGROUP_UNIX_CONNECT:
 		case BPF_CGROUP_UDP4_RECVMSG:
 		case BPF_CGROUP_UDP6_RECVMSG:
+		case BPF_CGROUP_UNIX_RECVMSG:
 		case BPF_CGROUP_UDP4_SENDMSG:
 		case BPF_CGROUP_UDP6_SENDMSG:
+		case BPF_CGROUP_UNIX_SENDMSG:
 		case BPF_CGROUP_INET4_GETPEERNAME:
 		case BPF_CGROUP_INET6_GETPEERNAME:
+		case BPF_CGROUP_UNIX_GETPEERNAME:
 		case BPF_CGROUP_INET4_GETSOCKNAME:
 		case BPF_CGROUP_INET6_GETSOCKNAME:
+		case BPF_CGROUP_UNIX_GETSOCKNAME:
 			return &bpf_sock_addr_getsockopt_proto;
 		default:
 			return NULL;
@@ -8784,8 +8797,8 @@  static bool sock_addr_is_valid_access(int off, int size,
 	if (off % size != 0)
 		return false;
 
-	/* Disallow access to IPv6 fields from IPv4 contex and vise
-	 * versa.
+	/* Disallow access to fields not belonging to the attach type's address
+	 * family.
 	 */
 	switch (off) {
 	case bpf_ctx_range(struct bpf_sock_addr, user_ip4):
@@ -8832,7 +8845,18 @@  static bool sock_addr_is_valid_access(int off, int size,
 		}
 		break;
 	case bpf_ctx_range_till(struct bpf_sock_addr, user_path[0], user_path[107]):
-		return false;
+		switch (prog->expected_attach_type) {
+		case BPF_CGROUP_UNIX_BIND:
+		case BPF_CGROUP_UNIX_CONNECT:
+		case BPF_CGROUP_UNIX_SENDMSG:
+		case BPF_CGROUP_UNIX_RECVMSG:
+		case BPF_CGROUP_UNIX_GETPEERNAME:
+		case BPF_CGROUP_UNIX_GETSOCKNAME:
+			break;
+		default:
+			return false;
+		}
+		break;
 	}
 
 	switch (off) {
@@ -8884,8 +8908,19 @@  static bool sock_addr_is_valid_access(int off, int size,
 			return false;
 		break;
 	case bpf_ctx_range(struct bpf_sock_addr, user_addrlen):
-		if (type != BPF_READ)
-			return false;
+		if (type != BPF_READ) {
+			switch (prog->expected_attach_type) {
+			case BPF_CGROUP_UNIX_BIND:
+			case BPF_CGROUP_UNIX_CONNECT:
+			case BPF_CGROUP_UNIX_SENDMSG:
+			case BPF_CGROUP_UNIX_RECVMSG:
+			case BPF_CGROUP_UNIX_GETPEERNAME:
+			case BPF_CGROUP_UNIX_GETSOCKNAME:
+				break;
+			default:
+				return false;
+			}
+		}
 
 		if (size != sizeof(__u32))
 			return false;
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index b3545fc68097..8d250cb75636 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -115,6 +115,7 @@ 
 #include <linux/freezer.h>
 #include <linux/file.h>
 #include <linux/btf_ids.h>
+#include <linux/bpf-cgroup.h>
 
 #include "scm.h"
 
@@ -1302,6 +1303,12 @@  static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 	struct sock *sk = sock->sk;
 	int err;
 
+	if (cgroup_bpf_enabled(CGROUP_UNIX_BIND)) {
+		err = BPF_CGROUP_RUN_PROG_UNIX_BIND_LOCK(sk, uaddr, &addr_len);
+		if (err)
+			return err;
+	}
+
 	if (addr_len == offsetof(struct sockaddr_un, sun_path) &&
 	    sunaddr->sun_family == AF_UNIX)
 		return unix_autobind(sk);
@@ -1356,6 +1363,13 @@  static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
 		goto out;
 
 	if (addr->sa_family != AF_UNSPEC) {
+		if (cgroup_bpf_enabled(CGROUP_UNIX_CONNECT)) {
+			err = BPF_CGROUP_RUN_PROG_UNIX_CONNECT_LOCK(sk, addr,
+								    &alen);
+			if (err)
+				goto out;
+		}
+
 		err = unix_validate_addr(sunaddr, alen);
 		if (err)
 			goto out;
@@ -1464,6 +1478,13 @@  static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
 	int err;
 	int st;
 
+	if (cgroup_bpf_enabled(CGROUP_UNIX_CONNECT)) {
+		err = BPF_CGROUP_RUN_PROG_UNIX_CONNECT_LOCK(sk, uaddr,
+							    &addr_len);
+		if (err)
+			goto out;
+	}
+
 	err = unix_validate_addr(sunaddr, addr_len);
 	if (err)
 		goto out;
@@ -1724,7 +1745,7 @@  static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer)
 	struct sock *sk = sock->sk;
 	struct unix_address *addr;
 	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
-	int err = 0;
+	int addr_len = 0, err = 0;
 
 	if (peer) {
 		sk = unix_peer_get(sk);
@@ -1741,14 +1762,35 @@  static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer)
 	if (!addr) {
 		sunaddr->sun_family = AF_UNIX;
 		sunaddr->sun_path[0] = 0;
-		err = offsetof(struct sockaddr_un, sun_path);
+		addr_len = offsetof(struct sockaddr_un, sun_path);
 	} else {
-		err = addr->len;
+		addr_len = addr->len;
 		memcpy(sunaddr, addr->name, addr->len);
 	}
+
+	if (peer && cgroup_bpf_enabled(CGROUP_UNIX_GETPEERNAME)) {
+		err = BPF_CGROUP_RUN_SA_PROG(sk, uaddr, &addr_len,
+					     CGROUP_UNIX_GETPEERNAME);
+		if (err)
+			goto out;
+
+		err = unix_validate_addr(sunaddr, addr_len);
+		if (err)
+			goto out;
+	} else if (cgroup_bpf_enabled(CGROUP_UNIX_GETSOCKNAME)) {
+		err = BPF_CGROUP_RUN_SA_PROG(sk, uaddr, &addr_len,
+					     CGROUP_UNIX_GETSOCKNAME);
+		if (err)
+			goto out;
+
+		err = unix_validate_addr(sunaddr, addr_len);
+		if (err)
+			goto out;
+	}
+
 	sock_put(sk);
 out:
-	return err;
+	return err ?: addr_len;
 }
 
 static void unix_peek_fds(struct scm_cookie *scm, struct sk_buff *skb)
@@ -1910,6 +1952,13 @@  static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
 		goto out;
 
 	if (msg->msg_namelen) {
+		if (cgroup_bpf_enabled(CGROUP_UNIX_SENDMSG)) {
+			err = BPF_CGROUP_RUN_PROG_UNIX_SENDMSG_LOCK(
+				sk, msg->msg_name, &msg->msg_namelen, NULL);
+			if (err)
+				goto out;
+		}
+
 		err = unix_validate_addr(sunaddr, msg->msg_namelen);
 		if (err)
 			goto out;
@@ -2404,14 +2453,29 @@  static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg,
 	return unix_dgram_recvmsg(sock, msg, size, flags);
 }
 
-static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
+static int unix_recvmsg_copy_addr(struct msghdr *msg, struct sock *sk)
 {
 	struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr);
+	int err;
 
 	if (addr) {
 		msg->msg_namelen = addr->len;
 		memcpy(msg->msg_name, addr->name, addr->len);
+
+		if (cgroup_bpf_enabled(CGROUP_UNIX_RECVMSG)) {
+			err = BPF_CGROUP_RUN_PROG_UNIX_RECVMSG_LOCK(
+				sk, msg->msg_name, &msg->msg_namelen);
+			if (err)
+				return err;
+
+			err = unix_validate_addr(msg->msg_name,
+						 msg->msg_namelen);
+			if (err)
+				return err;
+		}
 	}
+
+	return 0;
 }
 
 int __unix_dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t size,
@@ -2466,8 +2530,11 @@  int __unix_dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t size,
 						EPOLLOUT | EPOLLWRNORM |
 						EPOLLWRBAND);
 
-	if (msg->msg_name)
-		unix_copy_addr(msg, skb->sk);
+	if (msg->msg_name) {
+		err = unix_recvmsg_copy_addr(msg, skb->sk);
+		if (err)
+			goto out_free;
+	}
 
 	if (size > skb->len - skip)
 		size = skb->len - skip;
@@ -2821,7 +2888,9 @@  static int unix_stream_read_generic(struct unix_stream_read_state *state,
 		if (state->msg && state->msg->msg_name) {
 			DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr,
 					 state->msg->msg_name);
-			unix_copy_addr(state->msg, skb->sk);
+			err = unix_recvmsg_copy_addr(state->msg, skb->sk);
+			if (err)
+				break;
 			sunaddr = NULL;
 		}
 
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 9e3c33f83bba..b73e4da458fd 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -999,17 +999,21 @@  enum bpf_attach_type {
 	BPF_SK_MSG_VERDICT,
 	BPF_CGROUP_INET4_BIND,
 	BPF_CGROUP_INET6_BIND,
+	BPF_CGROUP_UNIX_BIND,
 	BPF_CGROUP_INET4_CONNECT,
 	BPF_CGROUP_INET6_CONNECT,
+	BPF_CGROUP_UNIX_CONNECT,
 	BPF_CGROUP_INET4_POST_BIND,
 	BPF_CGROUP_INET6_POST_BIND,
 	BPF_CGROUP_UDP4_SENDMSG,
 	BPF_CGROUP_UDP6_SENDMSG,
+	BPF_CGROUP_UNIX_SENDMSG,
 	BPF_LIRC_MODE2,
 	BPF_FLOW_DISSECTOR,
 	BPF_CGROUP_SYSCTL,
 	BPF_CGROUP_UDP4_RECVMSG,
 	BPF_CGROUP_UDP6_RECVMSG,
+	BPF_CGROUP_UNIX_RECVMSG,
 	BPF_CGROUP_GETSOCKOPT,
 	BPF_CGROUP_SETSOCKOPT,
 	BPF_TRACE_RAW_TP,
@@ -1020,8 +1024,10 @@  enum bpf_attach_type {
 	BPF_TRACE_ITER,
 	BPF_CGROUP_INET4_GETPEERNAME,
 	BPF_CGROUP_INET6_GETPEERNAME,
+	BPF_CGROUP_UNIX_GETPEERNAME,
 	BPF_CGROUP_INET4_GETSOCKNAME,
 	BPF_CGROUP_INET6_GETSOCKNAME,
+	BPF_CGROUP_UNIX_GETSOCKNAME,
 	BPF_XDP_DEVMAP,
 	BPF_CGROUP_INET_SOCK_RELEASE,
 	BPF_XDP_CPUMAP,
@@ -2575,8 +2581,8 @@  union bpf_attr {
  * 		*bpf_socket* should be one of the following:
  *
  * 		* **struct bpf_sock_ops** for **BPF_PROG_TYPE_SOCK_OPS**.
- * 		* **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT**
- * 		  and **BPF_CGROUP_INET6_CONNECT**.
+ * 		* **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT**,
+ * 		  **BPF_CGROUP_INET6_CONNECT** and **BPF_CGROUP_UNIX_CONNECT**.
  *
  * 		This helper actually implements a subset of **setsockopt()**.
  * 		It supports the following *level*\ s:
@@ -2809,8 +2815,8 @@  union bpf_attr {
  * 		*bpf_socket* should be one of the following:
  *
  * 		* **struct bpf_sock_ops** for **BPF_PROG_TYPE_SOCK_OPS**.
- * 		* **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT**
- * 		  and **BPF_CGROUP_INET6_CONNECT**.
+ * 		* **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT**,
+ * 		  **BPF_CGROUP_INET6_CONNECT** and **BPF_CGROUP_UNIX_CONNECT**.
  *
  * 		This helper actually implements a subset of **getsockopt()**.
  * 		It supports the same set of *optname*\ s that is supported by