diff mbox series

[v2,8/8] io_uring/cmd: BPF hook for setsockopt cmd

Message ID 20230808134049.1407498-9-leitao@debian.org (mailing list archive)
State Superseded
Headers show
Series io_uring: Initial support for {s,g}etsockopt commands | expand

Checks

Context Check Description
netdev/tree_selection success Guessing tree name failed - patch did not apply
bpf/vmtest-bpf-PR success PR summary
bpf/vmtest-bpf-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-VM_Test-2 success Logs for build for aarch64 with gcc
bpf/vmtest-bpf-VM_Test-3 pending Logs for build for s390x with gcc
bpf/vmtest-bpf-VM_Test-4 success Logs for build for x86_64 with gcc
bpf/vmtest-bpf-VM_Test-5 success Logs for build for x86_64 with llvm-16
bpf/vmtest-bpf-VM_Test-6 success Logs for set-matrix

Commit Message

Breno Leitao Aug. 8, 2023, 1:40 p.m. UTC
Add support for BPF hooks for the io_uring setsockopt command.

This implementation follows a similar approach to what
__sys_setsockopt() does, but operates only on kernel memory instead of
user memory (which is also possible, but not preferred, since the kernel
memory is already available).

Signed-off-by: Breno Leitao <leitao@debian.org>
---
 io_uring/uring_cmd.c | 23 +++++++++++++++++++++--
 1 file changed, 21 insertions(+), 2 deletions(-)

Comments

Martin KaFai Lau Aug. 9, 2023, 10:02 p.m. UTC | #1
On 8/8/23 6:40 AM, Breno Leitao wrote:
> Add support for BPF hooks for io_uring setsockopts command.
> 
> This implementation follows a similar approach to what
> __sys_setsockopt() does, but, operates only on kernel memory instead of
> user memory (which is also possible, but not preferred since the kernel
> memory is already available)
> 
> Signed-off-by: Breno Leitao <leitao@debian.org>
> ---
>   io_uring/uring_cmd.c | 23 +++++++++++++++++++++--
>   1 file changed, 21 insertions(+), 2 deletions(-)
> 
> diff --git a/io_uring/uring_cmd.c b/io_uring/uring_cmd.c
> index 3693e5779229..b7b27e4dbddd 100644
> --- a/io_uring/uring_cmd.c
> +++ b/io_uring/uring_cmd.c
> @@ -205,23 +205,42 @@ static inline int io_uring_cmd_setsockopt(struct socket *sock,
>   {
>   	void __user *optval = u64_to_user_ptr(READ_ONCE(cmd->sqe->optval));
>   	int optname = READ_ONCE(cmd->sqe->optname);
> +	sockptr_t optval_s = USER_SOCKPTR(optval);
>   	int optlen = READ_ONCE(cmd->sqe->optlen);
>   	int level = READ_ONCE(cmd->sqe->level);
> +	char *kernel_optval = NULL;
>   	int err;
>   
>   	err = security_socket_setsockopt(sock, level, optname);
>   	if (err)
>   		return err;
>   
> +	if (!in_compat_syscall()) {
> +		err = BPF_CGROUP_RUN_PROG_SETSOCKOPT(sock->sk, &level,
> +						     &optname,
> +						     USER_SOCKPTR(optval),
> +						     &optlen,
> +						     &kernel_optval);
> +		if (err < 0)
> +			return err;
> +		if (err > 0)
> +			return 0;
> +
> +		/* Replace optval by the one returned by BPF */
> +		if (kernel_optval)
> +			optval_s = KERNEL_SOCKPTR(kernel_optval);
> +	}
> +
>   	if (level == SOL_SOCKET && !sock_use_custom_sol_socket(sock))
>   		err = sock_setsockopt(sock, level, optname,
> -				      USER_SOCKPTR(optval), optlen);
> +				      optval_s, optlen);
>   	else if (unlikely(!sock->ops->setsockopt))
>   		err = -EOPNOTSUPP;
>   	else
>   		err = sock->ops->setsockopt(sock, level, optname,
> -					    USER_SOCKPTR(koptval), optlen);
> +					    optval_s, optlen);

The bpf side changes make sense. Thanks.

With all the bpf pieces in place, __sys_{get,set}sockopt() is looking very 
similar to io_uring_cmd_{get,set}sockopt(). There are small differences like one 
takes fd and another already has a sock ptr, and io_uring_cmd_getsockopt() is 
SOL_SOCKET only. In general, can they be refactored somehow such that future 
changes don't have to be made in multiple places?
diff mbox series

Patch

diff --git a/io_uring/uring_cmd.c b/io_uring/uring_cmd.c
index 3693e5779229..b7b27e4dbddd 100644
--- a/io_uring/uring_cmd.c
+++ b/io_uring/uring_cmd.c
@@ -205,23 +205,42 @@  static inline int io_uring_cmd_setsockopt(struct socket *sock,
 {
 	void __user *optval = u64_to_user_ptr(READ_ONCE(cmd->sqe->optval));
 	int optname = READ_ONCE(cmd->sqe->optname);
+	sockptr_t optval_s = USER_SOCKPTR(optval);
 	int optlen = READ_ONCE(cmd->sqe->optlen);
 	int level = READ_ONCE(cmd->sqe->level);
+	char *kernel_optval = NULL;
 	int err;
 
 	err = security_socket_setsockopt(sock, level, optname);
 	if (err)
 		return err;
 
+	if (!in_compat_syscall()) {
+		err = BPF_CGROUP_RUN_PROG_SETSOCKOPT(sock->sk, &level,
+						     &optname,
+						     USER_SOCKPTR(optval),
+						     &optlen,
+						     &kernel_optval);
+		if (err < 0)
+			return err;
+		if (err > 0)
+			return 0;
+
+		/* Replace optval by the one returned by BPF */
+		if (kernel_optval)
+			optval_s = KERNEL_SOCKPTR(kernel_optval);
+	}
+
 	if (level == SOL_SOCKET && !sock_use_custom_sol_socket(sock))
 		err = sock_setsockopt(sock, level, optname,
-				      USER_SOCKPTR(optval), optlen);
+				      optval_s, optlen);
 	else if (unlikely(!sock->ops->setsockopt))
 		err = -EOPNOTSUPP;
 	else
 		err = sock->ops->setsockopt(sock, level, optname,
-					    USER_SOCKPTR(koptval), optlen);
+					    optval_s, optlen);
 
+	kfree(kernel_optval);
 	return err;
 }