diff mbox series

[v3,6/9] io_uring/cmd: Introduce SOCKET_URING_OP_GETSOCKOPT

Message ID 20230817145554.892543-7-leitao@debian.org (mailing list archive)
State Not Applicable
Headers show
Series io_uring: Initial support for {s,g}etsockopt commands | expand

Checks

Context Check Description
netdev/tree_selection success Guessing tree name failed - patch did not apply, async
bpf/vmtest-bpf-PR success PR summary
bpf/vmtest-bpf-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-VM_Test-2 success Logs for build for aarch64 with gcc
bpf/vmtest-bpf-VM_Test-3 success Logs for build for s390x with gcc
bpf/vmtest-bpf-VM_Test-4 success Logs for build for x86_64 with gcc
bpf/vmtest-bpf-VM_Test-5 success Logs for build for x86_64 with llvm-16
bpf/vmtest-bpf-VM_Test-6 success Logs for set-matrix
bpf/vmtest-bpf-VM_Test-7 success Logs for test_maps on aarch64 with gcc
bpf/vmtest-bpf-VM_Test-8 success Logs for test_maps on s390x with gcc
bpf/vmtest-bpf-VM_Test-9 success Logs for test_maps on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-10 success Logs for test_maps on x86_64 with llvm-16
bpf/vmtest-bpf-VM_Test-11 success Logs for test_progs on aarch64 with gcc
bpf/vmtest-bpf-VM_Test-12 fail Logs for test_progs on s390x with gcc
bpf/vmtest-bpf-VM_Test-13 success Logs for test_progs on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-14 success Logs for test_progs on x86_64 with llvm-16
bpf/vmtest-bpf-VM_Test-15 success Logs for test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-VM_Test-16 success Logs for test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-VM_Test-17 success Logs for test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-18 success Logs for test_progs_no_alu32 on x86_64 with llvm-16
bpf/vmtest-bpf-VM_Test-19 success Logs for test_progs_no_alu32_parallel on aarch64 with gcc
bpf/vmtest-bpf-VM_Test-20 success Logs for test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-21 success Logs for test_progs_no_alu32_parallel on x86_64 with llvm-16
bpf/vmtest-bpf-VM_Test-22 success Logs for test_progs_parallel on aarch64 with gcc
bpf/vmtest-bpf-VM_Test-23 success Logs for test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-24 success Logs for test_progs_parallel on x86_64 with llvm-16
bpf/vmtest-bpf-VM_Test-25 success Logs for test_verifier on aarch64 with gcc
bpf/vmtest-bpf-VM_Test-26 success Logs for test_verifier on s390x with gcc
bpf/vmtest-bpf-VM_Test-27 success Logs for test_verifier on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-28 success Logs for test_verifier on x86_64 with llvm-16
bpf/vmtest-bpf-VM_Test-29 success Logs for veristat

Commit Message

Breno Leitao Aug. 17, 2023, 2:55 p.m. UTC
Add support for getsockopt command (SOCKET_URING_OP_GETSOCKOPT), where
level is SOL_SOCKET. This is leveraging the sockptr_t infrastructure,
where a sockptr_t is either userspace or kernel space, and handled as
such.

Function io_uring_cmd_getsockopt() is inspired by __sys_getsockopt().

Differently from the getsockopt(2), the optlen field is not a userspace
pointers. In getsockopt(2), userspace provides optlen pointer, which is
overwritten by the kernel.  In this implementation, userspace passes a
u32, and the new value is returned in cqe->res. I.e., optlen is not a
pointer.

Important to say that userspace needs to keep the pointer alive until
the CQE is completed.

Signed-off-by: Breno Leitao <leitao@debian.org>
---
 include/uapi/linux/io_uring.h |  7 +++++++
 io_uring/uring_cmd.c          | 38 +++++++++++++++++++++++++++++++++++
 2 files changed, 45 insertions(+)

Comments

Gabriel Krisman Bertazi Aug. 17, 2023, 6:38 p.m. UTC | #1
Breno Leitao <leitao@debian.org> writes:

> +#if defined(CONFIG_NET)
>  int io_uring_cmd_sock(struct io_uring_cmd *cmd, unsigned int issue_flags)
>  {
>  	struct socket *sock = cmd->file->private_data;
> @@ -189,8 +219,16 @@ int io_uring_cmd_sock(struct io_uring_cmd *cmd, unsigned int issue_flags)
>  		if (ret)
>  			return ret;
>  		return arg;
> +	case SOCKET_URING_OP_GETSOCKOPT:
> +		return io_uring_cmd_getsockopt(sock, cmd, issue_flags);
>  	default:
>  		return -EOPNOTSUPP;
>  	}
>  }
> +#else
> +int io_uring_cmd_sock(struct io_uring_cmd *cmd, unsigned int issue_flags)
> +{
> +	return -EOPNOTSUPP;
> +}
> +#endif
>  EXPORT_SYMBOL_GPL(io_uring_cmd_sock);

The CONFIG_NET guards are unrelated and need to go in a separate commit.
Specially because it is not only gating getsockopt, but also the already
merged SOCKET_URING_OP_SIOCINQ/_OP_SIOCOUTQ.
Breno Leitao Aug. 21, 2023, 9:09 a.m. UTC | #2
Hello Gabriel,

On Thu, Aug 17, 2023 at 02:38:46PM -0400, Gabriel Krisman Bertazi wrote:
> Breno Leitao <leitao@debian.org> writes:
> 
> > +#if defined(CONFIG_NET)
> >  int io_uring_cmd_sock(struct io_uring_cmd *cmd, unsigned int issue_flags)
> >  {
> >  	struct socket *sock = cmd->file->private_data;
> > @@ -189,8 +219,16 @@ int io_uring_cmd_sock(struct io_uring_cmd *cmd, unsigned int issue_flags)
> >  		if (ret)
> >  			return ret;
> >  		return arg;
> > +	case SOCKET_URING_OP_GETSOCKOPT:
> > +		return io_uring_cmd_getsockopt(sock, cmd, issue_flags);
> >  	default:
> >  		return -EOPNOTSUPP;
> >  	}
> >  }
> > +#else
> > +int io_uring_cmd_sock(struct io_uring_cmd *cmd, unsigned int issue_flags)
> > +{
> > +	return -EOPNOTSUPP;
> > +}
> > +#endif
> >  EXPORT_SYMBOL_GPL(io_uring_cmd_sock);
> 
> The CONFIG_NET guards are unrelated and need to go in a separate commit.
> Specially because it is not only gating getsockopt, but also the already
> merged SOCKET_URING_OP_SIOCINQ/_OP_SIOCOUTQ.

Well, so far, if CONFIG_NET is disable, and you call
io_uring_cmd_getsockopt, the callbacks will be called and returned
-EOPNOTSUPP.

With this new patch, it will eventually call sk_getsockopt which does
not exist in the CONFIG_NET=n world. That is why I have this
protection now. I.e, this `#if defined(CONFIG_NET)` is only necessary now,
since it is the first time this function (io_uring_cmd_sock) will call a
function that does not exist if CONFIG_NET is disabled.

I can split it in a different patch, if you think it makes a difference.
Gabriel Krisman Bertazi Aug. 21, 2023, 2:52 p.m. UTC | #3
Breno Leitao <leitao@debian.org> writes:

> Hello Gabriel,
>
> On Thu, Aug 17, 2023 at 02:38:46PM -0400, Gabriel Krisman Bertazi wrote:
>> Breno Leitao <leitao@debian.org> writes:
>> 
>> > +#if defined(CONFIG_NET)
>> >  int io_uring_cmd_sock(struct io_uring_cmd *cmd, unsigned int issue_flags)
>> >  {
>> >  	struct socket *sock = cmd->file->private_data;
>> > @@ -189,8 +219,16 @@ int io_uring_cmd_sock(struct io_uring_cmd *cmd, unsigned int issue_flags)
>> >  		if (ret)
>> >  			return ret;
>> >  		return arg;
>> > +	case SOCKET_URING_OP_GETSOCKOPT:
>> > +		return io_uring_cmd_getsockopt(sock, cmd, issue_flags);
>> >  	default:
>> >  		return -EOPNOTSUPP;
>> >  	}
>> >  }
>> > +#else
>> > +int io_uring_cmd_sock(struct io_uring_cmd *cmd, unsigned int issue_flags)
>> > +{
>> > +	return -EOPNOTSUPP;
>> > +}
>> > +#endif
>> >  EXPORT_SYMBOL_GPL(io_uring_cmd_sock);
>> 
>> The CONFIG_NET guards are unrelated and need to go in a separate commit.
>> Specially because it is not only gating getsockopt, but also the already
>> merged SOCKET_URING_OP_SIOCINQ/_OP_SIOCOUTQ.
>
> Well, so far, if CONFIG_NET is disable, and you call
> io_uring_cmd_getsockopt, the callbacks will be called and returned
> -EOPNOTSUPP.

I'm not talking about io_uring_cmd_getsockopt; that would obviously
return -EOPNOTSUPP before as well.  But you are changing
io_uring_cmd_sock, which does more things:

int io_uring_cmd_sock(struct io_uring_cmd *cmd, unsigned int issue_flags)
{
        [...]
	if (!prot || !prot->ioctl)
		return -EOPNOTSUPP;

	switch (cmd->sqe->cmd_op) {
	case SOCKET_URING_OP_SIOCINQ:
		ret = prot->ioctl(sk, SIOCINQ, &arg);
		if (ret)
			return ret;
		return arg;
	case SOCKET_URING_OP_SIOCOUTQ:
		ret = prot->ioctl(sk, SIOCOUTQ, &arg);
		if (ret)
			return ret;

With your patch, these two cmd_op, are now being gated by CONFIG_NET.  I
think it makes sense for them to be gated by it, But it should
be in a separated change, or at least explained in your commit message.

> With this new patch, it will eventually call sk_getsockopt which does
> not exist in the CONFIG_NET=n world. That is why I have this
> protection now. I.e, this `#if defined(CONFIG_NET)` is only necessary now,
> since it is the first time this function (io_uring_cmd_sock) will call a
> function that does not exist if CONFIG_NET is disabled.
>
> I can split it in a different patch, if you think it makes a difference.
diff mbox series

Patch

diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 9fc7195f25df..8152151080db 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -43,6 +43,10 @@  struct io_uring_sqe {
 	union {
 		__u64	addr;	/* pointer to buffer or iovecs */
 		__u64	splice_off_in;
+		struct {
+			__u32	level;
+			__u32	optname;
+		};
 	};
 	__u32	len;		/* buffer size or number of iovecs */
 	union {
@@ -79,6 +83,7 @@  struct io_uring_sqe {
 	union {
 		__s32	splice_fd_in;
 		__u32	file_index;
+		__u32	optlen;
 		struct {
 			__u16	addr_len;
 			__u16	__pad3[1];
@@ -89,6 +94,7 @@  struct io_uring_sqe {
 			__u64	addr3;
 			__u64	__pad2[1];
 		};
+		__u64	optval;
 		/*
 		 * If the ring is initialized with IORING_SETUP_SQE128, then
 		 * this field is used for 80 bytes of arbitrary command data
@@ -729,6 +735,7 @@  struct io_uring_recvmsg_out {
 enum {
 	SOCKET_URING_OP_SIOCINQ		= 0,
 	SOCKET_URING_OP_SIOCOUTQ,
+	SOCKET_URING_OP_GETSOCKOPT,
 };
 
 #ifdef __cplusplus
diff --git a/io_uring/uring_cmd.c b/io_uring/uring_cmd.c
index 5f32083bd0a5..7b768f1bf4df 100644
--- a/io_uring/uring_cmd.c
+++ b/io_uring/uring_cmd.c
@@ -168,6 +168,36 @@  int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw,
 }
 EXPORT_SYMBOL_GPL(io_uring_cmd_import_fixed);
 
+INDIRECT_CALLABLE_DECLARE(bool tcp_bpf_bypass_getsockopt(int level,
+							 int optname));
+static inline int io_uring_cmd_getsockopt(struct socket *sock,
+					  struct io_uring_cmd *cmd,
+					  unsigned int issue_flags)
+{
+	void __user *optval = u64_to_user_ptr(READ_ONCE(cmd->sqe->optval));
+	int optname = READ_ONCE(cmd->sqe->optname);
+	int optlen = READ_ONCE(cmd->sqe->optlen);
+	int level = READ_ONCE(cmd->sqe->level);
+	int err;
+
+	err = security_socket_getsockopt(sock, level, optname);
+	if (err)
+		return err;
+
+	if (level == SOL_SOCKET) {
+		err = sk_getsockopt(sock->sk, level, optname,
+				    USER_SOCKPTR(optval),
+				    KERNEL_SOCKPTR(&optlen));
+		if (err)
+			return err;
+
+		return optlen;
+	}
+
+	return -EOPNOTSUPP;
+}
+
+#if defined(CONFIG_NET)
 int io_uring_cmd_sock(struct io_uring_cmd *cmd, unsigned int issue_flags)
 {
 	struct socket *sock = cmd->file->private_data;
@@ -189,8 +219,16 @@  int io_uring_cmd_sock(struct io_uring_cmd *cmd, unsigned int issue_flags)
 		if (ret)
 			return ret;
 		return arg;
+	case SOCKET_URING_OP_GETSOCKOPT:
+		return io_uring_cmd_getsockopt(sock, cmd, issue_flags);
 	default:
 		return -EOPNOTSUPP;
 	}
 }
+#else
+int io_uring_cmd_sock(struct io_uring_cmd *cmd, unsigned int issue_flags)
+{
+	return -EOPNOTSUPP;
+}
+#endif
 EXPORT_SYMBOL_GPL(io_uring_cmd_sock);