diff mbox series

[mptcp-next,3/5] mptcp: add MPTCP_TCPINFO getsockopt support

Message ID 20210811131523.6339-4-fw@strlen.de (mailing list archive)
State Superseded, archived
Delegated to: Paolo Abeni
Headers show
Series mptcp: add SOL_MPTCP getsockopt support | expand

Commit Message

Florian Westphal Aug. 11, 2021, 1:15 p.m. UTC
Allow users to retrieve TCP_INFO data of all subflows.
Users need to pre-initialize a meta header that has to be
prepended to the data buffer that will be filled with the
tcp info data.

The meta header looks like this:

struct mptcp_subflow_data {
 __u32 size_subflow_data;/* size of this structure in userspace */
 __u32 num_subflows;	/* must be 0, set by kernel */
 __u32 size_kernel;	/* must be 0, set by kernel */
 __u32 size_user;	/* size of one element in data[] */
} __attribute__((aligned(8)));

size_subflow_data has to be set to 'sizeof(struct mptcp_subflow_data)'.
This allows to extend mptcp_subflow_data structure later on without
breaking backwards compatibility.

If the structure is extended later on, kernel knows where the
userspace-provided meta header ends, even if userspace uses an
older (smaller) version of the structure.

num_subflows must be set to 0. If the getsockopt request
succeeds (return value is 0), it will be updated to contain
the number of active subflows for the given logical connection.

size_kernel must be set to 0. If the getsockopt request
is successful, it will contain the size of the 'struct tcp_info'
as known by the kernel.  This is informational only.

size_user must be set to 'sizeof(struct tcp_info)'.
This allows the kernel to only fill in the space reserved/expected
by userspace.

Example:

struct my_tcp_info {
  struct mptcp_subflow_data d;
  struct tcp_info ti[2];
};
struct my_tcp_info ti;
socklen_t olen;

memset(&ti, 0, sizeof(ti));

ti.d.size_subflow_data = sizeof(struct mptcp_subflow_data);
ti.d.size_user = sizeof(struct tcp_info);
olen = sizeof(ti);

ret = getsockopt(fd, SOL_MPTCP, MPTCP_TCPINFO, &ti, &olen);
if (ret < 0)
	die_perror("getsockopt MPTCP_TCPINFO");

mptcp_subflow_data.num_subflows is populated with the number of
subflows that exist on the kernel side for the logical mptcp connection.

This allows userspace to re-try with a larger tcp_info array if
the number of subflows was larger than the available space in the ti[]
array.

olen has to be set to the number of bytes that userspace has allocated
to receive the kernel data.  It will be updated to contain the real
number bytes that have been copied to by the kernel.

In the above example, if the number if subflows was 1,
olen is equal to 'sizeof(struct mptcp_subflow_data) + sizeof(struct tcp_info).
For 2 or more subflows olen is equal to 'sizeof(struct my_tcp_info)'.

olen is never increased vs. what was reserved.
If there was more data that could not be copied due to lack of space
in the option buffer, userspace can detect this by checking
mptcp_subflow_data->num_subflows.

Signed-off-by: Florian Westphal <fw@strlen.de>
---
 include/uapi/linux/mptcp.h | 10 ++++-
 net/mptcp/sockopt.c        | 92 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 101 insertions(+), 1 deletion(-)

Comments

Mat Martineau Aug. 12, 2021, 5:03 p.m. UTC | #1
On Wed, 11 Aug 2021, Florian Westphal wrote:

> diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c
> index eba294a071c8..ac8e6823db4f 100644
> --- a/net/mptcp/sockopt.c
> +++ b/net/mptcp/sockopt.c
> @@ -723,6 +723,96 @@ static int mptcp_getsockopt_info(struct mptcp_sock *msk, char __user *optval, in
>
> 	if (copy_to_user(optval, &m_info, len))
> 		return -EFAULT;
> +	return 0;
> +}
> +
> +static int mptcp_get_subflow_data(struct mptcp_subflow_data *sfd,
> +				  char __user *optval, int __user *_u_optlen)
> +{
> +#define MIN_INFO_OPTLEN_SIZE	16

This caught my eye on my first quick review pass, I'm not accustomed to 
seeing inline defines like this in the kernel unless it's part of some 
macro magic. The style guide / checkpatch don't say anything, so I'll just 
suggest it would fit better outside the function scope (but no big deal).


For the rest of the patch set, Paolo already covered the expanded test 
cases, so I'll continue with a more in-depth review when you post v2. 
This iteration of the API is looking like the right direction to me.


Thanks,

Mat


> +	int len, copylen;
> +
> +	if (get_user(len, _u_optlen))
> +		return -EFAULT;
> +
> +	if (len <= MIN_INFO_OPTLEN_SIZE)
> +		return -EINVAL;
> +
> +	memset(sfd, 0, sizeof(*sfd));
> +
> +	copylen = min_t(unsigned int, len, sizeof(*sfd));
> +	if (copy_from_user(sfd, optval, copylen))
> +		return -EFAULT;
> +
> +	/* size_subflow_data is u32, but len is signed */
> +	if (sfd->size_subflow_data > INT_MAX ||
> +	    sfd->size_user > INT_MAX)
> +		return -EINVAL;
> +
> +	if (sfd->size_subflow_data < MIN_INFO_OPTLEN_SIZE ||
> +	    sfd->size_subflow_data > len)
> +		return -EINVAL;
> +
> +	if (sfd->num_subflows || sfd->size_kernel)
> +		return -EINVAL;
> +
> +	return len - sfd->size_subflow_data;
> +}

--
Mat Martineau
Intel
Florian Westphal Aug. 12, 2021, 6:41 p.m. UTC | #2
Mat Martineau <mathew.j.martineau@linux.intel.com> wrote:
> 
> 
> On Wed, 11 Aug 2021, Florian Westphal wrote:
> 
> > diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c
> > index eba294a071c8..ac8e6823db4f 100644
> > --- a/net/mptcp/sockopt.c
> > +++ b/net/mptcp/sockopt.c
> > @@ -723,6 +723,96 @@ static int mptcp_getsockopt_info(struct mptcp_sock *msk, char __user *optval, in
> > 
> > 	if (copy_to_user(optval, &m_info, len))
> > 		return -EFAULT;
> > +	return 0;
> > +}
> > +
> > +static int mptcp_get_subflow_data(struct mptcp_subflow_data *sfd,
> > +				  char __user *optval, int __user *_u_optlen)
> > +{
> > +#define MIN_INFO_OPTLEN_SIZE	16
> 
> This caught my eye on my first quick review pass, I'm not accustomed to
> seeing inline defines like this in the kernel unless it's part of some macro
> magic. The style guide / checkpatch don't say anything, so I'll just suggest
> it would fit better outside the function scope (but no big deal).

An alternative would be to use 'sizeof struct mptcp_subflow_data' here
and change that structure a bit.

Instead of storing the size, add a 'version/flags' field.

That would also allow to extend the structure later: kernel would
expose a 'struct mptcp_subflow_data_v2' or similar if the need would
arise and userspace would tell kernel the size in indirect fashion by
setting a special 'v2 enabled' field.
diff mbox series

Patch

diff --git a/include/uapi/linux/mptcp.h b/include/uapi/linux/mptcp.h
index c450feba6f9c..7c368fccd83e 100644
--- a/include/uapi/linux/mptcp.h
+++ b/include/uapi/linux/mptcp.h
@@ -194,7 +194,15 @@  enum mptcp_event_attr {
 #define MPTCP_RST_EBADPERF	5
 #define MPTCP_RST_EMIDDLEBOX	6
 
+struct mptcp_subflow_data {
+	__u32		size_subflow_data;		/* size of this structure in userspace */
+	__u32		num_subflows;			/* must be 0, set by kernel */
+	__u32		size_kernel;			/* must be 0, set by kernel */
+	__u32		size_user;			/* size of one element in data[] */
+} __attribute__((aligned(8)));
+
 /* MPTCP socket options */
-#define MPTCP_INFO 1
+#define MPTCP_INFO		1
+#define MPTCP_TCPINFO		2
 
 #endif /* _UAPI_MPTCP_H */
diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c
index eba294a071c8..ac8e6823db4f 100644
--- a/net/mptcp/sockopt.c
+++ b/net/mptcp/sockopt.c
@@ -723,6 +723,96 @@  static int mptcp_getsockopt_info(struct mptcp_sock *msk, char __user *optval, in
 
 	if (copy_to_user(optval, &m_info, len))
 		return -EFAULT;
+	return 0;
+}
+
+static int mptcp_get_subflow_data(struct mptcp_subflow_data *sfd,
+				  char __user *optval, int __user *_u_optlen)
+{
+#define MIN_INFO_OPTLEN_SIZE	16
+	int len, copylen;
+
+	if (get_user(len, _u_optlen))
+		return -EFAULT;
+
+	if (len <= MIN_INFO_OPTLEN_SIZE)
+		return -EINVAL;
+
+	memset(sfd, 0, sizeof(*sfd));
+
+	copylen = min_t(unsigned int, len, sizeof(*sfd));
+	if (copy_from_user(sfd, optval, copylen))
+		return -EFAULT;
+
+	/* size_subflow_data is u32, but len is signed */
+	if (sfd->size_subflow_data > INT_MAX ||
+	    sfd->size_user > INT_MAX)
+		return -EINVAL;
+
+	if (sfd->size_subflow_data < MIN_INFO_OPTLEN_SIZE ||
+	    sfd->size_subflow_data > len)
+		return -EINVAL;
+
+	if (sfd->num_subflows || sfd->size_kernel)
+		return -EINVAL;
+
+	return len - sfd->size_subflow_data;
+}
+
+static int mptcp_getsockopt_tcpinfo(struct mptcp_sock *msk, char __user *optval,
+				    int __user *_u_optlen)
+{
+	struct mptcp_subflow_context *subflow;
+	struct sock *sk = &msk->sk.icsk_inet.sk;
+	unsigned int sfcount = 0, copied = 0;
+	struct mptcp_subflow_data sfd;
+	char __user *infoptr;
+	int len;
+
+	len = mptcp_get_subflow_data(&sfd, optval, _u_optlen);
+	if (len < 0)
+		return len;
+
+	sfd.size_kernel = sizeof(struct tcp_info);
+	sfd.size_user = min_t(unsigned int, sfd.size_user,
+			      sizeof(struct tcp_info));
+
+	infoptr = optval + sfd.size_subflow_data;
+
+	lock_sock(sk);
+
+	mptcp_for_each_subflow(msk, subflow) {
+		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+
+		++sfcount;
+
+		if (len >= sfd.size_user) {
+			struct tcp_info info;
+
+			tcp_get_info(ssk, &info);
+
+			if (copy_to_user(infoptr, &info, sfd.size_user)) {
+				release_sock(sk);
+				return -EFAULT;
+			}
+
+			infoptr += sfd.size_user;
+			copied += sfd.size_user;
+			len -= sfd.size_user;
+		}
+	}
+
+	release_sock(sk);
+
+	copied += sfd.size_subflow_data;
+
+	if (put_user(copied, _u_optlen))
+		return -EFAULT;
+
+	sfd.num_subflows = sfcount;
+
+	if (copy_to_user(optval, &sfd, sfd.size_subflow_data))
+		return -EFAULT;
 
 	return 0;
 }
@@ -747,6 +837,8 @@  static int mptcp_getsockopt_sol_mptcp(struct mptcp_sock *msk, int optname,
 	switch (optname) {
 	case MPTCP_INFO:
 		return mptcp_getsockopt_info(msk, optval, optlen);
+	case MPTCP_TCPINFO:
+		return mptcp_getsockopt_tcpinfo(msk, optval, optlen);
 	}
 
 	return -EOPNOTSUPP;