diff mbox series

[net-next,v6,2/4] sock: support copy cmsg to userspace in TX path

Message ID 20240626193403.3854451-3-zijianzhang@bytedance.com (mailing list archive)
State Changes Requested
Delegated to: Netdev Maintainers
Headers show
Series net: A lightweight zero-copy notification | expand

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for net-next, async
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 929 this patch: 929
netdev/build_tools success Errors and warnings before: 0 this patch: 0
netdev/cc_maintainers warning 4 maintainers not CCed: dsahern@kernel.org pabeni@redhat.com kuba@kernel.org axboe@kernel.dk
netdev/build_clang success Errors and warnings before: 1999 this patch: 1999
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn fail Errors and warnings before: 16130 this patch: 16132
netdev/checkpatch warning WARNING: line length of 82 exceeds 80 columns WARNING: line length of 86 exceeds 80 columns
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 33 this patch: 33
netdev/source_inline success Was 0 now: 0

Commit Message

Zijian Zhang June 26, 2024, 7:34 p.m. UTC
From: Zijian Zhang <zijianzhang@bytedance.com>

Since ____sys_sendmsg creates a kernel copy of msg_control and passes
that to the callees, put_cmsg will write into this kernel buffer. If
people want to piggyback some information like timestamps upon returning
of sendmsg. ____sys_sendmsg will have to copy_to_user to the original buf,
which is not supported. As a result, users typically have to call recvmsg
on the ERRMSG_QUEUE of the socket, incurring extra system call overhead.

This commit supports copying cmsg to userspace in TX path by introducing
a flag MSG_CMSG_COPY_TO_USER in struct msghdr to guide the copy logic
upon returning of ___sys_sendmsg.

Signed-off-by: Zijian Zhang <zijianzhang@bytedance.com>
Signed-off-by: Xiaochun Lu <xiaochun.lu@bytedance.com>
---
 include/linux/socket.h |  6 ++++++
 net/core/sock.c        |  2 ++
 net/ipv4/ip_sockglue.c |  2 ++
 net/ipv6/datagram.c    |  3 +++
 net/socket.c           | 45 ++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 58 insertions(+)

Comments

Jakub Kicinski June 27, 2024, 9:51 p.m. UTC | #1
On Wed, 26 Jun 2024 19:34:01 +0000 zijianzhang@bytedance.com wrote:
> From: Zijian Zhang <zijianzhang@bytedance.com>
> 
> Since ____sys_sendmsg creates a kernel copy of msg_control and passes
> that to the callees, put_cmsg will write into this kernel buffer. If
> people want to piggyback some information like timestamps upon returning
> of sendmsg. ____sys_sendmsg will have to copy_to_user to the original buf,
> which is not supported. As a result, users typically have to call recvmsg
> on the ERRMSG_QUEUE of the socket, incurring extra system call overhead.
> 
> This commit supports copying cmsg to userspace in TX path by introducing
> a flag MSG_CMSG_COPY_TO_USER in struct msghdr to guide the copy logic
> upon returning of ___sys_sendmsg.

sparse complains about the annotations:

net/socket.c:2635:30: warning: incorrect type in assignment (different address spaces)
net/socket.c:2635:30:    expected void *msg_control
net/socket.c:2635:30:    got void [noderef] __user *[noderef] __user msg_control
net/socket.c:2629:49: warning: dereference of noderef expression
kernel test robot June 28, 2024, 3:11 a.m. UTC | #2
Hi,

kernel test robot noticed the following build warnings:

[auto build test WARNING on net-next/main]

url:    https://github.com/intel-lab-lkp/linux/commits/zijianzhang-bytedance-com/selftests-fix-OOM-problem-in-msg_zerocopy-selftest/20240627-150801
base:   net-next/main
patch link:    https://lore.kernel.org/r/20240626193403.3854451-3-zijianzhang%40bytedance.com
patch subject: [PATCH net-next v6 2/4] sock: support copy cmsg to userspace in TX path
config: i386-randconfig-062-20240628 (https://download.01.org/0day-ci/archive/20240628/202406281051.NAiS477l-lkp@intel.com/config)
compiler: gcc-13 (Ubuntu 13.2.0-4ubuntu3) 13.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240628/202406281051.NAiS477l-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202406281051.NAiS477l-lkp@intel.com/

sparse warnings: (new ones prefixed by >>)
>> net/socket.c:2635:30: sparse: sparse: incorrect type in assignment (different address spaces) @@     expected void *msg_control @@     got void [noderef] __user *[noderef] __user msg_control @@
   net/socket.c:2635:30: sparse:     expected void *msg_control
   net/socket.c:2635:30: sparse:     got void [noderef] __user *[noderef] __user msg_control
>> net/socket.c:2629:49: sparse: sparse: dereference of noderef expression

vim +2635 net/socket.c

  2623	
  2624	static int sendmsg_copy_cmsg_to_user(struct msghdr *msg_sys,
  2625					     struct user_msghdr __user *umsg)
  2626	{
  2627		struct compat_msghdr __user *umsg_compat =
  2628					(struct compat_msghdr __user *)umsg;
> 2629		unsigned long cmsg_ptr = (unsigned long)umsg->msg_control;
  2630		unsigned int flags = msg_sys->msg_flags;
  2631		struct msghdr msg_user = *msg_sys;
  2632		struct cmsghdr *cmsg;
  2633		int err;
  2634	
> 2635		msg_user.msg_control = umsg->msg_control;
  2636		msg_user.msg_control_is_user = true;
  2637		for_each_cmsghdr(cmsg, msg_sys) {
  2638			if (!CMSG_OK(msg_sys, cmsg))
  2639				break;
  2640			if (cmsg_copy_to_user(cmsg))
  2641				put_cmsg(&msg_user, cmsg->cmsg_level, cmsg->cmsg_type,
  2642					 cmsg->cmsg_len - sizeof(*cmsg), CMSG_DATA(cmsg));
  2643		}
  2644	
  2645		err = __put_user((msg_sys->msg_flags & ~MSG_CMSG_COMPAT), COMPAT_FLAGS(umsg));
  2646		if (err)
  2647			return err;
  2648		if (MSG_CMSG_COMPAT & flags)
  2649			err = __put_user((unsigned long)msg_user.msg_control - cmsg_ptr,
  2650					 &umsg_compat->msg_controllen);
  2651		else
  2652			err = __put_user((unsigned long)msg_user.msg_control - cmsg_ptr,
  2653					 &umsg->msg_controllen);
  2654		return err;
  2655	}
  2656
Willem de Bruijn June 30, 2024, 2:43 p.m. UTC | #3
zijianzhang@ wrote:
> From: Zijian Zhang <zijianzhang@bytedance.com>
> 
> Since ____sys_sendmsg creates a kernel copy of msg_control and passes
> that to the callees, put_cmsg will write into this kernel buffer. If
> people want to piggyback some information like timestamps upon returning
> of sendmsg. ____sys_sendmsg will have to copy_to_user to the original buf,
> which is not supported. As a result, users typically have to call recvmsg
> on the ERRMSG_QUEUE of the socket, incurring extra system call overhead.
> 
> This commit supports copying cmsg to userspace in TX path by introducing
> a flag MSG_CMSG_COPY_TO_USER in struct msghdr to guide the copy logic
> upon returning of ___sys_sendmsg.
> 
> Signed-off-by: Zijian Zhang <zijianzhang@bytedance.com>
> Signed-off-by: Xiaochun Lu <xiaochun.lu@bytedance.com>
> ---
>  include/linux/socket.h |  6 ++++++
>  net/core/sock.c        |  2 ++
>  net/ipv4/ip_sockglue.c |  2 ++
>  net/ipv6/datagram.c    |  3 +++
>  net/socket.c           | 45 ++++++++++++++++++++++++++++++++++++++++++
>  5 files changed, 58 insertions(+)
> 
> diff --git a/include/linux/socket.h b/include/linux/socket.h
> index 89d16b90370b..35adc30c9db6 100644
> --- a/include/linux/socket.h
> +++ b/include/linux/socket.h
> @@ -168,6 +168,11 @@ static inline struct cmsghdr * cmsg_nxthdr (struct msghdr *__msg, struct cmsghdr
>  	return __cmsg_nxthdr(__msg->msg_control, __msg->msg_controllen, __cmsg);
>  }
>  
> +static inline bool cmsg_copy_to_user(struct cmsghdr *__cmsg)
> +{
> +	return 0;
> +}
> +
>  static inline size_t msg_data_left(struct msghdr *msg)
>  {
>  	return iov_iter_count(&msg->msg_iter);
> @@ -329,6 +334,7 @@ struct ucred {
>  
>  #define MSG_ZEROCOPY	0x4000000	/* Use user data in kernel path */
>  #define MSG_SPLICE_PAGES 0x8000000	/* Splice the pages from the iterator in sendmsg() */
> +#define MSG_CMSG_COPY_TO_USER	0x10000000	/* Copy cmsg to user space */

Careful that userspace must not be able to set this bit. See also
MSG_INTERNAL_SENDMSG_FLAGS.

Perhaps better to define a bit like msg_control_is_user.

>  #define MSG_FASTOPEN	0x20000000	/* Send data in TCP SYN */
>  #define MSG_CMSG_CLOEXEC 0x40000000	/* Set close_on_exec for file
>  					   descriptor received through
> diff --git a/net/core/sock.c b/net/core/sock.c
> index 9abc4fe25953..4a766a91ff5c 100644
> --- a/net/core/sock.c
> +++ b/net/core/sock.c
> @@ -2879,6 +2879,8 @@ int sock_cmsg_send(struct sock *sk, struct msghdr *msg,
>  	for_each_cmsghdr(cmsg, msg) {
>  		if (!CMSG_OK(msg, cmsg))
>  			return -EINVAL;
> +		if (cmsg_copy_to_user(cmsg))
> +			msg->msg_flags |= MSG_CMSG_COPY_TO_USER;

Probably better to pass msg to __sock_cmsg_send and only set this
field in the specific cmsg handler that uses it.

>  		if (cmsg->cmsg_level != SOL_SOCKET)
>  			continue;
>  		ret = __sock_cmsg_send(sk, cmsg, sockc);
> diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
> index cf377377b52d..464d08b27fa8 100644
> --- a/net/ipv4/ip_sockglue.c
> +++ b/net/ipv4/ip_sockglue.c
> @@ -249,6 +249,8 @@ int ip_cmsg_send(struct sock *sk, struct msghdr *msg, struct ipcm_cookie *ipc,
>  	for_each_cmsghdr(cmsg, msg) {
>  		if (!CMSG_OK(msg, cmsg))
>  			return -EINVAL;
> +		if (cmsg_copy_to_user(cmsg))
> +			msg->msg_flags |= MSG_CMSG_COPY_TO_USER;
>  #if IS_ENABLED(CONFIG_IPV6)
>  		if (allow_ipv6 &&
>  		    cmsg->cmsg_level == SOL_IPV6 &&
> diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
> index fff78496803d..b0341faf7f83 100644
> --- a/net/ipv6/datagram.c
> +++ b/net/ipv6/datagram.c
> @@ -776,6 +776,9 @@ int ip6_datagram_send_ctl(struct net *net, struct sock *sk,
>  			goto exit_f;
>  		}
>  
> +		if (cmsg_copy_to_user(cmsg))
> +			msg->msg_flags |= MSG_CMSG_COPY_TO_USER;
> +
>  		if (cmsg->cmsg_level == SOL_SOCKET) {
>  			err = __sock_cmsg_send(sk, cmsg, &ipc6->sockc);
>  			if (err)
> diff --git a/net/socket.c b/net/socket.c
> index e416920e9399..6523cf5a7f32 100644
> --- a/net/socket.c
> +++ b/net/socket.c
> @@ -2621,6 +2621,39 @@ static int sendmsg_copy_msghdr(struct msghdr *msg,
>  	return 0;
>  }
>  
> +static int sendmsg_copy_cmsg_to_user(struct msghdr *msg_sys,
> +				     struct user_msghdr __user *umsg)
> +{
> +	struct compat_msghdr __user *umsg_compat =
> +				(struct compat_msghdr __user *)umsg;
> +	unsigned long cmsg_ptr = (unsigned long)umsg->msg_control;
> +	unsigned int flags = msg_sys->msg_flags;
> +	struct msghdr msg_user = *msg_sys;
> +	struct cmsghdr *cmsg;
> +	int err;
> +
> +	msg_user.msg_control = umsg->msg_control;
> +	msg_user.msg_control_is_user = true;
> +	for_each_cmsghdr(cmsg, msg_sys) {
> +		if (!CMSG_OK(msg_sys, cmsg))
> +			break;
> +		if (cmsg_copy_to_user(cmsg))
> +			put_cmsg(&msg_user, cmsg->cmsg_level, cmsg->cmsg_type,
> +				 cmsg->cmsg_len - sizeof(*cmsg), CMSG_DATA(cmsg));
> +	}

Alternatively just copy the entire msg_control if any cmsg wants to
be copied back. The others will be unmodified. No need to iterate
then.

> +
> +	err = __put_user((msg_sys->msg_flags & ~MSG_CMSG_COMPAT), COMPAT_FLAGS(umsg));
> +	if (err)
> +		return err;

Does this value need to be written?

> +	if (MSG_CMSG_COMPAT & flags)
> +		err = __put_user((unsigned long)msg_user.msg_control - cmsg_ptr,
> +				 &umsg_compat->msg_controllen);
> +	else
> +		err = __put_user((unsigned long)msg_user.msg_control - cmsg_ptr,
> +				 &umsg->msg_controllen);
> +	return err;
> +}
> +
>  static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg,
>  			 struct msghdr *msg_sys, unsigned int flags,
>  			 struct used_address *used_address,
> @@ -2638,6 +2671,18 @@ static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg,
>  
>  	err = ____sys_sendmsg(sock, msg_sys, flags, used_address,
>  				allowed_msghdr_flags);
> +	if (err < 0)
> +		goto out;
> +
> +	if (msg_sys->msg_flags & MSG_CMSG_COPY_TO_USER) {
> +		ssize_t len = err;
> +
> +		err = sendmsg_copy_cmsg_to_user(msg_sys, msg);
> +		if (err)
> +			goto out;
> +		err = len;
> +	}
> +out:
>  	kfree(iov);
>  	return err;
>  }
> -- 
> 2.20.1
>
Zijian Zhang July 1, 2024, 7:46 p.m. UTC | #4
On 6/30/24 7:43 AM, Willem de Bruijn wrote:
> zijianzhang@ wrote:
>> From: Zijian Zhang <zijianzhang@bytedance.com>
>>
>> Since ____sys_sendmsg creates a kernel copy of msg_control and passes
>> that to the callees, put_cmsg will write into this kernel buffer. If
>> people want to piggyback some information like timestamps upon returning
>> of sendmsg. ____sys_sendmsg will have to copy_to_user to the original buf,
>> which is not supported. As a result, users typically have to call recvmsg
>> on the ERRMSG_QUEUE of the socket, incurring extra system call overhead.
>>
>> This commit supports copying cmsg to userspace in TX path by introducing
>> a flag MSG_CMSG_COPY_TO_USER in struct msghdr to guide the copy logic
>> upon returning of ___sys_sendmsg.
>>
>> Signed-off-by: Zijian Zhang <zijianzhang@bytedance.com>
>> Signed-off-by: Xiaochun Lu <xiaochun.lu@bytedance.com>
>> ---
>>   include/linux/socket.h |  6 ++++++
>>   net/core/sock.c        |  2 ++
>>   net/ipv4/ip_sockglue.c |  2 ++
>>   net/ipv6/datagram.c    |  3 +++
>>   net/socket.c           | 45 ++++++++++++++++++++++++++++++++++++++++++
>>   5 files changed, 58 insertions(+)
>>
>> diff --git a/include/linux/socket.h b/include/linux/socket.h
>> index 89d16b90370b..35adc30c9db6 100644
>> --- a/include/linux/socket.h
>> +++ b/include/linux/socket.h
>> @@ -168,6 +168,11 @@ static inline struct cmsghdr * cmsg_nxthdr (struct msghdr *__msg, struct cmsghdr
>>   	return __cmsg_nxthdr(__msg->msg_control, __msg->msg_controllen, __cmsg);
>>   }
>>   
>> +static inline bool cmsg_copy_to_user(struct cmsghdr *__cmsg)
>> +{
>> +	return 0;
>> +}
>> +
>>   static inline size_t msg_data_left(struct msghdr *msg)
>>   {
>>   	return iov_iter_count(&msg->msg_iter);
>> @@ -329,6 +334,7 @@ struct ucred {
>>   
>>   #define MSG_ZEROCOPY	0x4000000	/* Use user data in kernel path */
>>   #define MSG_SPLICE_PAGES 0x8000000	/* Splice the pages from the iterator in sendmsg() */
>> +#define MSG_CMSG_COPY_TO_USER	0x10000000	/* Copy cmsg to user space */
> 
> Careful that userspace must not be able to set this bit. See also
> MSG_INTERNAL_SENDMSG_FLAGS.
> 
> Perhaps better to define a bit like msg_control_is_user.
> 
>>   #define MSG_FASTOPEN	0x20000000	/* Send data in TCP SYN */
>>   #define MSG_CMSG_CLOEXEC 0x40000000	/* Set close_on_exec for file
>>   					   descriptor received through
>> diff --git a/net/core/sock.c b/net/core/sock.c
>> index 9abc4fe25953..4a766a91ff5c 100644
>> --- a/net/core/sock.c
>> +++ b/net/core/sock.c
>> @@ -2879,6 +2879,8 @@ int sock_cmsg_send(struct sock *sk, struct msghdr *msg,
>>   	for_each_cmsghdr(cmsg, msg) {
>>   		if (!CMSG_OK(msg, cmsg))
>>   			return -EINVAL;
>> +		if (cmsg_copy_to_user(cmsg))
>> +			msg->msg_flags |= MSG_CMSG_COPY_TO_USER;
> 
> Probably better to pass msg to __sock_cmsg_send and only set this
> field in the specific cmsg handler that uses it.
> 

Thanks for the above suggestions!

>>   		if (cmsg->cmsg_level != SOL_SOCKET)
>>   			continue;
>>   		ret = __sock_cmsg_send(sk, cmsg, sockc);
...
>> +static int sendmsg_copy_cmsg_to_user(struct msghdr *msg_sys,
>> +				     struct user_msghdr __user *umsg)
>> +{
>> +	struct compat_msghdr __user *umsg_compat =
>> +				(struct compat_msghdr __user *)umsg;
>> +	unsigned long cmsg_ptr = (unsigned long)umsg->msg_control;
>> +	unsigned int flags = msg_sys->msg_flags;
>> +	struct msghdr msg_user = *msg_sys;
>> +	struct cmsghdr *cmsg;
>> +	int err;
>> +
>> +	msg_user.msg_control = umsg->msg_control;
>> +	msg_user.msg_control_is_user = true;
>> +	for_each_cmsghdr(cmsg, msg_sys) {
>> +		if (!CMSG_OK(msg_sys, cmsg))
>> +			break;
>> +		if (cmsg_copy_to_user(cmsg))
>> +			put_cmsg(&msg_user, cmsg->cmsg_level, cmsg->cmsg_type,
>> +				 cmsg->cmsg_len - sizeof(*cmsg), CMSG_DATA(cmsg));
>> +	}
> 
> Alternatively just copy the entire msg_control if any cmsg wants to
> be copied back. The others will be unmodified. No need to iterate
> then.
> 

Copy the entire msg_control via copy_to_user does not take
MSG_CMSG_COMPAT into account. I may have to use put_cmsg to deal
with the compat version, and thus have to keep the for loop?

If so, I may keep the function cmsg_copy_to_user to avoid extra copy?

>> +
>> +	err = __put_user((msg_sys->msg_flags & ~MSG_CMSG_COMPAT), COMPAT_FLAGS(umsg));
>> +	if (err)
>> +		return err;
> 
> Does this value need to be written?
> 

I did this according to ____sys_recvmsg, maybe it's useful to export
flag like MSG_CTRUNC to users?

>> +	if (MSG_CMSG_COMPAT & flags)
>> +		err = __put_user((unsigned long)msg_user.msg_control - cmsg_ptr,
>> +				 &umsg_compat->msg_controllen);
>> +	else
>> +		err = __put_user((unsigned long)msg_user.msg_control - cmsg_ptr,
>> +				 &umsg->msg_controllen);
>> +	return err;
>> +}
>> +
>>   static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg,
>>   			 struct msghdr *msg_sys, unsigned int flags,
>>   			 struct used_address *used_address,
>> @@ -2638,6 +2671,18 @@ static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg,
>>   
>>   	err = ____sys_sendmsg(sock, msg_sys, flags, used_address,
>>   				allowed_msghdr_flags);
>> +	if (err < 0)
>> +		goto out;
>> +
>> +	if (msg_sys->msg_flags & MSG_CMSG_COPY_TO_USER) {
>> +		ssize_t len = err;
>> +
>> +		err = sendmsg_copy_cmsg_to_user(msg_sys, msg);
>> +		if (err)
>> +			goto out;
>> +		err = len;
>> +	}
>> +out:
>>   	kfree(iov);
>>   	return err;
>>   }
>> -- 
>> 2.20.1
>>
> 
>
Willem de Bruijn July 3, 2024, 12:10 a.m. UTC | #5
Zijian Zhang wrote:
> 
> 
> On 6/30/24 7:43 AM, Willem de Bruijn wrote:
> > zijianzhang@ wrote:
> >> From: Zijian Zhang <zijianzhang@bytedance.com>
> >>
> >> Since ____sys_sendmsg creates a kernel copy of msg_control and passes
> >> that to the callees, put_cmsg will write into this kernel buffer. If
> >> people want to piggyback some information like timestamps upon returning
> >> of sendmsg. ____sys_sendmsg will have to copy_to_user to the original buf,
> >> which is not supported. As a result, users typically have to call recvmsg
> >> on the ERRMSG_QUEUE of the socket, incurring extra system call overhead.
> >>
> >> This commit supports copying cmsg to userspace in TX path by introducing
> >> a flag MSG_CMSG_COPY_TO_USER in struct msghdr to guide the copy logic
> >> upon returning of ___sys_sendmsg.
> >>
> >> Signed-off-by: Zijian Zhang <zijianzhang@bytedance.com>
> >> Signed-off-by: Xiaochun Lu <xiaochun.lu@bytedance.com>

> >>   		if (cmsg->cmsg_level != SOL_SOCKET)
> >>   			continue;
> >>   		ret = __sock_cmsg_send(sk, cmsg, sockc);
> ...
> >> +static int sendmsg_copy_cmsg_to_user(struct msghdr *msg_sys,
> >> +				     struct user_msghdr __user *umsg)
> >> +{
> >> +	struct compat_msghdr __user *umsg_compat =
> >> +				(struct compat_msghdr __user *)umsg;
> >> +	unsigned long cmsg_ptr = (unsigned long)umsg->msg_control;
> >> +	unsigned int flags = msg_sys->msg_flags;
> >> +	struct msghdr msg_user = *msg_sys;
> >> +	struct cmsghdr *cmsg;
> >> +	int err;
> >> +
> >> +	msg_user.msg_control = umsg->msg_control;
> >> +	msg_user.msg_control_is_user = true;
> >> +	for_each_cmsghdr(cmsg, msg_sys) {
> >> +		if (!CMSG_OK(msg_sys, cmsg))
> >> +			break;
> >> +		if (cmsg_copy_to_user(cmsg))
> >> +			put_cmsg(&msg_user, cmsg->cmsg_level, cmsg->cmsg_type,
> >> +				 cmsg->cmsg_len - sizeof(*cmsg), CMSG_DATA(cmsg));
> >> +	}
> > 
> > Alternatively just copy the entire msg_control if any cmsg wants to
> > be copied back. The others will be unmodified. No need to iterate
> > then.
> > 
> 
> Copy the entire msg_control via copy_to_user does not take
> MSG_CMSG_COMPAT into account. I may have to use put_cmsg to deal
> with the compat version, and thus have to keep the for loop?

Good point. Okay, then this is pretty clean. Only returning the
cmsg that have been written to is actually quite nice.
 
> If so, I may keep the function cmsg_copy_to_user to avoid extra copy?
> 
> >> +
> >> +	err = __put_user((msg_sys->msg_flags & ~MSG_CMSG_COMPAT), COMPAT_FLAGS(umsg));
> >> +	if (err)
> >> +		return err;
> > 
> > Does this value need to be written?
> > 
> 
> I did this according to ____sys_recvmsg, maybe it's useful to export
> flag like MSG_CTRUNC to users?

Good point.
diff mbox series

Patch

diff --git a/include/linux/socket.h b/include/linux/socket.h
index 89d16b90370b..35adc30c9db6 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -168,6 +168,11 @@  static inline struct cmsghdr * cmsg_nxthdr (struct msghdr *__msg, struct cmsghdr
 	return __cmsg_nxthdr(__msg->msg_control, __msg->msg_controllen, __cmsg);
 }
 
+static inline bool cmsg_copy_to_user(struct cmsghdr *__cmsg)
+{
+	return 0;
+}
+
 static inline size_t msg_data_left(struct msghdr *msg)
 {
 	return iov_iter_count(&msg->msg_iter);
@@ -329,6 +334,7 @@  struct ucred {
 
 #define MSG_ZEROCOPY	0x4000000	/* Use user data in kernel path */
 #define MSG_SPLICE_PAGES 0x8000000	/* Splice the pages from the iterator in sendmsg() */
+#define MSG_CMSG_COPY_TO_USER	0x10000000	/* Copy cmsg to user space */
 #define MSG_FASTOPEN	0x20000000	/* Send data in TCP SYN */
 #define MSG_CMSG_CLOEXEC 0x40000000	/* Set close_on_exec for file
 					   descriptor received through
diff --git a/net/core/sock.c b/net/core/sock.c
index 9abc4fe25953..4a766a91ff5c 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -2879,6 +2879,8 @@  int sock_cmsg_send(struct sock *sk, struct msghdr *msg,
 	for_each_cmsghdr(cmsg, msg) {
 		if (!CMSG_OK(msg, cmsg))
 			return -EINVAL;
+		if (cmsg_copy_to_user(cmsg))
+			msg->msg_flags |= MSG_CMSG_COPY_TO_USER;
 		if (cmsg->cmsg_level != SOL_SOCKET)
 			continue;
 		ret = __sock_cmsg_send(sk, cmsg, sockc);
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index cf377377b52d..464d08b27fa8 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -249,6 +249,8 @@  int ip_cmsg_send(struct sock *sk, struct msghdr *msg, struct ipcm_cookie *ipc,
 	for_each_cmsghdr(cmsg, msg) {
 		if (!CMSG_OK(msg, cmsg))
 			return -EINVAL;
+		if (cmsg_copy_to_user(cmsg))
+			msg->msg_flags |= MSG_CMSG_COPY_TO_USER;
 #if IS_ENABLED(CONFIG_IPV6)
 		if (allow_ipv6 &&
 		    cmsg->cmsg_level == SOL_IPV6 &&
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index fff78496803d..b0341faf7f83 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -776,6 +776,9 @@  int ip6_datagram_send_ctl(struct net *net, struct sock *sk,
 			goto exit_f;
 		}
 
+		if (cmsg_copy_to_user(cmsg))
+			msg->msg_flags |= MSG_CMSG_COPY_TO_USER;
+
 		if (cmsg->cmsg_level == SOL_SOCKET) {
 			err = __sock_cmsg_send(sk, cmsg, &ipc6->sockc);
 			if (err)
diff --git a/net/socket.c b/net/socket.c
index e416920e9399..6523cf5a7f32 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -2621,6 +2621,39 @@  static int sendmsg_copy_msghdr(struct msghdr *msg,
 	return 0;
 }
 
+static int sendmsg_copy_cmsg_to_user(struct msghdr *msg_sys,
+				     struct user_msghdr __user *umsg)
+{
+	struct compat_msghdr __user *umsg_compat =
+				(struct compat_msghdr __user *)umsg;
+	unsigned long cmsg_ptr = (unsigned long)umsg->msg_control;
+	unsigned int flags = msg_sys->msg_flags;
+	struct msghdr msg_user = *msg_sys;
+	struct cmsghdr *cmsg;
+	int err;
+
+	msg_user.msg_control = umsg->msg_control;
+	msg_user.msg_control_is_user = true;
+	for_each_cmsghdr(cmsg, msg_sys) {
+		if (!CMSG_OK(msg_sys, cmsg))
+			break;
+		if (cmsg_copy_to_user(cmsg))
+			put_cmsg(&msg_user, cmsg->cmsg_level, cmsg->cmsg_type,
+				 cmsg->cmsg_len - sizeof(*cmsg), CMSG_DATA(cmsg));
+	}
+
+	err = __put_user((msg_sys->msg_flags & ~MSG_CMSG_COMPAT), COMPAT_FLAGS(umsg));
+	if (err)
+		return err;
+	if (MSG_CMSG_COMPAT & flags)
+		err = __put_user((unsigned long)msg_user.msg_control - cmsg_ptr,
+				 &umsg_compat->msg_controllen);
+	else
+		err = __put_user((unsigned long)msg_user.msg_control - cmsg_ptr,
+				 &umsg->msg_controllen);
+	return err;
+}
+
 static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg,
 			 struct msghdr *msg_sys, unsigned int flags,
 			 struct used_address *used_address,
@@ -2638,6 +2671,18 @@  static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg,
 
 	err = ____sys_sendmsg(sock, msg_sys, flags, used_address,
 				allowed_msghdr_flags);
+	if (err < 0)
+		goto out;
+
+	if (msg_sys->msg_flags & MSG_CMSG_COPY_TO_USER) {
+		ssize_t len = err;
+
+		err = sendmsg_copy_cmsg_to_user(msg_sys, msg);
+		if (err)
+			goto out;
+		err = len;
+	}
+out:
 	kfree(iov);
 	return err;
 }