Message ID | 20240626193403.3854451-3-zijianzhang@bytedance.com (mailing list archive) |
---|---|
State | Changes Requested |
Delegated to: | Netdev Maintainers |
Headers | show |
Series | net: A lightweight zero-copy notification | expand |
On Wed, 26 Jun 2024 19:34:01 +0000 zijianzhang@bytedance.com wrote: > From: Zijian Zhang <zijianzhang@bytedance.com> > > Since ____sys_sendmsg creates a kernel copy of msg_control and passes > that to the callees, put_cmsg will write into this kernel buffer. If > people want to piggyback some information like timestamps upon returning > of sendmsg. ____sys_sendmsg will have to copy_to_user to the original buf, > which is not supported. As a result, users typically have to call recvmsg > on the ERRMSG_QUEUE of the socket, incurring extra system call overhead. > > This commit supports copying cmsg to userspace in TX path by introducing > a flag MSG_CMSG_COPY_TO_USER in struct msghdr to guide the copy logic > upon returning of ___sys_sendmsg. sparse complains about the annotations: net/socket.c:2635:30: warning: incorrect type in assignment (different address spaces) net/socket.c:2635:30: expected void *msg_control net/socket.c:2635:30: got void [noderef] __user *[noderef] __user msg_control net/socket.c:2629:49: warning: dereference of noderef expression
Hi, kernel test robot noticed the following build warnings: [auto build test WARNING on net-next/main] url: https://github.com/intel-lab-lkp/linux/commits/zijianzhang-bytedance-com/selftests-fix-OOM-problem-in-msg_zerocopy-selftest/20240627-150801 base: net-next/main patch link: https://lore.kernel.org/r/20240626193403.3854451-3-zijianzhang%40bytedance.com patch subject: [PATCH net-next v6 2/4] sock: support copy cmsg to userspace in TX path config: i386-randconfig-062-20240628 (https://download.01.org/0day-ci/archive/20240628/202406281051.NAiS477l-lkp@intel.com/config) compiler: gcc-13 (Ubuntu 13.2.0-4ubuntu3) 13.2.0 reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240628/202406281051.NAiS477l-lkp@intel.com/reproduce) If you fix the issue in a separate patch/commit (i.e. not just a new version of the same patch/commit), kindly add following tags | Reported-by: kernel test robot <lkp@intel.com> | Closes: https://lore.kernel.org/oe-kbuild-all/202406281051.NAiS477l-lkp@intel.com/ sparse warnings: (new ones prefixed by >>) >> net/socket.c:2635:30: sparse: sparse: incorrect type in assignment (different address spaces) @@ expected void *msg_control @@ got void [noderef] __user *[noderef] __user msg_control @@ net/socket.c:2635:30: sparse: expected void *msg_control net/socket.c:2635:30: sparse: got void [noderef] __user *[noderef] __user msg_control >> net/socket.c:2629:49: sparse: sparse: dereference of noderef expression vim +2635 net/socket.c 2623 2624 static int sendmsg_copy_cmsg_to_user(struct msghdr *msg_sys, 2625 struct user_msghdr __user *umsg) 2626 { 2627 struct compat_msghdr __user *umsg_compat = 2628 (struct compat_msghdr __user *)umsg; > 2629 unsigned long cmsg_ptr = (unsigned long)umsg->msg_control; 2630 unsigned int flags = msg_sys->msg_flags; 2631 struct msghdr msg_user = *msg_sys; 2632 struct cmsghdr *cmsg; 2633 int err; 2634 > 2635 msg_user.msg_control = umsg->msg_control; 2636 msg_user.msg_control_is_user = true; 2637 for_each_cmsghdr(cmsg, msg_sys) { 2638 if (!CMSG_OK(msg_sys, cmsg)) 2639 break; 2640 if (cmsg_copy_to_user(cmsg)) 2641 put_cmsg(&msg_user, cmsg->cmsg_level, cmsg->cmsg_type, 2642 cmsg->cmsg_len - sizeof(*cmsg), CMSG_DATA(cmsg)); 2643 } 2644 2645 err = __put_user((msg_sys->msg_flags & ~MSG_CMSG_COMPAT), COMPAT_FLAGS(umsg)); 2646 if (err) 2647 return err; 2648 if (MSG_CMSG_COMPAT & flags) 2649 err = __put_user((unsigned long)msg_user.msg_control - cmsg_ptr, 2650 &umsg_compat->msg_controllen); 2651 else 2652 err = __put_user((unsigned long)msg_user.msg_control - cmsg_ptr, 2653 &umsg->msg_controllen); 2654 return err; 2655 } 2656
diff --git a/include/linux/socket.h b/include/linux/socket.h index 89d16b90370b..35adc30c9db6 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -168,6 +168,11 @@ static inline struct cmsghdr * cmsg_nxthdr (struct msghdr *__msg, struct cmsghdr return __cmsg_nxthdr(__msg->msg_control, __msg->msg_controllen, __cmsg); } +static inline bool cmsg_copy_to_user(struct cmsghdr *__cmsg) +{ + return 0; +} + static inline size_t msg_data_left(struct msghdr *msg) { return iov_iter_count(&msg->msg_iter); @@ -329,6 +334,7 @@ struct ucred { #define MSG_ZEROCOPY 0x4000000 /* Use user data in kernel path */ #define MSG_SPLICE_PAGES 0x8000000 /* Splice the pages from the iterator in sendmsg() */ +#define MSG_CMSG_COPY_TO_USER 0x10000000 /* Copy cmsg to user space */ #define MSG_FASTOPEN 0x20000000 /* Send data in TCP SYN */ #define MSG_CMSG_CLOEXEC 0x40000000 /* Set close_on_exec for file descriptor received through diff --git a/net/core/sock.c b/net/core/sock.c index 9abc4fe25953..4a766a91ff5c 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2879,6 +2879,8 @@ int sock_cmsg_send(struct sock *sk, struct msghdr *msg, for_each_cmsghdr(cmsg, msg) { if (!CMSG_OK(msg, cmsg)) return -EINVAL; + if (cmsg_copy_to_user(cmsg)) + msg->msg_flags |= MSG_CMSG_COPY_TO_USER; if (cmsg->cmsg_level != SOL_SOCKET) continue; ret = __sock_cmsg_send(sk, cmsg, sockc); diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index cf377377b52d..464d08b27fa8 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -249,6 +249,8 @@ int ip_cmsg_send(struct sock *sk, struct msghdr *msg, struct ipcm_cookie *ipc, for_each_cmsghdr(cmsg, msg) { if (!CMSG_OK(msg, cmsg)) return -EINVAL; + if (cmsg_copy_to_user(cmsg)) + msg->msg_flags |= MSG_CMSG_COPY_TO_USER; #if IS_ENABLED(CONFIG_IPV6) if (allow_ipv6 && cmsg->cmsg_level == SOL_IPV6 && diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index fff78496803d..b0341faf7f83 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -776,6 +776,9 @@ int ip6_datagram_send_ctl(struct net *net, struct sock *sk, goto exit_f; } + if (cmsg_copy_to_user(cmsg)) + msg->msg_flags |= MSG_CMSG_COPY_TO_USER; + if (cmsg->cmsg_level == SOL_SOCKET) { err = __sock_cmsg_send(sk, cmsg, &ipc6->sockc); if (err) diff --git a/net/socket.c b/net/socket.c index e416920e9399..6523cf5a7f32 100644 --- a/net/socket.c +++ b/net/socket.c @@ -2621,6 +2621,39 @@ static int sendmsg_copy_msghdr(struct msghdr *msg, return 0; } +static int sendmsg_copy_cmsg_to_user(struct msghdr *msg_sys, + struct user_msghdr __user *umsg) +{ + struct compat_msghdr __user *umsg_compat = + (struct compat_msghdr __user *)umsg; + unsigned long cmsg_ptr = (unsigned long)umsg->msg_control; + unsigned int flags = msg_sys->msg_flags; + struct msghdr msg_user = *msg_sys; + struct cmsghdr *cmsg; + int err; + + msg_user.msg_control = umsg->msg_control; + msg_user.msg_control_is_user = true; + for_each_cmsghdr(cmsg, msg_sys) { + if (!CMSG_OK(msg_sys, cmsg)) + break; + if (cmsg_copy_to_user(cmsg)) + put_cmsg(&msg_user, cmsg->cmsg_level, cmsg->cmsg_type, + cmsg->cmsg_len - sizeof(*cmsg), CMSG_DATA(cmsg)); + } + + err = __put_user((msg_sys->msg_flags & ~MSG_CMSG_COMPAT), COMPAT_FLAGS(umsg)); + if (err) + return err; + if (MSG_CMSG_COMPAT & flags) + err = __put_user((unsigned long)msg_user.msg_control - cmsg_ptr, + &umsg_compat->msg_controllen); + else + err = __put_user((unsigned long)msg_user.msg_control - cmsg_ptr, + &umsg->msg_controllen); + return err; +} + static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg, struct msghdr *msg_sys, unsigned int flags, struct used_address *used_address, @@ -2638,6 +2671,18 @@ static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg, err = ____sys_sendmsg(sock, msg_sys, flags, used_address, allowed_msghdr_flags); + if (err < 0) + goto out; + + if (msg_sys->msg_flags & MSG_CMSG_COPY_TO_USER) { + ssize_t len = err; + + err = sendmsg_copy_cmsg_to_user(msg_sys, msg); + if (err) + goto out; + err = len; + } +out: kfree(iov); return err; }