diff mbox series

[3/3] io_uring: zerocopy receive

Message ID 20220124094320.900713-4-haoxu@linux.alibaba.com (mailing list archive)
State Not Applicable
Delegated to: Netdev Maintainers
Headers show
Series io_uring zerocopy receive | expand

Checks

Context Check Description
netdev/fixes_present success Fixes tag not required for -next series
netdev/subject_prefix success Link
netdev/cover_letter success Series has a cover letter
netdev/patch_count success Link
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 32 this patch: 32
netdev/cc_maintainers success CCed 3 of 3 maintainers
netdev/build_clang success Errors and warnings before: 22 this patch: 22
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 37 this patch: 37
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 121 lines checked
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
netdev/tree_selection success Guessing tree name failed - patch did not apply, async

Commit Message

Hao Xu Jan. 24, 2022, 9:43 a.m. UTC
Integrate the current zerocopy receive solution into io_uring for easier
use. The current calling sequence is:
  1) mmap a range of virtual addresses
  2) poll() to wait until data is ready on the sockfd
  3) call getsockopt() to map the addresses from 1) to physical pages
  4) access the data.

By integrating it into io_uring, 2) and 3) can be merged:
  1) mmap a range of virtual addresses
  2) prepare a sqe and submit
  3) get a cqe which indicates data is ready and mapped
  4) access the data

which saves one system call and means users no longer need to be aware of 3).

Signed-off-by: Hao Xu <haoxu@linux.alibaba.com>
---
 fs/io_uring.c                 | 72 +++++++++++++++++++++++++++++++++++
 include/uapi/linux/io_uring.h |  1 +
 2 files changed, 73 insertions(+)

Comments

kernel test robot Jan. 24, 2022, 3:01 p.m. UTC | #1
Hi Hao,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on linus/master]
[also build test ERROR on v5.17-rc1 next-20220124]
[cannot apply to horms-ipvs/master]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Hao-Xu/io_uring-zerocopy-receive/20220124-174546
base:   https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git dd81e1c7d5fb126e5fbc5c9e334d7b3ec29a16a0
config: s390-buildonly-randconfig-r004-20220124 (https://download.01.org/0day-ci/archive/20220124/202201242233.64QOWQZ1-lkp@intel.com/config)
compiler: clang version 14.0.0 (https://github.com/llvm/llvm-project 2e58a18910867ba6795066e044293e6daf89edf5)
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # install s390 cross compiling tool for clang build
        # apt-get install binutils-s390x-linux-gnu
        # https://github.com/0day-ci/linux/commit/295704165d394635876364522d3ac1451b62da66
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Hao-Xu/io_uring-zerocopy-receive/20220124-174546
        git checkout 295704165d394635876364522d3ac1451b62da66
        # save the config file to linux build tree
        mkdir build_dir
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=s390 SHELL=/bin/bash

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

   In file included from fs/io_uring.c:60:
   In file included from include/linux/blk-mq.h:8:
   In file included from include/linux/scatterlist.h:9:
   In file included from arch/s390/include/asm/io.h:75:
   include/asm-generic/io.h:464:31: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
           val = __raw_readb(PCI_IOBASE + addr);
                             ~~~~~~~~~~ ^
   include/asm-generic/io.h:477:61: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
           val = __le16_to_cpu((__le16 __force)__raw_readw(PCI_IOBASE + addr));
                                                           ~~~~~~~~~~ ^
   include/uapi/linux/byteorder/big_endian.h:37:59: note: expanded from macro '__le16_to_cpu'
   #define __le16_to_cpu(x) __swab16((__force __u16)(__le16)(x))
                                                             ^
   include/uapi/linux/swab.h:102:54: note: expanded from macro '__swab16'
   #define __swab16(x) (__u16)__builtin_bswap16((__u16)(x))
                                                        ^
   In file included from fs/io_uring.c:60:
   In file included from include/linux/blk-mq.h:8:
   In file included from include/linux/scatterlist.h:9:
   In file included from arch/s390/include/asm/io.h:75:
   include/asm-generic/io.h:490:61: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
           val = __le32_to_cpu((__le32 __force)__raw_readl(PCI_IOBASE + addr));
                                                           ~~~~~~~~~~ ^
   include/uapi/linux/byteorder/big_endian.h:35:59: note: expanded from macro '__le32_to_cpu'
   #define __le32_to_cpu(x) __swab32((__force __u32)(__le32)(x))
                                                             ^
   include/uapi/linux/swab.h:115:54: note: expanded from macro '__swab32'
   #define __swab32(x) (__u32)__builtin_bswap32((__u32)(x))
                                                        ^
   In file included from fs/io_uring.c:60:
   In file included from include/linux/blk-mq.h:8:
   In file included from include/linux/scatterlist.h:9:
   In file included from arch/s390/include/asm/io.h:75:
   include/asm-generic/io.h:501:33: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
           __raw_writeb(value, PCI_IOBASE + addr);
                               ~~~~~~~~~~ ^
   include/asm-generic/io.h:511:59: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
           __raw_writew((u16 __force)cpu_to_le16(value), PCI_IOBASE + addr);
                                                         ~~~~~~~~~~ ^
   include/asm-generic/io.h:521:59: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
           __raw_writel((u32 __force)cpu_to_le32(value), PCI_IOBASE + addr);
                                                         ~~~~~~~~~~ ^
   include/asm-generic/io.h:609:20: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
           readsb(PCI_IOBASE + addr, buffer, count);
                  ~~~~~~~~~~ ^
   include/asm-generic/io.h:617:20: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
           readsw(PCI_IOBASE + addr, buffer, count);
                  ~~~~~~~~~~ ^
   include/asm-generic/io.h:625:20: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
           readsl(PCI_IOBASE + addr, buffer, count);
                  ~~~~~~~~~~ ^
   include/asm-generic/io.h:634:21: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
           writesb(PCI_IOBASE + addr, buffer, count);
                   ~~~~~~~~~~ ^
   include/asm-generic/io.h:643:21: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
           writesw(PCI_IOBASE + addr, buffer, count);
                   ~~~~~~~~~~ ^
   include/asm-generic/io.h:652:21: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
           writesl(PCI_IOBASE + addr, buffer, count);
                   ~~~~~~~~~~ ^
>> fs/io_uring.c:6639:10: error: implicit declaration of function 'io_recvzc_prep' [-Werror,-Wimplicit-function-declaration]
                   return io_recvzc_prep(req, sqe);
                          ^
   fs/io_uring.c:6639:10: note: did you mean 'io_recvmsg_prep'?
   fs/io_uring.c:5462:1: note: 'io_recvmsg_prep' declared here
   IO_NETOP_PREP_ASYNC(recvmsg);
   ^
   fs/io_uring.c:5454:38: note: expanded from macro 'IO_NETOP_PREP_ASYNC'
   #define IO_NETOP_PREP_ASYNC(op)                                         \
                                                                           ^
   fs/io_uring.c:5449:12: note: expanded from macro '\
   IO_NETOP_PREP'
   static int io_##op##_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) \
              ^
   <scratch space>:22:1: note: expanded from here
   io_recvmsg_prep
   ^
>> fs/io_uring.c:6924:9: error: implicit declaration of function 'io_recvzc' [-Werror,-Wimplicit-function-declaration]
                   ret = io_recvzc(req, issue_flags);
                         ^
   fs/io_uring.c:6924:9: note: did you mean 'io_recv'?
   fs/io_uring.c:5466:1: note: 'io_recv' declared here
   IO_NETOP_FN(recv);
   ^
   fs/io_uring.c:5442:12: note: expanded from macro 'IO_NETOP_FN'
   static int io_##op(struct io_kiocb *req, unsigned int issue_flags)      \
              ^
   <scratch space>:34:1: note: expanded from here
   io_recv
   ^
   12 warnings and 2 errors generated.


vim +/io_recvzc_prep +6639 fs/io_uring.c

  6560	
  6561	static int io_req_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
  6562	{
  6563		switch (req->opcode) {
  6564		case IORING_OP_NOP:
  6565			return 0;
  6566		case IORING_OP_READV:
  6567		case IORING_OP_READ_FIXED:
  6568		case IORING_OP_READ:
  6569			return io_read_prep(req, sqe);
  6570		case IORING_OP_WRITEV:
  6571		case IORING_OP_WRITE_FIXED:
  6572		case IORING_OP_WRITE:
  6573			return io_write_prep(req, sqe);
  6574		case IORING_OP_POLL_ADD:
  6575			return io_poll_add_prep(req, sqe);
  6576		case IORING_OP_POLL_REMOVE:
  6577			return io_poll_update_prep(req, sqe);
  6578		case IORING_OP_FSYNC:
  6579			return io_fsync_prep(req, sqe);
  6580		case IORING_OP_SYNC_FILE_RANGE:
  6581			return io_sfr_prep(req, sqe);
  6582		case IORING_OP_SENDMSG:
  6583		case IORING_OP_SEND:
  6584			return io_sendmsg_prep(req, sqe);
  6585		case IORING_OP_RECVMSG:
  6586		case IORING_OP_RECV:
  6587			return io_recvmsg_prep(req, sqe);
  6588		case IORING_OP_CONNECT:
  6589			return io_connect_prep(req, sqe);
  6590		case IORING_OP_TIMEOUT:
  6591			return io_timeout_prep(req, sqe, false);
  6592		case IORING_OP_TIMEOUT_REMOVE:
  6593			return io_timeout_remove_prep(req, sqe);
  6594		case IORING_OP_ASYNC_CANCEL:
  6595			return io_async_cancel_prep(req, sqe);
  6596		case IORING_OP_LINK_TIMEOUT:
  6597			return io_timeout_prep(req, sqe, true);
  6598		case IORING_OP_ACCEPT:
  6599			return io_accept_prep(req, sqe);
  6600		case IORING_OP_FALLOCATE:
  6601			return io_fallocate_prep(req, sqe);
  6602		case IORING_OP_OPENAT:
  6603			return io_openat_prep(req, sqe);
  6604		case IORING_OP_CLOSE:
  6605			return io_close_prep(req, sqe);
  6606		case IORING_OP_FILES_UPDATE:
  6607			return io_rsrc_update_prep(req, sqe);
  6608		case IORING_OP_STATX:
  6609			return io_statx_prep(req, sqe);
  6610		case IORING_OP_FADVISE:
  6611			return io_fadvise_prep(req, sqe);
  6612		case IORING_OP_MADVISE:
  6613			return io_madvise_prep(req, sqe);
  6614		case IORING_OP_OPENAT2:
  6615			return io_openat2_prep(req, sqe);
  6616		case IORING_OP_EPOLL_CTL:
  6617			return io_epoll_ctl_prep(req, sqe);
  6618		case IORING_OP_SPLICE:
  6619			return io_splice_prep(req, sqe);
  6620		case IORING_OP_PROVIDE_BUFFERS:
  6621			return io_provide_buffers_prep(req, sqe);
  6622		case IORING_OP_REMOVE_BUFFERS:
  6623			return io_remove_buffers_prep(req, sqe);
  6624		case IORING_OP_TEE:
  6625			return io_tee_prep(req, sqe);
  6626		case IORING_OP_SHUTDOWN:
  6627			return io_shutdown_prep(req, sqe);
  6628		case IORING_OP_RENAMEAT:
  6629			return io_renameat_prep(req, sqe);
  6630		case IORING_OP_UNLINKAT:
  6631			return io_unlinkat_prep(req, sqe);
  6632		case IORING_OP_MKDIRAT:
  6633			return io_mkdirat_prep(req, sqe);
  6634		case IORING_OP_SYMLINKAT:
  6635			return io_symlinkat_prep(req, sqe);
  6636		case IORING_OP_LINKAT:
  6637			return io_linkat_prep(req, sqe);
  6638		case IORING_OP_RECVZC:
> 6639			return io_recvzc_prep(req, sqe);
  6640		}
  6641	
  6642		printk_once(KERN_WARNING "io_uring: unhandled opcode %d\n",
  6643				req->opcode);
  6644		return -EINVAL;
  6645	}
  6646	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
kernel test robot Jan. 24, 2022, 3:42 p.m. UTC | #2
Hi Hao,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on linus/master]
[also build test ERROR on v5.17-rc1 next-20220124]
[cannot apply to horms-ipvs/master]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Hao-Xu/io_uring-zerocopy-receive/20220124-174546
base:   https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git dd81e1c7d5fb126e5fbc5c9e334d7b3ec29a16a0
config: h8300-randconfig-r026-20220124 (https://download.01.org/0day-ci/archive/20220124/202201242307.INcQOwqz-lkp@intel.com/config)
compiler: h8300-linux-gcc (GCC) 11.2.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/0day-ci/linux/commit/295704165d394635876364522d3ac1451b62da66
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Hao-Xu/io_uring-zerocopy-receive/20220124-174546
        git checkout 295704165d394635876364522d3ac1451b62da66
        # save the config file to linux build tree
        mkdir build_dir
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-11.2.0 make.cross O=build_dir ARCH=h8300 SHELL=/bin/bash

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

   In file included from include/linux/kernel.h:20,
                    from fs/io_uring.c:42:
   include/linux/scatterlist.h: In function 'sg_set_buf':
   include/asm-generic/page.h:89:51: warning: ordered comparison of pointer with null pointer [-Wextra]
      89 | #define virt_addr_valid(kaddr)  (((void *)(kaddr) >= (void *)PAGE_OFFSET) && \
         |                                                   ^~
   include/linux/compiler.h:78:45: note: in definition of macro 'unlikely'
      78 | # define unlikely(x)    __builtin_expect(!!(x), 0)
         |                                             ^
   include/linux/scatterlist.h:160:9: note: in expansion of macro 'BUG_ON'
     160 |         BUG_ON(!virt_addr_valid(buf));
         |         ^~~~~~
   include/linux/scatterlist.h:160:17: note: in expansion of macro 'virt_addr_valid'
     160 |         BUG_ON(!virt_addr_valid(buf));
         |                 ^~~~~~~~~~~~~~~
   fs/io_uring.c: In function '__io_submit_flush_completions':
   fs/io_uring.c:2537:40: warning: variable 'prev' set but not used [-Wunused-but-set-variable]
    2537 |         struct io_wq_work_node *node, *prev;
         |                                        ^~~~
   fs/io_uring.c: In function 'io_req_prep':
>> fs/io_uring.c:6639:24: error: implicit declaration of function 'io_recvzc_prep'; did you mean 'io_recvmsg_prep'? [-Werror=implicit-function-declaration]
    6639 |                 return io_recvzc_prep(req, sqe);
         |                        ^~~~~~~~~~~~~~
         |                        io_recvmsg_prep
   fs/io_uring.c: In function 'io_issue_sqe':
>> fs/io_uring.c:6924:23: error: implicit declaration of function 'io_recvzc'; did you mean 'io_recv'? [-Werror=implicit-function-declaration]
    6924 |                 ret = io_recvzc(req, issue_flags);
         |                       ^~~~~~~~~
         |                       io_recv
   cc1: some warnings being treated as errors


vim +6639 fs/io_uring.c

  6560	
  6561	static int io_req_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
  6562	{
  6563		switch (req->opcode) {
  6564		case IORING_OP_NOP:
  6565			return 0;
  6566		case IORING_OP_READV:
  6567		case IORING_OP_READ_FIXED:
  6568		case IORING_OP_READ:
  6569			return io_read_prep(req, sqe);
  6570		case IORING_OP_WRITEV:
  6571		case IORING_OP_WRITE_FIXED:
  6572		case IORING_OP_WRITE:
  6573			return io_write_prep(req, sqe);
  6574		case IORING_OP_POLL_ADD:
  6575			return io_poll_add_prep(req, sqe);
  6576		case IORING_OP_POLL_REMOVE:
  6577			return io_poll_update_prep(req, sqe);
  6578		case IORING_OP_FSYNC:
  6579			return io_fsync_prep(req, sqe);
  6580		case IORING_OP_SYNC_FILE_RANGE:
  6581			return io_sfr_prep(req, sqe);
  6582		case IORING_OP_SENDMSG:
  6583		case IORING_OP_SEND:
  6584			return io_sendmsg_prep(req, sqe);
  6585		case IORING_OP_RECVMSG:
  6586		case IORING_OP_RECV:
  6587			return io_recvmsg_prep(req, sqe);
  6588		case IORING_OP_CONNECT:
  6589			return io_connect_prep(req, sqe);
  6590		case IORING_OP_TIMEOUT:
  6591			return io_timeout_prep(req, sqe, false);
  6592		case IORING_OP_TIMEOUT_REMOVE:
  6593			return io_timeout_remove_prep(req, sqe);
  6594		case IORING_OP_ASYNC_CANCEL:
  6595			return io_async_cancel_prep(req, sqe);
  6596		case IORING_OP_LINK_TIMEOUT:
  6597			return io_timeout_prep(req, sqe, true);
  6598		case IORING_OP_ACCEPT:
  6599			return io_accept_prep(req, sqe);
  6600		case IORING_OP_FALLOCATE:
  6601			return io_fallocate_prep(req, sqe);
  6602		case IORING_OP_OPENAT:
  6603			return io_openat_prep(req, sqe);
  6604		case IORING_OP_CLOSE:
  6605			return io_close_prep(req, sqe);
  6606		case IORING_OP_FILES_UPDATE:
  6607			return io_rsrc_update_prep(req, sqe);
  6608		case IORING_OP_STATX:
  6609			return io_statx_prep(req, sqe);
  6610		case IORING_OP_FADVISE:
  6611			return io_fadvise_prep(req, sqe);
  6612		case IORING_OP_MADVISE:
  6613			return io_madvise_prep(req, sqe);
  6614		case IORING_OP_OPENAT2:
  6615			return io_openat2_prep(req, sqe);
  6616		case IORING_OP_EPOLL_CTL:
  6617			return io_epoll_ctl_prep(req, sqe);
  6618		case IORING_OP_SPLICE:
  6619			return io_splice_prep(req, sqe);
  6620		case IORING_OP_PROVIDE_BUFFERS:
  6621			return io_provide_buffers_prep(req, sqe);
  6622		case IORING_OP_REMOVE_BUFFERS:
  6623			return io_remove_buffers_prep(req, sqe);
  6624		case IORING_OP_TEE:
  6625			return io_tee_prep(req, sqe);
  6626		case IORING_OP_SHUTDOWN:
  6627			return io_shutdown_prep(req, sqe);
  6628		case IORING_OP_RENAMEAT:
  6629			return io_renameat_prep(req, sqe);
  6630		case IORING_OP_UNLINKAT:
  6631			return io_unlinkat_prep(req, sqe);
  6632		case IORING_OP_MKDIRAT:
  6633			return io_mkdirat_prep(req, sqe);
  6634		case IORING_OP_SYMLINKAT:
  6635			return io_symlinkat_prep(req, sqe);
  6636		case IORING_OP_LINKAT:
  6637			return io_linkat_prep(req, sqe);
  6638		case IORING_OP_RECVZC:
> 6639			return io_recvzc_prep(req, sqe);
  6640		}
  6641	
  6642		printk_once(KERN_WARNING "io_uring: unhandled opcode %d\n",
  6643				req->opcode);
  6644		return -EINVAL;
  6645	}
  6646	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
diff mbox series

Patch

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 422d6de48688..5826d84400f6 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -81,6 +81,7 @@ 
 #include <linux/tracehook.h>
 #include <linux/audit.h>
 #include <linux/security.h>
+#include <net/tcp.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/io_uring.h>
@@ -581,6 +582,12 @@  struct io_sr_msg {
 	size_t				len;
 };
 
+struct io_recvzc {	/* per-request state for IORING_OP_RECVZC */
+	struct file			*file;
+	char __user			*u_zc;	/* set from sqe->addr in io_recvzc_prep() */
+	int __user			*u_len;	/* set from sqe->off in io_recvzc_prep() */
+};
+
 struct io_open {
 	struct file			*file;
 	int				dfd;
@@ -855,6 +862,7 @@  struct io_kiocb {
 		struct io_mkdir		mkdir;
 		struct io_symlink	symlink;
 		struct io_hardlink	hardlink;
+		struct io_recvzc	recvzc;
 	};
 
 	u8				opcode;
@@ -1105,6 +1113,12 @@  static const struct io_op_def io_op_defs[] = {
 	[IORING_OP_MKDIRAT] = {},
 	[IORING_OP_SYMLINKAT] = {},
 	[IORING_OP_LINKAT] = {},
+	[IORING_OP_RECVZC] = {
+		.needs_file		= 1,
+		.unbound_nonreg_file	= 1,
+		.pollin			= 1,
+		.audit_skip		= 1,
+	},
 };
 
 /* requests with any of those set should undergo io_disarm_next() */
@@ -5243,6 +5257,59 @@  static int io_recv(struct io_kiocb *req, unsigned int issue_flags)
 	return 0;
 }
 
+/* IORING_OP_RECVZC prep: validate the SQE and save the addr/off user pointers. */
+static int io_recvzc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+{
+	struct io_recvzc *recvzc = &req->recvzc;
+
+	if (!IS_ENABLED(CONFIG_MMU))	/* replaces #ifndef + bare return */
+		return -EOPNOTSUPP;
+	if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+		return -EINVAL;
+	/* Unused SQE fields must be zero so they can be given a meaning later. */
+	if (sqe->ioprio || sqe->len || sqe->buf_index || sqe->rw_flags ||
+	    sqe->splice_fd_in)
+		return -EINVAL;
+	recvzc->u_zc = u64_to_user_ptr(READ_ONCE(sqe->addr));
+	recvzc->u_len = u64_to_user_ptr(READ_ONCE(sqe->off));
+	return 0;
+}
+
+/* Issue IORING_OP_RECVZC: run TCP zerocopy receive and post the result CQE. */
+static int io_recvzc(struct io_kiocb *req, unsigned int issue_flags)
+{
+	struct scm_timestamping_internal tss;
+	struct tcp_zerocopy_receive zc;
+	struct socket *sock;
+	struct sock *sk;
+	int len = 0;
+	int err;
+
+	/* Not polled yet: -EAGAIN (presumably arms the .pollin wait first). */
+	if (!(req->flags & REQ_F_POLLED))
+		return -EAGAIN;
+	sock = sock_from_file(req->file);
+	if (unlikely(!sock))
+		return -ENOTSOCK;
+	sk = sock->sk;
+	/* tcp_zerocopy_receive() treats sk as a TCP sock: reject anything else. */
+	err = -EOPNOTSUPP;
+	if (sk->sk_type != SOCK_STREAM || sk->sk_protocol != IPPROTO_TCP)
+		goto out;
+	err = zc_receive_check(&zc, &len, req->recvzc.u_zc, req->recvzc.u_len);
+	if (err)
+		goto out;
+	lock_sock(sk);
+	err = tcp_zerocopy_receive(sk, &zc, &tss);
+	release_sock(sk);
+	err = zc_receive_update(sk, &zc, len, req->recvzc.u_zc, &tss, err);
+out:
+	if (err < 0)
+		req_set_fail(req);	/* mark failed so linked requests are not run */
+	__io_req_complete(req, issue_flags, err, 0);
+	return 0;
+}
+
 static int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 {
 	struct io_accept *accept = &req->accept;
@@ -6563,6 +6630,8 @@  static int io_req_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 		return io_symlinkat_prep(req, sqe);
 	case IORING_OP_LINKAT:
 		return io_linkat_prep(req, sqe);
+	case IORING_OP_RECVZC:
+		return io_recvzc_prep(req, sqe);
 	}
 
 	printk_once(KERN_WARNING "io_uring: unhandled opcode %d\n",
@@ -6846,6 +6915,9 @@  static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags)
 	case IORING_OP_LINKAT:
 		ret = io_linkat(req, issue_flags);
 		break;
+	case IORING_OP_RECVZC:
+		ret = io_recvzc(req, issue_flags);
+		break;
 	default:
 		ret = -EINVAL;
 		break;
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 787f491f0d2a..79eb43c64da2 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -143,6 +143,7 @@  enum {
 	IORING_OP_MKDIRAT,
 	IORING_OP_SYMLINKAT,
 	IORING_OP_LINKAT,
+	IORING_OP_RECVZC,
 
 	/* this goes last, obviously */
 	IORING_OP_LAST,