diff mbox series

[bpf-next,4/8] xsk: register XDP sockets at bind(), and add new AF_XDP BPF helper

Message ID 20210119153655.153999-5-bjorn.topel@gmail.com (mailing list archive)
State Superseded
Delegated to: BPF
Headers show
Series Introduce bpf_redirect_xsk() helper | expand

Checks

Context Check Description
netdev/cover_letter success Link
netdev/fixes_present success Link
netdev/patch_count success Link
netdev/tree_selection success Clearly marked for bpf-next
netdev/subject_prefix success Link
netdev/cc_maintainers warning 6 maintainers not CCed: quentin@isovalent.com songliubraving@fb.com andrii@kernel.org kpsingh@kernel.org kafai@fb.com yhs@fb.com
netdev/source_inline success Was 0 now: 0
netdev/verify_signedoff success Link
netdev/module_param success Was 0 now: 0
netdev/build_32bit success Errors and warnings before: 12220 this patch: 12220
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/verify_fixes success Link
netdev/checkpatch warning WARNING: line length of 84 exceeds 80 columns WARNING: line length of 87 exceeds 80 columns WARNING: line length of 88 exceeds 80 columns WARNING: line length of 90 exceeds 80 columns
netdev/build_allmodconfig_warn success Errors and warnings before: 12877 this patch: 12877
netdev/header_inline success Link
netdev/stable success Stable not CCed

Commit Message

Björn Töpel Jan. 19, 2021, 3:36 p.m. UTC
From: Björn Töpel <bjorn.topel@intel.com>

Extend bind() for XDP sockets, so that the bound socket is added to
the netdev_rx_queue _rx array in the netdevice. We call this to
register an XDP socket. To redirect packets to a registered socket, a
new BPF helper is used: bpf_redirect_xsk().

For shared XDP sockets, only the first bound socket is
registered. Users that require more advanced setups, continue to the
XSKMAP and bpf_redirect_map().

Now, why would one use bpf_redirect_xsk() over the regular
bpf_redirect_map() helper? First: Slightly better performance. Second:
Convenience. Most user use one socket per queue. This scenario is what
registered sockets support. There is no need to create an XSKMAP. This
can also reduce complexity from containerized setups, where users
might what to use XDP sockets without CAP_SYS_ADMIN capabilities.

Reviewed-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
---
 include/linux/filter.h         |  1 +
 include/linux/netdevice.h      |  1 +
 include/net/xdp_sock.h         | 12 +++++
 include/net/xsk_buff_pool.h    |  2 +-
 include/uapi/linux/bpf.h       |  7 +++
 net/core/filter.c              | 49 ++++++++++++++++--
 net/xdp/xsk.c                  | 93 ++++++++++++++++++++++++++++------
 net/xdp/xsk_buff_pool.c        |  4 +-
 tools/include/uapi/linux/bpf.h |  7 +++
 9 files changed, 153 insertions(+), 23 deletions(-)

Comments

kernel test robot Jan. 20, 2021, 1:01 p.m. UTC | #1
Hi "Björn,

I love your patch! Yet something to improve:

[auto build test ERROR on 95204c9bfa48d2f4d3bab7df55c1cc823957ff81]

url:    https://github.com/0day-ci/linux/commits/Bj-rn-T-pel/Introduce-bpf_redirect_xsk-helper/20210120-165233
base:    95204c9bfa48d2f4d3bab7df55c1cc823957ff81
config: nios2-randconfig-r034-20210120 (attached as .config)
compiler: nios2-linux-gcc (GCC) 9.3.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/0day-ci/linux/commit/e90e83e99e93790a73ecb4071ffd4366c75f12c0
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Bj-rn-T-pel/Introduce-bpf_redirect_xsk-helper/20210120-165233
        git checkout e90e83e99e93790a73ecb4071ffd4366c75f12c0
        # save the attached .config to linux build tree
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross ARCH=nios2 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

   In file included from <command-line>:
   net/core/filter.c: In function '____bpf_xdp_redirect_xsk':
>> net/core/filter.c:4165:35: error: 'struct netdev_rx_queue' has no member named 'xsk'
    4165 |  xs = READ_ONCE(dev->_rx[queue_id].xsk);
         |                                   ^
   include/linux/compiler_types.h:300:9: note: in definition of macro '__compiletime_assert'
     300 |   if (!(condition))     \
         |         ^~~~~~~~~
   include/linux/compiler_types.h:320:2: note: in expansion of macro '_compiletime_assert'
     320 |  _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__)
         |  ^~~~~~~~~~~~~~~~~~~
   include/asm-generic/rwonce.h:36:2: note: in expansion of macro 'compiletime_assert'
      36 |  compiletime_assert(__native_word(t) || sizeof(t) == sizeof(long long), \
         |  ^~~~~~~~~~~~~~~~~~
   include/asm-generic/rwonce.h:36:21: note: in expansion of macro '__native_word'
      36 |  compiletime_assert(__native_word(t) || sizeof(t) == sizeof(long long), \
         |                     ^~~~~~~~~~~~~
   include/asm-generic/rwonce.h:49:2: note: in expansion of macro 'compiletime_assert_rwonce_type'
      49 |  compiletime_assert_rwonce_type(x);    \
         |  ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   net/core/filter.c:4165:7: note: in expansion of macro 'READ_ONCE'
    4165 |  xs = READ_ONCE(dev->_rx[queue_id].xsk);
         |       ^~~~~~~~~
>> net/core/filter.c:4165:35: error: 'struct netdev_rx_queue' has no member named 'xsk'
    4165 |  xs = READ_ONCE(dev->_rx[queue_id].xsk);
         |                                   ^
   include/linux/compiler_types.h:300:9: note: in definition of macro '__compiletime_assert'
     300 |   if (!(condition))     \
         |         ^~~~~~~~~
   include/linux/compiler_types.h:320:2: note: in expansion of macro '_compiletime_assert'
     320 |  _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__)
         |  ^~~~~~~~~~~~~~~~~~~
   include/asm-generic/rwonce.h:36:2: note: in expansion of macro 'compiletime_assert'
      36 |  compiletime_assert(__native_word(t) || sizeof(t) == sizeof(long long), \
         |  ^~~~~~~~~~~~~~~~~~
   include/asm-generic/rwonce.h:36:21: note: in expansion of macro '__native_word'
      36 |  compiletime_assert(__native_word(t) || sizeof(t) == sizeof(long long), \
         |                     ^~~~~~~~~~~~~
   include/asm-generic/rwonce.h:49:2: note: in expansion of macro 'compiletime_assert_rwonce_type'
      49 |  compiletime_assert_rwonce_type(x);    \
         |  ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   net/core/filter.c:4165:7: note: in expansion of macro 'READ_ONCE'
    4165 |  xs = READ_ONCE(dev->_rx[queue_id].xsk);
         |       ^~~~~~~~~
>> net/core/filter.c:4165:35: error: 'struct netdev_rx_queue' has no member named 'xsk'
    4165 |  xs = READ_ONCE(dev->_rx[queue_id].xsk);
         |                                   ^
   include/linux/compiler_types.h:300:9: note: in definition of macro '__compiletime_assert'
     300 |   if (!(condition))     \
         |         ^~~~~~~~~
   include/linux/compiler_types.h:320:2: note: in expansion of macro '_compiletime_assert'
     320 |  _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__)
         |  ^~~~~~~~~~~~~~~~~~~
   include/asm-generic/rwonce.h:36:2: note: in expansion of macro 'compiletime_assert'
      36 |  compiletime_assert(__native_word(t) || sizeof(t) == sizeof(long long), \
         |  ^~~~~~~~~~~~~~~~~~
   include/asm-generic/rwonce.h:36:21: note: in expansion of macro '__native_word'
      36 |  compiletime_assert(__native_word(t) || sizeof(t) == sizeof(long long), \
         |                     ^~~~~~~~~~~~~
   include/asm-generic/rwonce.h:49:2: note: in expansion of macro 'compiletime_assert_rwonce_type'
      49 |  compiletime_assert_rwonce_type(x);    \
         |  ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   net/core/filter.c:4165:7: note: in expansion of macro 'READ_ONCE'
    4165 |  xs = READ_ONCE(dev->_rx[queue_id].xsk);
         |       ^~~~~~~~~
>> net/core/filter.c:4165:35: error: 'struct netdev_rx_queue' has no member named 'xsk'
    4165 |  xs = READ_ONCE(dev->_rx[queue_id].xsk);
         |                                   ^
   include/linux/compiler_types.h:300:9: note: in definition of macro '__compiletime_assert'
     300 |   if (!(condition))     \
         |         ^~~~~~~~~
   include/linux/compiler_types.h:320:2: note: in expansion of macro '_compiletime_assert'
     320 |  _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__)
         |  ^~~~~~~~~~~~~~~~~~~
   include/asm-generic/rwonce.h:36:2: note: in expansion of macro 'compiletime_assert'
      36 |  compiletime_assert(__native_word(t) || sizeof(t) == sizeof(long long), \
         |  ^~~~~~~~~~~~~~~~~~
   include/asm-generic/rwonce.h:36:21: note: in expansion of macro '__native_word'
      36 |  compiletime_assert(__native_word(t) || sizeof(t) == sizeof(long long), \
         |                     ^~~~~~~~~~~~~
   include/asm-generic/rwonce.h:49:2: note: in expansion of macro 'compiletime_assert_rwonce_type'
      49 |  compiletime_assert_rwonce_type(x);    \
         |  ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   net/core/filter.c:4165:7: note: in expansion of macro 'READ_ONCE'
    4165 |  xs = READ_ONCE(dev->_rx[queue_id].xsk);
         |       ^~~~~~~~~
>> net/core/filter.c:4165:35: error: 'struct netdev_rx_queue' has no member named 'xsk'
    4165 |  xs = READ_ONCE(dev->_rx[queue_id].xsk);
         |                                   ^
   include/linux/compiler_types.h:300:9: note: in definition of macro '__compiletime_assert'
     300 |   if (!(condition))     \
         |         ^~~~~~~~~
   include/linux/compiler_types.h:320:2: note: in expansion of macro '_compiletime_assert'
     320 |  _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__)
         |  ^~~~~~~~~~~~~~~~~~~
   include/asm-generic/rwonce.h:36:2: note: in expansion of macro 'compiletime_assert'
      36 |  compiletime_assert(__native_word(t) || sizeof(t) == sizeof(long long), \
         |  ^~~~~~~~~~~~~~~~~~
   include/asm-generic/rwonce.h:49:2: note: in expansion of macro 'compiletime_assert_rwonce_type'
      49 |  compiletime_assert_rwonce_type(x);    \
         |  ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   net/core/filter.c:4165:7: note: in expansion of macro 'READ_ONCE'
    4165 |  xs = READ_ONCE(dev->_rx[queue_id].xsk);
         |       ^~~~~~~~~
>> net/core/filter.c:4165:35: error: 'struct netdev_rx_queue' has no member named 'xsk'
    4165 |  xs = READ_ONCE(dev->_rx[queue_id].xsk);
         |                                   ^
   include/linux/compiler_types.h:271:13: note: in definition of macro '__unqual_scalar_typeof'
     271 |   _Generic((x),      \
         |             ^
   include/asm-generic/rwonce.h:50:2: note: in expansion of macro '__READ_ONCE'
      50 |  __READ_ONCE(x);       \
         |  ^~~~~~~~~~~
   net/core/filter.c:4165:7: note: in expansion of macro 'READ_ONCE'
    4165 |  xs = READ_ONCE(dev->_rx[queue_id].xsk);
         |       ^~~~~~~~~
   In file included from ./arch/nios2/include/generated/asm/rwonce.h:1,
                    from include/linux/compiler.h:246,
                    from include/linux/kernel.h:10,
                    from include/linux/list.h:9,
                    from include/linux/module.h:12,
                    from net/core/filter.c:20:
>> net/core/filter.c:4165:35: error: 'struct netdev_rx_queue' has no member named 'xsk'
    4165 |  xs = READ_ONCE(dev->_rx[queue_id].xsk);
         |                                   ^
   include/asm-generic/rwonce.h:44:72: note: in definition of macro '__READ_ONCE'
      44 | #define __READ_ONCE(x) (*(const volatile __unqual_scalar_typeof(x) *)&(x))
         |                                                                        ^
   net/core/filter.c:4165:7: note: in expansion of macro 'READ_ONCE'
    4165 |  xs = READ_ONCE(dev->_rx[queue_id].xsk);
         |       ^~~~~~~~~

Kconfig warnings: (for reference only)
   WARNING: unmet direct dependencies detected for SERIAL_CORE_CONSOLE
   Depends on TTY && HAS_IOMEM
   Selected by
   - EARLY_PRINTK


vim +4165 net/core/filter.c

  4157	
  4158	BPF_CALL_2(bpf_xdp_redirect_xsk, struct xdp_buff *, xdp, u64, action)
  4159	{
  4160		struct net_device *dev = xdp->rxq->dev;
  4161		u32 queue_id = xdp->rxq->queue_index;
  4162		struct bpf_redirect_info *ri;
  4163		struct xdp_sock *xs;
  4164	
> 4165		xs = READ_ONCE(dev->_rx[queue_id].xsk);
  4166		if (!xs)
  4167			return action;
  4168	
  4169		ri = this_cpu_ptr(&bpf_redirect_info);
  4170		ri->tgt_type = XDP_REDIR_XSK;
  4171		ri->tgt_value = xs;
  4172	
  4173		return XDP_REDIRECT;
  4174	}
  4175	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
diff mbox series

Patch

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 5fc336a271c2..3f9efbd08cba 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -648,6 +648,7 @@  enum xdp_redirect_type {
 	XDP_REDIR_DEV_MAP,
 	XDP_REDIR_CPU_MAP,
 	XDP_REDIR_XSK_MAP,
+	XDP_REDIR_XSK,
 };
 
 DECLARE_PER_CPU(struct bpf_redirect_info, bpf_redirect_info);
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 5b949076ed23..cb0e215e981c 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -749,6 +749,7 @@  struct netdev_rx_queue {
 	struct xdp_rxq_info		xdp_rxq;
 #ifdef CONFIG_XDP_SOCKETS
 	struct xsk_buff_pool            *pool;
+	struct xdp_sock			*xsk;
 #endif
 } ____cacheline_aligned_in_smp;
 
diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h
index cc17bc957548..97b21c483baf 100644
--- a/include/net/xdp_sock.h
+++ b/include/net/xdp_sock.h
@@ -77,8 +77,10 @@  struct xdp_sock {
 #ifdef CONFIG_XDP_SOCKETS
 
 int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp);
+int xsk_generic_redirect(struct net_device *dev, struct xdp_buff *xdp);
 int __xsk_map_redirect(struct xdp_sock *xs, struct xdp_buff *xdp);
 void __xsk_map_flush(void);
+int xsk_redirect(struct xdp_sock *xs, struct xdp_buff *xdp);
 
 static inline struct xdp_sock *__xsk_map_lookup_elem(struct bpf_map *map,
 						     u32 key)
@@ -100,6 +102,11 @@  static inline int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
 	return -ENOTSUPP;
 }
 
+static inline int xsk_generic_redirect(struct net_device *dev, struct xdp_buff *xdp)
+{
+	return -EOPNOTSUPP;
+}
+
 static inline int __xsk_map_redirect(struct xdp_sock *xs, struct xdp_buff *xdp)
 {
 	return -EOPNOTSUPP;
@@ -115,6 +122,11 @@  static inline struct xdp_sock *__xsk_map_lookup_elem(struct bpf_map *map,
 	return NULL;
 }
 
+static inline int xsk_redirect(struct net_device *dev, struct xdp_buff *xdp)
+{
+	return -EOPNOTSUPP;
+}
+
 #endif /* CONFIG_XDP_SOCKETS */
 
 #endif /* _LINUX_XDP_SOCK_H */
diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h
index eaa8386dbc63..bd531d561c60 100644
--- a/include/net/xsk_buff_pool.h
+++ b/include/net/xsk_buff_pool.h
@@ -84,7 +84,7 @@  struct xsk_buff_pool {
 /* AF_XDP core. */
 struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs,
 						struct xdp_umem *umem);
-int xp_assign_dev(struct xsk_buff_pool *pool, struct net_device *dev,
+int xp_assign_dev(struct xdp_sock *xs, struct xsk_buff_pool *pool, struct net_device *dev,
 		  u16 queue_id, u16 flags);
 int xp_assign_dev_shared(struct xsk_buff_pool *pool, struct xdp_umem *umem,
 			 struct net_device *dev, u16 queue_id);
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index c001766adcbc..bbc7d9a57262 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -3836,6 +3836,12 @@  union bpf_attr {
  *	Return
  *		A pointer to a struct socket on success or NULL if the file is
  *		not a socket.
+ *
+ * long bpf_redirect_xsk(struct xdp_buff *xdp_md, u64 action)
+ *	Description
+ *		Redirect to the registered AF_XDP socket.
+ *	Return
+ *		**XDP_REDIRECT** on success, otherwise the action parameter is returned.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -4001,6 +4007,7 @@  union bpf_attr {
 	FN(ktime_get_coarse_ns),	\
 	FN(ima_inode_hash),		\
 	FN(sock_from_file),		\
+	FN(redirect_xsk),		\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
diff --git a/net/core/filter.c b/net/core/filter.c
index 5f31e21be531..b457c83fba70 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3977,6 +3977,9 @@  int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
 	case XDP_REDIR_XSK_MAP:
 		err = __xsk_map_redirect(fwd, xdp);
 		break;
+	case XDP_REDIR_XSK:
+		err = xsk_redirect(fwd, xdp);
+		break;
 	default:
 		err = -EBADRQC;
 	}
@@ -4044,25 +4047,33 @@  int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb,
 	ri->tgt_type = XDP_REDIR_UNSET;
 	ri->tgt_value = NULL;
 
-	if (type == XDP_REDIR_DEV_IFINDEX) {
+	switch (type) {
+	case XDP_REDIR_DEV_IFINDEX: {
 		fwd = dev_get_by_index_rcu(dev_net(dev), (u32)(long)fwd);
 		if (unlikely(!fwd)) {
 			err = -EINVAL;
-			goto err;
+			break;
 		}
 
 		err = xdp_ok_fwd_dev(fwd, skb->len);
 		if (unlikely(err))
-			goto err;
+			break;
 
 		skb->dev = fwd;
 		_trace_xdp_redirect(dev, xdp_prog, index);
 		generic_xdp_tx(skb, xdp_prog);
 		return 0;
 	}
+	case XDP_REDIR_XSK:
+		err = xsk_generic_redirect(dev, xdp);
+		if (err)
+			break;
+		consume_skb(skb);
+		break;
+	default:
+		return xdp_do_generic_redirect_map(dev, skb, xdp, xdp_prog, fwd, type);
+	}
 
-	return xdp_do_generic_redirect_map(dev, skb, xdp, xdp_prog, fwd, type);
-err:
 	_trace_xdp_redirect_err(dev, xdp_prog, index, err);
 	return err;
 }
@@ -4144,6 +4155,32 @@  static const struct bpf_func_proto bpf_xdp_redirect_map_proto = {
 	.arg3_type      = ARG_ANYTHING,
 };
 
+BPF_CALL_2(bpf_xdp_redirect_xsk, struct xdp_buff *, xdp, u64, action)
+{
+	struct net_device *dev = xdp->rxq->dev;
+	u32 queue_id = xdp->rxq->queue_index;
+	struct bpf_redirect_info *ri;
+	struct xdp_sock *xs;
+
+	xs = READ_ONCE(dev->_rx[queue_id].xsk);
+	if (!xs)
+		return action;
+
+	ri = this_cpu_ptr(&bpf_redirect_info);
+	ri->tgt_type = XDP_REDIR_XSK;
+	ri->tgt_value = xs;
+
+	return XDP_REDIRECT;
+}
+
+static const struct bpf_func_proto bpf_xdp_redirect_xsk_proto = {
+	.func           = bpf_xdp_redirect_xsk,
+	.gpl_only       = false,
+	.ret_type       = RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_ANYTHING,
+};
+
 static unsigned long bpf_skb_copy(void *dst_buff, const void *skb,
 				  unsigned long off, unsigned long len)
 {
@@ -7260,6 +7297,8 @@  xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 	case BPF_FUNC_tcp_gen_syncookie:
 		return &bpf_tcp_gen_syncookie_proto;
 #endif
+	case BPF_FUNC_redirect_xsk:
+		return &bpf_xdp_redirect_xsk_proto;
 	default:
 		return bpf_sk_base_func_proto(func_id);
 	}
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 5820de65060b..79f1492e71e2 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -134,6 +134,28 @@  int xsk_reg_pool_at_qid(struct net_device *dev, struct xsk_buff_pool *pool,
 	return 0;
 }
 
+static struct xdp_sock *xsk_get_at_qid(struct net_device *dev, u16 queue_id)
+{
+	return READ_ONCE(dev->_rx[queue_id].xsk);
+}
+
+static void xsk_clear(struct xdp_sock *xs)
+{
+	struct net_device *dev = xs->dev;
+	u16 queue_id = xs->queue_id;
+
+	if (queue_id < dev->num_rx_queues)
+		WRITE_ONCE(dev->_rx[queue_id].xsk, NULL);
+}
+
+static void xsk_reg(struct xdp_sock *xs)
+{
+	struct net_device *dev = xs->dev;
+	u16 queue_id = xs->queue_id;
+
+	WRITE_ONCE(dev->_rx[queue_id].xsk, xs);
+}
+
 void xp_release(struct xdp_buff_xsk *xskb)
 {
 	xskb->pool->free_heads[xskb->pool->free_heads_cnt++] = xskb;
@@ -184,7 +206,7 @@  static void xsk_copy_xdp(struct xdp_buff *to, struct xdp_buff *from, u32 len)
 	memcpy(to_buf, from_buf, len + metalen);
 }
 
-static int __xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
+static int ____xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
 {
 	struct xdp_buff *xsk_xdp;
 	int err;
@@ -211,6 +233,22 @@  static int __xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
 	return 0;
 }
 
+static int __xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
+{
+	int err;
+	u32 len;
+
+	if (likely(xdp->rxq->mem.type == MEM_TYPE_XSK_BUFF_POOL)) {
+		len = xdp->data_end - xdp->data;
+		return __xsk_rcv_zc(xs, xdp, len);
+	}
+
+	err = ____xsk_rcv(xs, xdp);
+	if (!err)
+		xdp_return_buff(xdp);
+	return err;
+}
+
 static bool xsk_tx_writeable(struct xdp_sock *xs)
 {
 	if (xskq_cons_present_entries(xs->tx) > xs->tx->nentries / 2)
@@ -248,6 +286,39 @@  static void xsk_flush(struct xdp_sock *xs)
 	sock_def_readable(&xs->sk);
 }
 
+int xsk_redirect(struct xdp_sock *xs, struct xdp_buff *xdp)
+{
+	struct list_head *flush_list = this_cpu_ptr(&xskmap_flush_list);
+	int err;
+
+	sk_mark_napi_id_once_xdp(&xs->sk, xdp);
+	err = __xsk_rcv(xs, xdp);
+	if (err)
+		return err;
+
+	if (!xs->flush_node.prev)
+		list_add(&xs->flush_node, flush_list);
+	return 0;
+}
+
+int xsk_generic_redirect(struct net_device *dev, struct xdp_buff *xdp)
+{
+	struct xdp_sock *xs;
+	u32 queue_id;
+	int err;
+
+	queue_id = xdp->rxq->queue_index;
+	xs = xsk_get_at_qid(dev, queue_id);
+	if (!xs)
+		return -EINVAL;
+
+	spin_lock_bh(&xs->rx_lock);
+	err = ____xsk_rcv(xs, xdp);
+	xsk_flush(xs);
+	spin_unlock_bh(&xs->rx_lock);
+	return err;
+}
+
 int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
 {
 	int err;
@@ -255,7 +326,7 @@  int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
 	spin_lock_bh(&xs->rx_lock);
 	err = xsk_rcv_check(xs, xdp);
 	if (!err) {
-		err = __xsk_rcv(xs, xdp);
+		err = ____xsk_rcv(xs, xdp);
 		xsk_flush(xs);
 	}
 	spin_unlock_bh(&xs->rx_lock);
@@ -264,22 +335,12 @@  int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
 
 static int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
 {
-	int err;
-	u32 len;
+	int err = xsk_rcv_check(xs, xdp);
 
-	err = xsk_rcv_check(xs, xdp);
 	if (err)
 		return err;
 
-	if (xdp->rxq->mem.type == MEM_TYPE_XSK_BUFF_POOL) {
-		len = xdp->data_end - xdp->data;
-		return __xsk_rcv_zc(xs, xdp, len);
-	}
-
-	err = __xsk_rcv(xs, xdp);
-	if (!err)
-		xdp_return_buff(xdp);
-	return err;
+	return __xsk_rcv(xs, xdp);
 }
 
 int __xsk_map_redirect(struct xdp_sock *xs, struct xdp_buff *xdp)
@@ -661,6 +722,7 @@  static void xsk_unbind_dev(struct xdp_sock *xs)
 
 	if (xs->state != XSK_BOUND)
 		return;
+	xsk_clear(xs);
 	WRITE_ONCE(xs->state, XSK_UNBOUND);
 
 	/* Wait for driver to stop using the xdp socket. */
@@ -892,7 +954,7 @@  static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
 			goto out_unlock;
 		}
 
-		err = xp_assign_dev(xs->pool, dev, qid, flags);
+		err = xp_assign_dev(xs, xs->pool, dev, qid, flags);
 		if (err) {
 			xp_destroy(xs->pool);
 			xs->pool = NULL;
@@ -918,6 +980,7 @@  static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
 		 */
 		smp_wmb();
 		WRITE_ONCE(xs->state, XSK_BOUND);
+		xsk_reg(xs);
 	}
 out_release:
 	mutex_unlock(&xs->mutex);
diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c
index 8de01aaac4a0..af02a69d0bf7 100644
--- a/net/xdp/xsk_buff_pool.c
+++ b/net/xdp/xsk_buff_pool.c
@@ -119,7 +119,7 @@  static void xp_disable_drv_zc(struct xsk_buff_pool *pool)
 	}
 }
 
-int xp_assign_dev(struct xsk_buff_pool *pool,
+int xp_assign_dev(struct xdp_sock *xs, struct xsk_buff_pool *pool,
 		  struct net_device *netdev, u16 queue_id, u16 flags)
 {
 	bool force_zc, force_copy;
@@ -204,7 +204,7 @@  int xp_assign_dev_shared(struct xsk_buff_pool *pool, struct xdp_umem *umem,
 	if (pool->uses_need_wakeup)
 		flags |= XDP_USE_NEED_WAKEUP;
 
-	return xp_assign_dev(pool, dev, queue_id, flags);
+	return xp_assign_dev(NULL, pool, dev, queue_id, flags);
 }
 
 void xp_clear_dev(struct xsk_buff_pool *pool)
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index c001766adcbc..bbc7d9a57262 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -3836,6 +3836,12 @@  union bpf_attr {
  *	Return
  *		A pointer to a struct socket on success or NULL if the file is
  *		not a socket.
+ *
+ * long bpf_redirect_xsk(struct xdp_buff *xdp_md, u64 action)
+ *	Description
+ *		Redirect to the registered AF_XDP socket.
+ *	Return
+ *		**XDP_REDIRECT** on success, otherwise the action parameter is returned.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -4001,6 +4007,7 @@  union bpf_attr {
 	FN(ktime_get_coarse_ns),	\
 	FN(ima_inode_hash),		\
 	FN(sock_from_file),		\
+	FN(redirect_xsk),		\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper