Message ID | 20210119155013.154808-5-bjorn.topel@gmail.com (mailing list archive) |
---|---|
State | Changes Requested |
Delegated to: | BPF |
Headers | show |
Series | Introduce bpf_redirect_xsk() helper | expand |
Hi "Björn, I love your patch! Yet something to improve: [auto build test ERROR on 95204c9bfa48d2f4d3bab7df55c1cc823957ff81] url: https://github.com/0day-ci/linux/commits/Bj-rn-T-pel/Introduce-bpf_redirect_xsk-helper/20210120-150357 base: 95204c9bfa48d2f4d3bab7df55c1cc823957ff81 config: x86_64-randconfig-m031-20210120 (attached as .config) compiler: gcc-9 (Debian 9.3.0-15) 9.3.0 reproduce (this is a W=1 build): # https://github.com/0day-ci/linux/commit/419b1341d7980ee57fb10f4306719eef3c1a15f8 git remote add linux-review https://github.com/0day-ci/linux git fetch --no-tags linux-review Bj-rn-T-pel/Introduce-bpf_redirect_xsk-helper/20210120-150357 git checkout 419b1341d7980ee57fb10f4306719eef3c1a15f8 # save the attached .config to linux build tree make W=1 ARCH=x86_64 If you fix the issue, kindly add following tag as appropriate Reported-by: kernel test robot <lkp@intel.com> All errors (new ones prefixed by >>): In file included from <command-line>: net/core/filter.c: In function '____bpf_xdp_redirect_xsk': >> net/core/filter.c:4165:35: error: 'struct netdev_rx_queue' has no member named 'xsk' 4165 | xs = READ_ONCE(dev->_rx[queue_id].xsk); | ^ include/linux/compiler_types.h:300:9: note: in definition of macro '__compiletime_assert' 300 | if (!(condition)) \ | ^~~~~~~~~ include/linux/compiler_types.h:320:2: note: in expansion of macro '_compiletime_assert' 320 | _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__) | ^~~~~~~~~~~~~~~~~~~ include/asm-generic/rwonce.h:36:2: note: in expansion of macro 'compiletime_assert' 36 | compiletime_assert(__native_word(t) || sizeof(t) == sizeof(long long), \ | ^~~~~~~~~~~~~~~~~~ include/asm-generic/rwonce.h:36:21: note: in expansion of macro '__native_word' 36 | compiletime_assert(__native_word(t) || sizeof(t) == sizeof(long long), \ | ^~~~~~~~~~~~~ include/asm-generic/rwonce.h:49:2: note: in expansion of macro 'compiletime_assert_rwonce_type' 49 | compiletime_assert_rwonce_type(x); \ | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ net/core/filter.c:4165:7: note: in expansion of macro 'READ_ONCE' 4165 | xs = READ_ONCE(dev->_rx[queue_id].xsk); | ^~~~~~~~~ >> net/core/filter.c:4165:35: error: 'struct netdev_rx_queue' has no member named 'xsk' 4165 | xs = READ_ONCE(dev->_rx[queue_id].xsk); | ^ include/linux/compiler_types.h:300:9: note: in definition of macro '__compiletime_assert' 300 | if (!(condition)) \ | ^~~~~~~~~ include/linux/compiler_types.h:320:2: note: in expansion of macro '_compiletime_assert' 320 | _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__) | ^~~~~~~~~~~~~~~~~~~ include/asm-generic/rwonce.h:36:2: note: in expansion of macro 'compiletime_assert' 36 | compiletime_assert(__native_word(t) || sizeof(t) == sizeof(long long), \ | ^~~~~~~~~~~~~~~~~~ include/asm-generic/rwonce.h:36:21: note: in expansion of macro '__native_word' 36 | compiletime_assert(__native_word(t) || sizeof(t) == sizeof(long long), \ | ^~~~~~~~~~~~~ include/asm-generic/rwonce.h:49:2: note: in expansion of macro 'compiletime_assert_rwonce_type' 49 | compiletime_assert_rwonce_type(x); \ | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ net/core/filter.c:4165:7: note: in expansion of macro 'READ_ONCE' 4165 | xs = READ_ONCE(dev->_rx[queue_id].xsk); | ^~~~~~~~~ >> net/core/filter.c:4165:35: error: 'struct netdev_rx_queue' has no member named 'xsk' 4165 | xs = READ_ONCE(dev->_rx[queue_id].xsk); | ^ include/linux/compiler_types.h:300:9: note: in definition of macro '__compiletime_assert' 300 | if (!(condition)) \ | ^~~~~~~~~ include/linux/compiler_types.h:320:2: note: in expansion of macro '_compiletime_assert' 320 | _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__) | ^~~~~~~~~~~~~~~~~~~ include/asm-generic/rwonce.h:36:2: note: in expansion of macro 'compiletime_assert' 36 | compiletime_assert(__native_word(t) || sizeof(t) == sizeof(long long), \ | ^~~~~~~~~~~~~~~~~~ include/asm-generic/rwonce.h:36:21: note: in expansion of macro '__native_word' 36 | compiletime_assert(__native_word(t) || sizeof(t) == sizeof(long long), \ | ^~~~~~~~~~~~~ include/asm-generic/rwonce.h:49:2: note: in expansion of macro 'compiletime_assert_rwonce_type' 49 | compiletime_assert_rwonce_type(x); \ | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ net/core/filter.c:4165:7: note: in expansion of macro 'READ_ONCE' 4165 | xs = READ_ONCE(dev->_rx[queue_id].xsk); | ^~~~~~~~~ >> net/core/filter.c:4165:35: error: 'struct netdev_rx_queue' has no member named 'xsk' 4165 | xs = READ_ONCE(dev->_rx[queue_id].xsk); | ^ include/linux/compiler_types.h:300:9: note: in definition of macro '__compiletime_assert' 300 | if (!(condition)) \ | ^~~~~~~~~ include/linux/compiler_types.h:320:2: note: in expansion of macro '_compiletime_assert' 320 | _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__) | ^~~~~~~~~~~~~~~~~~~ include/asm-generic/rwonce.h:36:2: note: in expansion of macro 'compiletime_assert' 36 | compiletime_assert(__native_word(t) || sizeof(t) == sizeof(long long), \ | ^~~~~~~~~~~~~~~~~~ include/asm-generic/rwonce.h:36:21: note: in expansion of macro '__native_word' 36 | compiletime_assert(__native_word(t) || sizeof(t) == sizeof(long long), \ | ^~~~~~~~~~~~~ include/asm-generic/rwonce.h:49:2: note: in expansion of macro 'compiletime_assert_rwonce_type' 49 | compiletime_assert_rwonce_type(x); \ | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ net/core/filter.c:4165:7: note: in expansion of macro 'READ_ONCE' 4165 | xs = READ_ONCE(dev->_rx[queue_id].xsk); | ^~~~~~~~~ >> net/core/filter.c:4165:35: error: 'struct netdev_rx_queue' has no member named 'xsk' 4165 | xs = READ_ONCE(dev->_rx[queue_id].xsk); | ^ include/linux/compiler_types.h:300:9: note: in definition of macro '__compiletime_assert' 300 | if (!(condition)) \ | ^~~~~~~~~ include/linux/compiler_types.h:320:2: note: in expansion of macro '_compiletime_assert' 320 | _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__) | ^~~~~~~~~~~~~~~~~~~ include/asm-generic/rwonce.h:36:2: note: in expansion of macro 'compiletime_assert' 36 | compiletime_assert(__native_word(t) || sizeof(t) == sizeof(long long), \ | ^~~~~~~~~~~~~~~~~~ include/asm-generic/rwonce.h:49:2: note: in expansion of macro 'compiletime_assert_rwonce_type' 49 | compiletime_assert_rwonce_type(x); \ | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ net/core/filter.c:4165:7: note: in expansion of macro 'READ_ONCE' 4165 | xs = READ_ONCE(dev->_rx[queue_id].xsk); | ^~~~~~~~~ >> net/core/filter.c:4165:35: error: 'struct netdev_rx_queue' has no member named 'xsk' 4165 | xs = READ_ONCE(dev->_rx[queue_id].xsk); | ^ include/linux/compiler_types.h:271:13: note: in definition of macro '__unqual_scalar_typeof' 271 | _Generic((x), \ | ^ include/asm-generic/rwonce.h:50:2: note: in expansion of macro '__READ_ONCE' 50 | __READ_ONCE(x); \ | ^~~~~~~~~~~ net/core/filter.c:4165:7: note: in expansion of macro 'READ_ONCE' 4165 | xs = READ_ONCE(dev->_rx[queue_id].xsk); | ^~~~~~~~~ In file included from ./arch/x86/include/generated/asm/rwonce.h:1, from include/linux/compiler.h:246, from include/linux/export.h:43, from include/linux/linkage.h:7, from include/linux/kernel.h:7, from include/linux/list.h:9, from include/linux/module.h:12, from net/core/filter.c:20: >> net/core/filter.c:4165:35: error: 'struct netdev_rx_queue' has no member named 'xsk' 4165 | xs = READ_ONCE(dev->_rx[queue_id].xsk); | ^ include/asm-generic/rwonce.h:44:72: note: in definition of macro '__READ_ONCE' 44 | #define __READ_ONCE(x) (*(const volatile __unqual_scalar_typeof(x) *)&(x)) | ^ net/core/filter.c:4165:7: note: in expansion of macro 'READ_ONCE' 4165 | xs = READ_ONCE(dev->_rx[queue_id].xsk); | ^~~~~~~~~ vim +4165 net/core/filter.c 4157 4158 BPF_CALL_2(bpf_xdp_redirect_xsk, struct xdp_buff *, xdp, u64, action) 4159 { 4160 struct net_device *dev = xdp->rxq->dev; 4161 u32 queue_id = xdp->rxq->queue_index; 4162 struct bpf_redirect_info *ri; 4163 struct xdp_sock *xs; 4164 > 4165 xs = READ_ONCE(dev->_rx[queue_id].xsk); 4166 if (!xs) 4167 return action; 4168 4169 ri = this_cpu_ptr(&bpf_redirect_info); 4170 ri->tgt_type = XDP_REDIR_XSK; 4171 ri->tgt_value = xs; 4172 4173 return XDP_REDIRECT; 4174 } 4175 --- 0-DAY CI Kernel Test Service, Intel Corporation https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
On 2021-01-20 09:25, kernel test robot wrote: > Hi "Björn, > > I love your patch! Yet something to improve: > > [auto build test ERROR on 95204c9bfa48d2f4d3bab7df55c1cc823957ff81] > > url: https://github.com/0day-ci/linux/commits/Bj-rn-T-pel/Introduce-bpf_redirect_xsk-helper/20210120-150357 > base: 95204c9bfa48d2f4d3bab7df55c1cc823957ff81 > config: x86_64-randconfig-m031-20210120 (attached as .config) > compiler: gcc-9 (Debian 9.3.0-15) 9.3.0 > reproduce (this is a W=1 build): > # https://github.com/0day-ci/linux/commit/419b1341d7980ee57fb10f4306719eef3c1a15f8 > git remote add linux-review https://github.com/0day-ci/linux > git fetch --no-tags linux-review Bj-rn-T-pel/Introduce-bpf_redirect_xsk-helper/20210120-150357 > git checkout 419b1341d7980ee57fb10f4306719eef3c1a15f8 > # save the attached .config to linux build tree > make W=1 ARCH=x86_64 > > If you fix the issue, kindly add following tag as appropriate > Reported-by: kernel test robot <lkp@intel.com> > > All errors (new ones prefixed by >>): > > In file included from <command-line>: > net/core/filter.c: In function '____bpf_xdp_redirect_xsk': >>> net/core/filter.c:4165:35: error: 'struct netdev_rx_queue' has no member named 'xsk' This rev is missing proper CONFIG_XDP_SOCKET ifdefs. I'll fix this in the next spin, but I'll wait a bit for more comments. Björn [...]
Hi "Björn, I love your patch! Yet something to improve: [auto build test ERROR on 95204c9bfa48d2f4d3bab7df55c1cc823957ff81] url: https://github.com/0day-ci/linux/commits/Bj-rn-T-pel/Introduce-bpf_redirect_xsk-helper/20210120-150357 base: 95204c9bfa48d2f4d3bab7df55c1cc823957ff81 config: arm-randconfig-r013-20210120 (attached as .config) compiler: clang version 12.0.0 (https://github.com/llvm/llvm-project 22b68440e1647e16b5ee24b924986207173c02d1) reproduce (this is a W=1 build): wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross chmod +x ~/bin/make.cross # install arm cross compiling tool for clang build # apt-get install binutils-arm-linux-gnueabi # https://github.com/0day-ci/linux/commit/419b1341d7980ee57fb10f4306719eef3c1a15f8 git remote add linux-review https://github.com/0day-ci/linux git fetch --no-tags linux-review Bj-rn-T-pel/Introduce-bpf_redirect_xsk-helper/20210120-150357 git checkout 419b1341d7980ee57fb10f4306719eef3c1a15f8 # save the attached .config to linux build tree COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross ARCH=arm If you fix the issue, kindly add following tag as appropriate Reported-by: kernel test robot <lkp@intel.com> All errors (new ones prefixed by >>): >> net/core/filter.c:4165:36: error: no member named 'xsk' in 'struct netdev_rx_queue' xs = READ_ONCE(dev->_rx[queue_id].xsk); ~~~~~~~~~~~~~~~~~~ ^ include/asm-generic/rwonce.h:49:33: note: expanded from macro 'READ_ONCE' compiletime_assert_rwonce_type(x); \ ^ include/asm-generic/rwonce.h:36:35: note: expanded from macro 'compiletime_assert_rwonce_type' compiletime_assert(__native_word(t) || sizeof(t) == sizeof(long long), \ ^ include/linux/compiler_types.h:282:10: note: expanded from macro '__native_word' (sizeof(t) == sizeof(char) || sizeof(t) == sizeof(short) || \ ^ include/linux/compiler_types.h:320:22: note: expanded from macro 'compiletime_assert' _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__) ^~~~~~~~~ include/linux/compiler_types.h:308:23: note: expanded from macro '_compiletime_assert' __compiletime_assert(condition, msg, prefix, suffix) ^~~~~~~~~ include/linux/compiler_types.h:300:9: note: expanded from macro '__compiletime_assert' if (!(condition)) \ ^~~~~~~~~ >> net/core/filter.c:4165:36: error: no member named 'xsk' in 'struct netdev_rx_queue' xs = READ_ONCE(dev->_rx[queue_id].xsk); ~~~~~~~~~~~~~~~~~~ ^ include/asm-generic/rwonce.h:49:33: note: expanded from macro 'READ_ONCE' compiletime_assert_rwonce_type(x); \ ^ include/asm-generic/rwonce.h:36:35: note: expanded from macro 'compiletime_assert_rwonce_type' compiletime_assert(__native_word(t) || sizeof(t) == sizeof(long long), \ ^ include/linux/compiler_types.h:282:39: note: expanded from macro '__native_word' (sizeof(t) == sizeof(char) || sizeof(t) == sizeof(short) || \ ^ include/linux/compiler_types.h:320:22: note: expanded from macro 'compiletime_assert' _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__) ^~~~~~~~~ include/linux/compiler_types.h:308:23: note: expanded from macro '_compiletime_assert' __compiletime_assert(condition, msg, prefix, suffix) ^~~~~~~~~ include/linux/compiler_types.h:300:9: note: expanded from macro '__compiletime_assert' if (!(condition)) \ ^~~~~~~~~ >> net/core/filter.c:4165:36: error: no member named 'xsk' in 'struct netdev_rx_queue' xs = READ_ONCE(dev->_rx[queue_id].xsk); ~~~~~~~~~~~~~~~~~~ ^ include/asm-generic/rwonce.h:49:33: note: expanded from macro 'READ_ONCE' compiletime_assert_rwonce_type(x); \ ^ include/asm-generic/rwonce.h:36:35: note: expanded from macro 'compiletime_assert_rwonce_type' compiletime_assert(__native_word(t) || sizeof(t) == sizeof(long long), \ ^ include/linux/compiler_types.h:283:10: note: expanded from macro '__native_word' sizeof(t) == sizeof(int) || sizeof(t) == sizeof(long)) ^ include/linux/compiler_types.h:320:22: note: expanded from macro 'compiletime_assert' _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__) ^~~~~~~~~ include/linux/compiler_types.h:308:23: note: expanded from macro '_compiletime_assert' __compiletime_assert(condition, msg, prefix, suffix) ^~~~~~~~~ include/linux/compiler_types.h:300:9: note: expanded from macro '__compiletime_assert' if (!(condition)) \ ^~~~~~~~~ >> net/core/filter.c:4165:36: error: no member named 'xsk' in 'struct netdev_rx_queue' xs = READ_ONCE(dev->_rx[queue_id].xsk); ~~~~~~~~~~~~~~~~~~ ^ include/asm-generic/rwonce.h:49:33: note: expanded from macro 'READ_ONCE' compiletime_assert_rwonce_type(x); \ ^ include/asm-generic/rwonce.h:36:35: note: expanded from macro 'compiletime_assert_rwonce_type' compiletime_assert(__native_word(t) || sizeof(t) == sizeof(long long), \ ^ include/linux/compiler_types.h:283:38: note: expanded from macro '__native_word' sizeof(t) == sizeof(int) || sizeof(t) == sizeof(long)) ^ include/linux/compiler_types.h:320:22: note: expanded from macro 'compiletime_assert' _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__) ^~~~~~~~~ include/linux/compiler_types.h:308:23: note: expanded from macro '_compiletime_assert' __compiletime_assert(condition, msg, prefix, suffix) ^~~~~~~~~ include/linux/compiler_types.h:300:9: note: expanded from macro '__compiletime_assert' if (!(condition)) \ ^~~~~~~~~ >> net/core/filter.c:4165:36: error: no member named 'xsk' in 'struct netdev_rx_queue' xs = READ_ONCE(dev->_rx[queue_id].xsk); ~~~~~~~~~~~~~~~~~~ ^ include/asm-generic/rwonce.h:49:33: note: expanded from macro 'READ_ONCE' compiletime_assert_rwonce_type(x); \ ^ include/asm-generic/rwonce.h:36:48: note: expanded from macro 'compiletime_assert_rwonce_type' compiletime_assert(__native_word(t) || sizeof(t) == sizeof(long long), \ ^ include/linux/compiler_types.h:320:22: note: expanded from macro 'compiletime_assert' _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__) ^~~~~~~~~ include/linux/compiler_types.h:308:23: note: expanded from macro '_compiletime_assert' __compiletime_assert(condition, msg, prefix, suffix) ^~~~~~~~~ include/linux/compiler_types.h:300:9: note: expanded from macro '__compiletime_assert' if (!(condition)) \ ^~~~~~~~~ >> net/core/filter.c:4165:36: error: no member named 'xsk' in 'struct netdev_rx_queue' xs = READ_ONCE(dev->_rx[queue_id].xsk); ~~~~~~~~~~~~~~~~~~ ^ include/asm-generic/rwonce.h:50:14: note: expanded from macro 'READ_ONCE' __READ_ONCE(x); \ ^ include/asm-generic/rwonce.h:44:65: note: expanded from macro '__READ_ONCE' #define __READ_ONCE(x) (*(const volatile __unqual_scalar_typeof(x) *)&(x)) ^ include/linux/compiler_types.h:271:13: note: expanded from macro '__unqual_scalar_typeof' _Generic((x), \ ^ >> net/core/filter.c:4165:36: error: no member named 'xsk' in 'struct netdev_rx_queue' xs = READ_ONCE(dev->_rx[queue_id].xsk); ~~~~~~~~~~~~~~~~~~ ^ include/asm-generic/rwonce.h:50:14: note: expanded from macro 'READ_ONCE' __READ_ONCE(x); \ ^ include/asm-generic/rwonce.h:44:65: note: expanded from macro '__READ_ONCE' #define __READ_ONCE(x) (*(const volatile __unqual_scalar_typeof(x) *)&(x)) ^ include/linux/compiler_types.h:278:15: note: expanded from macro '__unqual_scalar_typeof' default: (x))) ^ >> net/core/filter.c:4165:36: error: no member named 'xsk' in 'struct netdev_rx_queue' xs = READ_ONCE(dev->_rx[queue_id].xsk); ~~~~~~~~~~~~~~~~~~ ^ include/asm-generic/rwonce.h:50:14: note: expanded from macro 'READ_ONCE' __READ_ONCE(x); \ ^ include/asm-generic/rwonce.h:44:72: note: expanded from macro '__READ_ONCE' #define __READ_ONCE(x) (*(const volatile __unqual_scalar_typeof(x) *)&(x)) ^ >> net/core/filter.c:4165:5: error: assigning to 'struct xdp_sock *' from incompatible type 'void' xs = READ_ONCE(dev->_rx[queue_id].xsk); ^ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 9 errors generated. vim +4165 net/core/filter.c 4157 4158 BPF_CALL_2(bpf_xdp_redirect_xsk, struct xdp_buff *, xdp, u64, action) 4159 { 4160 struct net_device *dev = xdp->rxq->dev; 4161 u32 queue_id = xdp->rxq->queue_index; 4162 struct bpf_redirect_info *ri; 4163 struct xdp_sock *xs; 4164 > 4165 xs = READ_ONCE(dev->_rx[queue_id].xsk); 4166 if (!xs) 4167 return action; 4168 4169 ri = this_cpu_ptr(&bpf_redirect_info); 4170 ri->tgt_type = XDP_REDIR_XSK; 4171 ri->tgt_value = xs; 4172 4173 return XDP_REDIRECT; 4174 } 4175 --- 0-DAY CI Kernel Test Service, Intel Corporation https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
Björn Töpel <bjorn.topel@gmail.com> writes: > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h > index c001766adcbc..bbc7d9a57262 100644 > --- a/include/uapi/linux/bpf.h > +++ b/include/uapi/linux/bpf.h > @@ -3836,6 +3836,12 @@ union bpf_attr { > * Return > * A pointer to a struct socket on success or NULL if the file is > * not a socket. > + * > + * long bpf_redirect_xsk(struct xdp_buff *xdp_md, u64 action) > + * Description > + * Redirect to the registered AF_XDP socket. > + * Return > + * **XDP_REDIRECT** on success, otherwise the action parameter is returned. > */ I think it would be better to make the second argument a 'flags' argument and make values > XDP_TX invalid (like we do in bpf_xdp_redirect_map() now). By allowing any value as return you lose the ability to turn it into a flags argument later... -Toke
On 2021-01-20 13:50, Toke Høiland-Jørgensen wrote: > Björn Töpel <bjorn.topel@gmail.com> writes: > >> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h >> index c001766adcbc..bbc7d9a57262 100644 >> --- a/include/uapi/linux/bpf.h >> +++ b/include/uapi/linux/bpf.h >> @@ -3836,6 +3836,12 @@ union bpf_attr { >> * Return >> * A pointer to a struct socket on success or NULL if the file is >> * not a socket. >> + * >> + * long bpf_redirect_xsk(struct xdp_buff *xdp_md, u64 action) >> + * Description >> + * Redirect to the registered AF_XDP socket. >> + * Return >> + * **XDP_REDIRECT** on success, otherwise the action parameter is returned. >> */ > > I think it would be better to make the second argument a 'flags' > argument and make values > XDP_TX invalid (like we do in > bpf_xdp_redirect_map() now). By allowing any value as return you lose > the ability to turn it into a flags argument later... > Yes, but that adds a run-time check. I prefer this non-checked version, even though it is a bit less futureproof. Björn
Björn Töpel <bjorn.topel@intel.com> writes: > On 2021-01-20 13:50, Toke Høiland-Jørgensen wrote: >> Björn Töpel <bjorn.topel@gmail.com> writes: >> >>> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h >>> index c001766adcbc..bbc7d9a57262 100644 >>> --- a/include/uapi/linux/bpf.h >>> +++ b/include/uapi/linux/bpf.h >>> @@ -3836,6 +3836,12 @@ union bpf_attr { >>> * Return >>> * A pointer to a struct socket on success or NULL if the file is >>> * not a socket. >>> + * >>> + * long bpf_redirect_xsk(struct xdp_buff *xdp_md, u64 action) >>> + * Description >>> + * Redirect to the registered AF_XDP socket. >>> + * Return >>> + * **XDP_REDIRECT** on success, otherwise the action parameter is returned. >>> */ >> >> I think it would be better to make the second argument a 'flags' >> argument and make values > XDP_TX invalid (like we do in >> bpf_xdp_redirect_map() now). By allowing any value as return you lose >> the ability to turn it into a flags argument later... >> > > Yes, but that adds a run-time check. I prefer this non-checked version, > even though it is a bit less futureproof. That...seems a bit short-sighted? :) Can you actually see a difference in your performance numbers? -Toke
On 2021-01-20 15:54, Toke Høiland-Jørgensen wrote: > Björn Töpel <bjorn.topel@intel.com> writes: > >> On 2021-01-20 13:50, Toke Høiland-Jørgensen wrote: >>> Björn Töpel <bjorn.topel@gmail.com> writes: >>> >>>> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h >>>> index c001766adcbc..bbc7d9a57262 100644 >>>> --- a/include/uapi/linux/bpf.h >>>> +++ b/include/uapi/linux/bpf.h >>>> @@ -3836,6 +3836,12 @@ union bpf_attr { >>>> * Return >>>> * A pointer to a struct socket on success or NULL if the file is >>>> * not a socket. >>>> + * >>>> + * long bpf_redirect_xsk(struct xdp_buff *xdp_md, u64 action) >>>> + * Description >>>> + * Redirect to the registered AF_XDP socket. >>>> + * Return >>>> + * **XDP_REDIRECT** on success, otherwise the action parameter is returned. >>>> */ >>> >>> I think it would be better to make the second argument a 'flags' >>> argument and make values > XDP_TX invalid (like we do in >>> bpf_xdp_redirect_map() now). By allowing any value as return you lose >>> the ability to turn it into a flags argument later... >>> >> >> Yes, but that adds a run-time check. I prefer this non-checked version, >> even though it is a bit less futureproof. > > That...seems a bit short-sighted? :) > Can you actually see a difference in your performance numbers? > I would rather add an additional helper *if* we see the need for flags, instead of paying for that upfront. For me, BPF is about being able to specialize, and not having one call with tons of checks. (Related; Going forward, the growing switch() for redirect targets in xdp_do_redirect() is a concern for me...) And yes, even with all those fancy branch predictors, less instructions is still less. :-) (It shows in my ubenchs.) Björn
Björn Töpel <bjorn.topel@intel.com> writes: > On 2021-01-20 15:54, Toke Høiland-Jørgensen wrote: >> Björn Töpel <bjorn.topel@intel.com> writes: >> >>> On 2021-01-20 13:50, Toke Høiland-Jørgensen wrote: >>>> Björn Töpel <bjorn.topel@gmail.com> writes: >>>> >>>>> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h >>>>> index c001766adcbc..bbc7d9a57262 100644 >>>>> --- a/include/uapi/linux/bpf.h >>>>> +++ b/include/uapi/linux/bpf.h >>>>> @@ -3836,6 +3836,12 @@ union bpf_attr { >>>>> * Return >>>>> * A pointer to a struct socket on success or NULL if the file is >>>>> * not a socket. >>>>> + * >>>>> + * long bpf_redirect_xsk(struct xdp_buff *xdp_md, u64 action) >>>>> + * Description >>>>> + * Redirect to the registered AF_XDP socket. >>>>> + * Return >>>>> + * **XDP_REDIRECT** on success, otherwise the action parameter is returned. >>>>> */ >>>> >>>> I think it would be better to make the second argument a 'flags' >>>> argument and make values > XDP_TX invalid (like we do in >>>> bpf_xdp_redirect_map() now). By allowing any value as return you lose >>>> the ability to turn it into a flags argument later... >>>> >>> >>> Yes, but that adds a run-time check. I prefer this non-checked version, >>> even though it is a bit less futureproof. >> >> That...seems a bit short-sighted? :) >> Can you actually see a difference in your performance numbers? >> > > I would rather add an additional helper *if* we see the need for flags, > instead of paying for that upfront. For me, BPF is about being able to > specialize, and not having one call with tons of checks. I get that, I'm just pushing back because omitting a 'flags' argument is literally among the most frequent reasons for having to replace a syscall (see e.g., [0]) instead of extending it. And yeah, I do realise that the performance implications are different for XDP than for syscalls, but maintainability of the API is also important; it's all a tradeoff. This will be the third redirect helper variant for XDP and I'd hate for the fourth one to have to be bpf_redirect_xsk_flags() because it did turn out to be needed... (One potential concrete reason for this: I believe Magnus was talking about an API that would allow a BPF program to redirect a packet into more than one socket (cloning it in the process), or to redirect to a socket+another target. How would you do that with this new helper?) [0] https://lwn.net/Articles/585415/ > (Related; Going forward, the growing switch() for redirect targets in > xdp_do_redirect() is a concern for me...) > > And yes, even with all those fancy branch predictors, less instructions > is still less. :-) (It shows in my ubenchs.) Right, I do agree that the run-time performance hit of checking the flag sucks (along with being hard to check for, cf. our parallel discussion about version checks). So ideally this would be fixed by having the verifier enforce the argument ranges instead; but if we merge this without the runtime check now we can't add that later without potentially breaking programs... :( -Toke
On 2021-01-20 18:29, Toke Høiland-Jørgensen wrote: > Björn Töpel <bjorn.topel@intel.com> writes: > >> On 2021-01-20 15:54, Toke Høiland-Jørgensen wrote: >>> Björn Töpel <bjorn.topel@intel.com> writes: >>> >>>> On 2021-01-20 13:50, Toke Høiland-Jørgensen wrote: >>>>> Björn Töpel <bjorn.topel@gmail.com> writes: >>>>> >>>>>> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h >>>>>> index c001766adcbc..bbc7d9a57262 100644 >>>>>> --- a/include/uapi/linux/bpf.h >>>>>> +++ b/include/uapi/linux/bpf.h >>>>>> @@ -3836,6 +3836,12 @@ union bpf_attr { >>>>>> * Return >>>>>> * A pointer to a struct socket on success or NULL if the file is >>>>>> * not a socket. >>>>>> + * >>>>>> + * long bpf_redirect_xsk(struct xdp_buff *xdp_md, u64 action) >>>>>> + * Description >>>>>> + * Redirect to the registered AF_XDP socket. >>>>>> + * Return >>>>>> + * **XDP_REDIRECT** on success, otherwise the action parameter is returned. >>>>>> */ >>>>> >>>>> I think it would be better to make the second argument a 'flags' >>>>> argument and make values > XDP_TX invalid (like we do in >>>>> bpf_xdp_redirect_map() now). By allowing any value as return you lose >>>>> the ability to turn it into a flags argument later... >>>>> >>>> >>>> Yes, but that adds a run-time check. I prefer this non-checked version, >>>> even though it is a bit less futureproof. >>> >>> That...seems a bit short-sighted? :) >>> Can you actually see a difference in your performance numbers? >>> >> >> I would rather add an additional helper *if* we see the need for flags, >> instead of paying for that upfront. For me, BPF is about being able to >> specialize, and not having one call with tons of checks. > > I get that, I'm just pushing back because omitting a 'flags' argument is > literally among the most frequent reasons for having to replace a > syscall (see e.g., [0]) instead of extending it. And yeah, I do realise > that the performance implications are different for XDP than for > syscalls, but maintainability of the API is also important; it's all a > tradeoff. This will be the third redirect helper variant for XDP and I'd > hate for the fourth one to have to be bpf_redirect_xsk_flags() because > it did turn out to be needed... > > (One potential concrete reason for this: I believe Magnus was talking > about an API that would allow a BPF program to redirect a packet into > more than one socket (cloning it in the process), or to redirect to a > socket+another target. How would you do that with this new helper?) > > [0] https://lwn.net/Articles/585415/ > I have a bit of different view. One of the really nice parts about BPF is exactly specialization. A user can tailor the kernel do a specific thing. I *don't* see an issue with yet another helper, if that is needed in the future. I think that is better than bloated helpers trying to cope for all scenarios. I don't mean we should just add helpers all over the place, but I do see more lightly on adding helpers, than adding syscalls. Elaborating a bit on this: many device drivers try to handle all the things in the fast-path. I see BPF as one way forward to moving away from that. Setup what you need, and only run what you currently need, instead of the current "Is bleh on, then baz? Is this on, then that." So, I would like to avoid "future proofing" the helpers, if that makes sense. Use what you need. That's why BPF is so good (one of the things)! As for bpf_redirect_xsk() it's a leaner version of bpf_redirect_map(). You want flags/shared sockets/...? Well go use bpf_redirect_map() and XSKMAP. bpf_redirect_xsk() is not for you. A lot of back-and-forth for *one* if-statement, but it's kind of a design thing for me. ;-) Björn >> (Related; Going forward, the growing switch() for redirect targets in >> xdp_do_redirect() is a concern for me...) >> >> And yes, even with all those fancy branch predictors, less instructions >> is still less. :-) (It shows in my ubenchs.) > > Right, I do agree that the run-time performance hit of checking the flag > sucks (along with being hard to check for, cf. our parallel discussion > about version checks). So ideally this would be fixed by having the > verifier enforce the argument ranges instead; but if we merge this > without the runtime check now we can't add that later without > potentially breaking programs... :( > > -Toke >
Björn Töpel <bjorn.topel@intel.com> writes: > On 2021-01-20 18:29, Toke Høiland-Jørgensen wrote: >> Björn Töpel <bjorn.topel@intel.com> writes: >> >>> On 2021-01-20 15:54, Toke Høiland-Jørgensen wrote: >>>> Björn Töpel <bjorn.topel@intel.com> writes: >>>> >>>>> On 2021-01-20 13:50, Toke Høiland-Jørgensen wrote: >>>>>> Björn Töpel <bjorn.topel@gmail.com> writes: >>>>>> >>>>>>> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h >>>>>>> index c001766adcbc..bbc7d9a57262 100644 >>>>>>> --- a/include/uapi/linux/bpf.h >>>>>>> +++ b/include/uapi/linux/bpf.h >>>>>>> @@ -3836,6 +3836,12 @@ union bpf_attr { >>>>>>> * Return >>>>>>> * A pointer to a struct socket on success or NULL if the file is >>>>>>> * not a socket. >>>>>>> + * >>>>>>> + * long bpf_redirect_xsk(struct xdp_buff *xdp_md, u64 action) >>>>>>> + * Description >>>>>>> + * Redirect to the registered AF_XDP socket. >>>>>>> + * Return >>>>>>> + * **XDP_REDIRECT** on success, otherwise the action parameter is returned. >>>>>>> */ >>>>>> >>>>>> I think it would be better to make the second argument a 'flags' >>>>>> argument and make values > XDP_TX invalid (like we do in >>>>>> bpf_xdp_redirect_map() now). By allowing any value as return you lose >>>>>> the ability to turn it into a flags argument later... >>>>>> >>>>> >>>>> Yes, but that adds a run-time check. I prefer this non-checked version, >>>>> even though it is a bit less futureproof. >>>> >>>> That...seems a bit short-sighted? :) >>>> Can you actually see a difference in your performance numbers? >>>> >>> >>> I would rather add an additional helper *if* we see the need for flags, >>> instead of paying for that upfront. For me, BPF is about being able to >>> specialize, and not having one call with tons of checks. >> >> I get that, I'm just pushing back because omitting a 'flags' argument is >> literally among the most frequent reasons for having to replace a >> syscall (see e.g., [0]) instead of extending it. And yeah, I do realise >> that the performance implications are different for XDP than for >> syscalls, but maintainability of the API is also important; it's all a >> tradeoff. This will be the third redirect helper variant for XDP and I'd >> hate for the fourth one to have to be bpf_redirect_xsk_flags() because >> it did turn out to be needed... >> >> (One potential concrete reason for this: I believe Magnus was talking >> about an API that would allow a BPF program to redirect a packet into >> more than one socket (cloning it in the process), or to redirect to a >> socket+another target. How would you do that with this new helper?) >> >> [0] https://lwn.net/Articles/585415/ >> > > I have a bit of different view. One of the really nice parts about BPF > is exactly specialization. A user can tailor the kernel do a specific > thing. I *don't* see an issue with yet another helper, if that is needed > in the future. I think that is better than bloated helpers trying to > cope for all scenarios. I don't mean we should just add helpers all over > the place, but I do see more lightly on adding helpers, than adding > syscalls. > > Elaborating a bit on this: many device drivers try to handle all the > things in the fast-path. I see BPF as one way forward to moving away > from that. Setup what you need, and only run what you currently need, > instead of the current "Is bleh on, then baz? Is this on, then that." > > So, I would like to avoid "future proofing" the helpers, if that makes > sense. Use what you need. That's why BPF is so good (one of the > things)! Well, it's a tradeoff. We're still defining an API that should not be (too) confusing... > As for bpf_redirect_xsk() it's a leaner version of bpf_redirect_map(). > You want flags/shared sockets/...? Well go use bpf_redirect_map() and > XSKMAP. bpf_redirect_xsk() is not for you. This argument, however, I buy: bpf_redirect() is the single-purpose helper for redirecting to an ifindex, bpf_redirect_xsk() is the single-purpose helper for redirecting to an XSK, and bpf_redirect_map() is the generic one that does both of those and more. Fair enough, consider me convinced :) > A lot of back-and-forth for *one* if-statement, but it's kind of a > design thing for me. ;-) Surely you don't mean to imply that you have *better* things to do with your time than have a 10-emails-long argument over a single if statement? ;) -Toke
On Wed, Jan 20, 2021 at 12:26 PM Toke Høiland-Jørgensen <toke@redhat.com> wrote: > > This argument, however, I buy: bpf_redirect() is the single-purpose > helper for redirecting to an ifindex, bpf_redirect_xsk() is the > single-purpose helper for redirecting to an XSK, and bpf_redirect_map() > is the generic one that does both of those and more. Fair enough, > consider me convinced :) > > > A lot of back-and-forth for *one* if-statement, but it's kind of a > > design thing for me. ;-) > > Surely you don't mean to imply that you have *better* things to do with > your time than have a 10-emails-long argument over a single if > statement? ;) After reading this thread I think I have to pour cold water on the design. The performance blip comes from hard coded assumptions: + queue_id = xdp->rxq->queue_index; + xs = READ_ONCE(dev->_rx[queue_id].xsk); bpf can have specialized helpers, but imo this is beyond what's reasonable. Please move such things into the program and try to make bpf_redirect_map faster. Making af_xdp non-root is orthogonal. If there is actual need for that it has to be designed thoroughly and not presented as "this helper may help to do that". I don't think "may" will materialize unless people actually work toward the goal of non-root.
On 2021-01-20 22:15, Alexei Starovoitov wrote: > On Wed, Jan 20, 2021 at 12:26 PM Toke Høiland-Jørgensen <toke@redhat.com> wrote: >> >> This argument, however, I buy: bpf_redirect() is the single-purpose >> helper for redirecting to an ifindex, bpf_redirect_xsk() is the >> single-purpose helper for redirecting to an XSK, and bpf_redirect_map() >> is the generic one that does both of those and more. Fair enough, >> consider me convinced :) >> >>> A lot of back-and-forth for *one* if-statement, but it's kind of a >>> design thing for me. ;-) >> >> Surely you don't mean to imply that you have *better* things to do with >> your time than have a 10-emails-long argument over a single if >> statement? ;) > > After reading this thread I think I have to pour cold water on the design. > > The performance blip comes from hard coded assumptions: > + queue_id = xdp->rxq->queue_index; > + xs = READ_ONCE(dev->_rx[queue_id].xsk); > Yes, one can see this as a constrained map: * The map belongs to a certain netdev. * Each entry corresponds to a certain queue id. I.e if we do a successful (non-NULL) lookup, we *know* that all sockets in that map belong to the netdev, and has the correct queue id. By doing that we can get rid of two run-time checks: "Is the socket bound to this netdev?" and "Is this the correct queue id?". > bpf can have specialized helpers, but imo this is beyond what's reasonable. > Please move such things into the program and try to make > bpf_redirect_map faster. > I obviously prefer this path, and ideally combined with a way to even more specialize xdp_do_redirect(). Then again, you are the maintainer! :-) Maybe an alternative be adding a new type of XSKMAP constrained in the similar way as above, and continue with bpf_redirect_map(), but with this new map the index argument would be ignored. Unfortunately the BPF context (xdp_buff in this case) is not passed to bpf_redirect_map(), so getting the actual queue_id in the helper is hard. Adding the context as an additional argument would be a new helper... I'll need to think a bit more about it. Input/ideas are welcome! > Making af_xdp non-root is orthogonal. If there is actual need for that > it has to be designed thoroughly and not presented as "this helper may > help to do that". > I don't think "may" will materialize unless people actually work > toward the goal of non-root. > Fair enough! Same goal could be reached using the existing map approach. Björn
diff --git a/include/linux/filter.h b/include/linux/filter.h index 5fc336a271c2..3f9efbd08cba 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -648,6 +648,7 @@ enum xdp_redirect_type { XDP_REDIR_DEV_MAP, XDP_REDIR_CPU_MAP, XDP_REDIR_XSK_MAP, + XDP_REDIR_XSK, }; DECLARE_PER_CPU(struct bpf_redirect_info, bpf_redirect_info); diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 5b949076ed23..cb0e215e981c 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -749,6 +749,7 @@ struct netdev_rx_queue { struct xdp_rxq_info xdp_rxq; #ifdef CONFIG_XDP_SOCKETS struct xsk_buff_pool *pool; + struct xdp_sock *xsk; #endif } ____cacheline_aligned_in_smp; diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h index cc17bc957548..97b21c483baf 100644 --- a/include/net/xdp_sock.h +++ b/include/net/xdp_sock.h @@ -77,8 +77,10 @@ struct xdp_sock { #ifdef CONFIG_XDP_SOCKETS int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp); +int xsk_generic_redirect(struct net_device *dev, struct xdp_buff *xdp); int __xsk_map_redirect(struct xdp_sock *xs, struct xdp_buff *xdp); void __xsk_map_flush(void); +int xsk_redirect(struct xdp_sock *xs, struct xdp_buff *xdp); static inline struct xdp_sock *__xsk_map_lookup_elem(struct bpf_map *map, u32 key) @@ -100,6 +102,11 @@ static inline int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp) return -ENOTSUPP; } +static inline int xsk_generic_redirect(struct net_device *dev, struct xdp_buff *xdp) +{ + return -EOPNOTSUPP; +} + static inline int __xsk_map_redirect(struct xdp_sock *xs, struct xdp_buff *xdp) { return -EOPNOTSUPP; @@ -115,6 +122,11 @@ static inline struct xdp_sock *__xsk_map_lookup_elem(struct bpf_map *map, return NULL; } +static inline int xsk_redirect(struct net_device *dev, struct xdp_buff *xdp) +{ + return -EOPNOTSUPP; +} + #endif /* CONFIG_XDP_SOCKETS */ #endif /* _LINUX_XDP_SOCK_H */ diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h index eaa8386dbc63..bd531d561c60 100644 --- a/include/net/xsk_buff_pool.h +++ b/include/net/xsk_buff_pool.h @@ -84,7 +84,7 @@ struct xsk_buff_pool { /* AF_XDP core. */ struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs, struct xdp_umem *umem); -int xp_assign_dev(struct xsk_buff_pool *pool, struct net_device *dev, +int xp_assign_dev(struct xdp_sock *xs, struct xsk_buff_pool *pool, struct net_device *dev, u16 queue_id, u16 flags); int xp_assign_dev_shared(struct xsk_buff_pool *pool, struct xdp_umem *umem, struct net_device *dev, u16 queue_id); diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index c001766adcbc..bbc7d9a57262 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3836,6 +3836,12 @@ union bpf_attr { * Return * A pointer to a struct socket on success or NULL if the file is * not a socket. + * + * long bpf_redirect_xsk(struct xdp_buff *xdp_md, u64 action) + * Description + * Redirect to the registered AF_XDP socket. + * Return + * **XDP_REDIRECT** on success, otherwise the action parameter is returned. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -4001,6 +4007,7 @@ union bpf_attr { FN(ktime_get_coarse_ns), \ FN(ima_inode_hash), \ FN(sock_from_file), \ + FN(redirect_xsk), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper diff --git a/net/core/filter.c b/net/core/filter.c index 5f31e21be531..b457c83fba70 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -3977,6 +3977,9 @@ int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp, case XDP_REDIR_XSK_MAP: err = __xsk_map_redirect(fwd, xdp); break; + case XDP_REDIR_XSK: + err = xsk_redirect(fwd, xdp); + break; default: err = -EBADRQC; } @@ -4044,25 +4047,33 @@ int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb, ri->tgt_type = XDP_REDIR_UNSET; ri->tgt_value = NULL; - if (type == XDP_REDIR_DEV_IFINDEX) { + switch (type) { + case XDP_REDIR_DEV_IFINDEX: { fwd = dev_get_by_index_rcu(dev_net(dev), (u32)(long)fwd); if (unlikely(!fwd)) { err = -EINVAL; - goto err; + break; } err = xdp_ok_fwd_dev(fwd, skb->len); if (unlikely(err)) - goto err; + break; skb->dev = fwd; _trace_xdp_redirect(dev, xdp_prog, index); generic_xdp_tx(skb, xdp_prog); return 0; } + case XDP_REDIR_XSK: + err = xsk_generic_redirect(dev, xdp); + if (err) + break; + consume_skb(skb); + break; + default: + return xdp_do_generic_redirect_map(dev, skb, xdp, xdp_prog, fwd, type); + } - return xdp_do_generic_redirect_map(dev, skb, xdp, xdp_prog, fwd, type); -err: _trace_xdp_redirect_err(dev, xdp_prog, index, err); return err; } @@ -4144,6 +4155,32 @@ static const struct bpf_func_proto bpf_xdp_redirect_map_proto = { .arg3_type = ARG_ANYTHING, }; +BPF_CALL_2(bpf_xdp_redirect_xsk, struct xdp_buff *, xdp, u64, action) +{ + struct net_device *dev = xdp->rxq->dev; + u32 queue_id = xdp->rxq->queue_index; + struct bpf_redirect_info *ri; + struct xdp_sock *xs; + + xs = READ_ONCE(dev->_rx[queue_id].xsk); + if (!xs) + return action; + + ri = this_cpu_ptr(&bpf_redirect_info); + ri->tgt_type = XDP_REDIR_XSK; + ri->tgt_value = xs; + + return XDP_REDIRECT; +} + +static const struct bpf_func_proto bpf_xdp_redirect_xsk_proto = { + .func = bpf_xdp_redirect_xsk, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, +}; + static unsigned long bpf_skb_copy(void *dst_buff, const void *skb, unsigned long off, unsigned long len) { @@ -7260,6 +7297,8 @@ xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_FUNC_tcp_gen_syncookie: return &bpf_tcp_gen_syncookie_proto; #endif + case BPF_FUNC_redirect_xsk: + return &bpf_xdp_redirect_xsk_proto; default: return bpf_sk_base_func_proto(func_id); } diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 5820de65060b..79f1492e71e2 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -134,6 +134,28 @@ int xsk_reg_pool_at_qid(struct net_device *dev, struct xsk_buff_pool *pool, return 0; } +static struct xdp_sock *xsk_get_at_qid(struct net_device *dev, u16 queue_id) +{ + return READ_ONCE(dev->_rx[queue_id].xsk); +} + +static void xsk_clear(struct xdp_sock *xs) +{ + struct net_device *dev = xs->dev; + u16 queue_id = xs->queue_id; + + if (queue_id < dev->num_rx_queues) + WRITE_ONCE(dev->_rx[queue_id].xsk, NULL); +} + +static void xsk_reg(struct xdp_sock *xs) +{ + struct net_device *dev = xs->dev; + u16 queue_id = xs->queue_id; + + WRITE_ONCE(dev->_rx[queue_id].xsk, xs); +} + void xp_release(struct xdp_buff_xsk *xskb) { xskb->pool->free_heads[xskb->pool->free_heads_cnt++] = xskb; @@ -184,7 +206,7 @@ static void xsk_copy_xdp(struct xdp_buff *to, struct xdp_buff *from, u32 len) memcpy(to_buf, from_buf, len + metalen); } -static int __xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp) +static int ____xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp) { struct xdp_buff *xsk_xdp; int err; @@ -211,6 +233,22 @@ static int __xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp) return 0; } +static int __xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp) +{ + int err; + u32 len; + + if (likely(xdp->rxq->mem.type == MEM_TYPE_XSK_BUFF_POOL)) { + len = xdp->data_end - xdp->data; + return __xsk_rcv_zc(xs, xdp, len); + } + + err = ____xsk_rcv(xs, xdp); + if (!err) + xdp_return_buff(xdp); + return err; +} + static bool xsk_tx_writeable(struct xdp_sock *xs) { if (xskq_cons_present_entries(xs->tx) > xs->tx->nentries / 2) @@ -248,6 +286,39 @@ static void xsk_flush(struct xdp_sock *xs) sock_def_readable(&xs->sk); } +int xsk_redirect(struct xdp_sock *xs, struct xdp_buff *xdp) +{ + struct list_head *flush_list = this_cpu_ptr(&xskmap_flush_list); + int err; + + sk_mark_napi_id_once_xdp(&xs->sk, xdp); + err = __xsk_rcv(xs, xdp); + if (err) + return err; + + if (!xs->flush_node.prev) + list_add(&xs->flush_node, flush_list); + return 0; +} + +int xsk_generic_redirect(struct net_device *dev, struct xdp_buff *xdp) +{ + struct xdp_sock *xs; + u32 queue_id; + int err; + + queue_id = xdp->rxq->queue_index; + xs = xsk_get_at_qid(dev, queue_id); + if (!xs) + return -EINVAL; + + spin_lock_bh(&xs->rx_lock); + err = ____xsk_rcv(xs, xdp); + xsk_flush(xs); + spin_unlock_bh(&xs->rx_lock); + return err; +} + int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp) { int err; @@ -255,7 +326,7 @@ int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp) spin_lock_bh(&xs->rx_lock); err = xsk_rcv_check(xs, xdp); if (!err) { - err = __xsk_rcv(xs, xdp); + err = ____xsk_rcv(xs, xdp); xsk_flush(xs); } spin_unlock_bh(&xs->rx_lock); @@ -264,22 +335,12 @@ int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp) static int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp) { - int err; - u32 len; + int err = xsk_rcv_check(xs, xdp); - err = xsk_rcv_check(xs, xdp); if (err) return err; - if (xdp->rxq->mem.type == MEM_TYPE_XSK_BUFF_POOL) { - len = xdp->data_end - xdp->data; - return __xsk_rcv_zc(xs, xdp, len); - } - - err = __xsk_rcv(xs, xdp); - if (!err) - xdp_return_buff(xdp); - return err; + return __xsk_rcv(xs, xdp); } int __xsk_map_redirect(struct xdp_sock *xs, struct xdp_buff *xdp) @@ -661,6 +722,7 @@ static void xsk_unbind_dev(struct xdp_sock *xs) if (xs->state != XSK_BOUND) return; + xsk_clear(xs); WRITE_ONCE(xs->state, XSK_UNBOUND); /* Wait for driver to stop using the xdp socket. */ @@ -892,7 +954,7 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len) goto out_unlock; } - err = xp_assign_dev(xs->pool, dev, qid, flags); + err = xp_assign_dev(xs, xs->pool, dev, qid, flags); if (err) { xp_destroy(xs->pool); xs->pool = NULL; @@ -918,6 +980,7 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len) */ smp_wmb(); WRITE_ONCE(xs->state, XSK_BOUND); + xsk_reg(xs); } out_release: mutex_unlock(&xs->mutex); diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c index 8de01aaac4a0..af02a69d0bf7 100644 --- a/net/xdp/xsk_buff_pool.c +++ b/net/xdp/xsk_buff_pool.c @@ -119,7 +119,7 @@ static void xp_disable_drv_zc(struct xsk_buff_pool *pool) } } -int xp_assign_dev(struct xsk_buff_pool *pool, +int xp_assign_dev(struct xdp_sock *xs, struct xsk_buff_pool *pool, struct net_device *netdev, u16 queue_id, u16 flags) { bool force_zc, force_copy; @@ -204,7 +204,7 @@ int xp_assign_dev_shared(struct xsk_buff_pool *pool, struct xdp_umem *umem, if (pool->uses_need_wakeup) flags |= XDP_USE_NEED_WAKEUP; - return xp_assign_dev(pool, dev, queue_id, flags); + return xp_assign_dev(NULL, pool, dev, queue_id, flags); } void xp_clear_dev(struct xsk_buff_pool *pool) diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index c001766adcbc..bbc7d9a57262 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -3836,6 +3836,12 @@ union bpf_attr { * Return * A pointer to a struct socket on success or NULL if the file is * not a socket. + * + * long bpf_redirect_xsk(struct xdp_buff *xdp_md, u64 action) + * Description + * Redirect to the registered AF_XDP socket. + * Return + * **XDP_REDIRECT** on success, otherwise the action parameter is returned. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -4001,6 +4007,7 @@ union bpf_attr { FN(ktime_get_coarse_ns), \ FN(ima_inode_hash), \ FN(sock_from_file), \ + FN(redirect_xsk), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper