diff mbox series

[bpf-next,v5,3/8] bpf: Create links for BPF struct_ops maps.

Message ID 20230308005050.255859-4-kuifeng@meta.com (mailing list archive)
State Superseded
Delegated to: BPF
Headers show
Series Transit between BPF TCP congestion controls. | expand

Checks

Context Check Description
bpf/vmtest-bpf-next-PR success PR summary
bpf/vmtest-bpf-next-VM_Test-11 success Logs for test_maps on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-16 success Logs for test_progs on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-21 success Logs for test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-26 success Logs for test_progs_no_alu32_parallel on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-31 success Logs for test_progs_parallel on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-36 success Logs for test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-39 success Logs for test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-40 success Logs for test_verifier on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-5 success Logs for build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-6 success Logs for build for x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-7 success Logs for llvm-toolchain
bpf/vmtest-bpf-next-VM_Test-8 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-2 success Logs for build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-3 success Logs for build for aarch64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-4 success Logs for build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-37 success Logs for test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-38 success Logs for test_verifier on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-9 success Logs for test_maps on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-10 success Logs for test_maps on aarch64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-12 success Logs for test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-13 success Logs for test_maps on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-14 success Logs for test_progs on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-15 success Logs for test_progs on aarch64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-17 success Logs for test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-18 success Logs for test_progs on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-19 success Logs for test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-20 success Logs for test_progs_no_alu32 on aarch64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-22 success Logs for test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-23 success Logs for test_progs_no_alu32 on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-24 success Logs for test_progs_no_alu32_parallel on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-25 success Logs for test_progs_no_alu32_parallel on aarch64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-27 success Logs for test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-28 success Logs for test_progs_no_alu32_parallel on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-29 success Logs for test_progs_parallel on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-30 success Logs for test_progs_parallel on aarch64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-32 success Logs for test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-33 success Logs for test_progs_parallel on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-34 success Logs for test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-35 success Logs for test_verifier on aarch64 with llvm-17
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for bpf-next, async
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit fail Errors and warnings before: 1787 this patch: 1789
netdev/cc_maintainers warning 7 maintainers not CCed: haoluo@google.com yhs@fb.com daniel@iogearbox.net john.fastabend@gmail.com kpsingh@kernel.org jolsa@kernel.org netdev@vger.kernel.org
netdev/build_clang success Errors and warnings before: 178 this patch: 178
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn fail Errors and warnings before: 1783 this patch: 1785
netdev/checkpatch warning CHECK: Alignment should match open parenthesis CHECK: Please use a blank line after function/struct/union/enum declarations WARNING: line length of 82 exceeds 80 columns WARNING: line length of 84 exceeds 80 columns WARNING: line length of 87 exceeds 80 columns WARNING: line length of 89 exceeds 80 columns WARNING: line length of 93 exceeds 80 columns
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Kui-Feng Lee March 8, 2023, 12:50 a.m. UTC
BPF struct_ops maps are employed directly to register TCP Congestion
Control algorithms, unlike other BPF programs, which terminate when
their links are gone. The link of a BPF struct_ops map provides a uniform
experience akin to other types of BPF programs.

bpf_links are responsible for registering their associated
struct_ops. You can only use a struct_ops that has the BPF_F_LINK flag
set to create a bpf_link, while a struct_ops without this flag behaves in
the same manner as before and is registered upon updating its value.

The BPF_LINK_TYPE_STRUCT_OPS serves a dual purpose. Not only is it
used to craft the links for BPF struct_ops programs, but also to
create links for BPF struct_ops themselves.  Since the links of BPF
struct_ops programs are only used to create trampolines internally,
they are never seen in other contexts. Thus, they can be reused for
struct_ops themselves.

To maintain a reference to the map supporting this link, we add
bpf_struct_ops_link as an additional type. The pointer to the map is
RCU-protected, although that won't be necessary until later in the patchset.

Signed-off-by: Kui-Feng Lee <kuifeng@meta.com>
---
 include/linux/bpf.h            |  11 +++
 include/uapi/linux/bpf.h       |  12 +++-
 kernel/bpf/bpf_struct_ops.c    | 124 +++++++++++++++++++++++++++++++--
 kernel/bpf/syscall.c           |  23 +++---
 tools/include/uapi/linux/bpf.h |  12 +++-
 5 files changed, 168 insertions(+), 14 deletions(-)

Comments

kernel test robot March 8, 2023, 3:01 p.m. UTC | #1
Hi Kui-Feng,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on bpf-next/master]

url:    https://github.com/intel-lab-lkp/linux/commits/Kui-Feng-Lee/bpf-Retire-the-struct_ops-map-kvalue-refcnt/20230308-085434
base:   https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git master
patch link:    https://lore.kernel.org/r/20230308005050.255859-4-kuifeng%40meta.com
patch subject: [PATCH bpf-next v5 3/8] bpf: Create links for BPF struct_ops maps.
config: microblaze-randconfig-r005-20230306 (https://download.01.org/0day-ci/archive/20230308/202303082224.rf1Z7y3o-lkp@intel.com/config)
compiler: microblaze-linux-gcc (GCC) 12.1.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/intel-lab-lkp/linux/commit/de9e43a5ac82dde718d80d8347e867a8fc935e0a
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review Kui-Feng-Lee/bpf-Retire-the-struct_ops-map-kvalue-refcnt/20230308-085434
        git checkout de9e43a5ac82dde718d80d8347e867a8fc935e0a
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 O=build_dir ARCH=microblaze olddefconfig
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 O=build_dir ARCH=microblaze SHELL=/bin/bash drivers/hid/

If you fix the issue, kindly add following tag where applicable
| Reported-by: kernel test robot <lkp@intel.com>
| Link: https://lore.kernel.org/oe-kbuild-all/202303082224.rf1Z7y3o-lkp@intel.com/

All errors (new ones prefixed by >>):

   In file included from include/linux/hid_bpf.h:6,
                    from include/linux/hid.h:29,
                    from drivers/hid/hid-prodikeys.c:21:
   include/linux/bpf.h:2388:19: error: redefinition of 'bpf_struct_ops_link_create'
    2388 | static inline int bpf_struct_ops_link_create(union bpf_attr *attr)
         |                   ^~~~~~~~~~~~~~~~~~~~~~~~~~
   include/linux/bpf.h:1592:19: note: previous definition of 'bpf_struct_ops_link_create' with type 'int(union bpf_attr *)'
    1592 | static inline int bpf_struct_ops_link_create(union bpf_attr *attr)
         |                   ^~~~~~~~~~~~~~~~~~~~~~~~~~
   In file included from include/linux/bits.h:21,
                    from include/linux/ratelimit_types.h:5,
                    from include/linux/ratelimit.h:5,
                    from include/linux/dev_printk.h:16,
                    from include/linux/device.h:15,
                    from drivers/hid/hid-prodikeys.c:17:
>> include/linux/build_bug.h:16:51: error: bit-field '<anonymous>' width not an integer constant
      16 | #define BUILD_BUG_ON_ZERO(e) ((int)(sizeof(struct { int:(-!!(e)); })))
         |                                                   ^
   include/linux/compiler.h:232:33: note: in expansion of macro 'BUILD_BUG_ON_ZERO'
     232 | #define __must_be_array(a)      BUILD_BUG_ON_ZERO(__same_type((a), &(a)[0]))
         |                                 ^~~~~~~~~~~~~~~~~
   include/linux/kernel.h:55:59: note: in expansion of macro '__must_be_array'
      55 | #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]) + __must_be_array(arr))
         |                                                           ^~~~~~~~~~~~~~~
   include/linux/moduleparam.h:517:20: note: in expansion of macro 'ARRAY_SIZE'
     517 |         = { .max = ARRAY_SIZE(array), .num = nump,                      \
         |                    ^~~~~~~~~~
   include/linux/moduleparam.h:501:9: note: in expansion of macro 'module_param_array_named'
     501 |         module_param_array_named(name, name, type, nump, perm)
         |         ^~~~~~~~~~~~~~~~~~~~~~~~
   drivers/hid/hid-prodikeys.c:90:1: note: in expansion of macro 'module_param_array'
      90 | module_param_array(index, int, NULL, 0444);
         | ^~~~~~~~~~~~~~~~~~
   drivers/hid/hid-prodikeys.c:86:12: warning: 'index' defined but not used [-Wunused-variable]
      86 | static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX;
         |            ^~~~~


vim +16 include/linux/build_bug.h

bc6245e5efd70c Ian Abbott       2017-07-10   6  
bc6245e5efd70c Ian Abbott       2017-07-10   7  #ifdef __CHECKER__
bc6245e5efd70c Ian Abbott       2017-07-10   8  #define BUILD_BUG_ON_ZERO(e) (0)
bc6245e5efd70c Ian Abbott       2017-07-10   9  #else /* __CHECKER__ */
bc6245e5efd70c Ian Abbott       2017-07-10  10  /*
bc6245e5efd70c Ian Abbott       2017-07-10  11   * Force a compilation error if condition is true, but also produce a
8788994376d84d Rikard Falkeborn 2019-12-04  12   * result (of value 0 and type int), so the expression can be used
bc6245e5efd70c Ian Abbott       2017-07-10  13   * e.g. in a structure initializer (or where-ever else comma expressions
bc6245e5efd70c Ian Abbott       2017-07-10  14   * aren't permitted).
bc6245e5efd70c Ian Abbott       2017-07-10  15   */
8788994376d84d Rikard Falkeborn 2019-12-04 @16  #define BUILD_BUG_ON_ZERO(e) ((int)(sizeof(struct { int:(-!!(e)); })))
527edbc18a70e7 Masahiro Yamada  2019-01-03  17  #endif /* __CHECKER__ */
527edbc18a70e7 Masahiro Yamada  2019-01-03  18
kernel test robot March 8, 2023, 3:32 p.m. UTC | #2
Hi Kui-Feng,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on bpf-next/master]

url:    https://github.com/intel-lab-lkp/linux/commits/Kui-Feng-Lee/bpf-Retire-the-struct_ops-map-kvalue-refcnt/20230308-085434
base:   https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git master
patch link:    https://lore.kernel.org/r/20230308005050.255859-4-kuifeng%40meta.com
patch subject: [PATCH bpf-next v5 3/8] bpf: Create links for BPF struct_ops maps.
config: i386-randconfig-a012-20230306 (https://download.01.org/0day-ci/archive/20230308/202303082344.mxuydKat-lkp@intel.com/config)
compiler: clang version 14.0.6 (https://github.com/llvm/llvm-project f28c006a5895fc0e329fe15fead81e37457cb1d1)
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/intel-lab-lkp/linux/commit/de9e43a5ac82dde718d80d8347e867a8fc935e0a
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review Kui-Feng-Lee/bpf-Retire-the-struct_ops-map-kvalue-refcnt/20230308-085434
        git checkout de9e43a5ac82dde718d80d8347e867a8fc935e0a
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=i386 olddefconfig
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=i386 SHELL=/bin/bash drivers/net/ethernet/intel/fm10k/ drivers/net/ethernet/intel/ixgbe/

If you fix the issue, kindly add following tag where applicable
| Reported-by: kernel test robot <lkp@intel.com>
| Link: https://lore.kernel.org/oe-kbuild-all/202303082344.mxuydKat-lkp@intel.com/

All warnings (new ones prefixed by >>):

   In file included from drivers/net/ethernet/intel/fm10k/fm10k_main.c:8:
   In file included from include/net/tcp.h:35:
   In file included from include/net/sock_reuseport.h:5:
   In file included from include/linux/filter.h:9:
   include/linux/bpf.h:2388:19: error: redefinition of 'bpf_struct_ops_link_create'
   static inline int bpf_struct_ops_link_create(union bpf_attr *attr)
                     ^
   include/linux/bpf.h:1592:19: note: previous definition is here
   static inline int bpf_struct_ops_link_create(union bpf_attr *attr)
                     ^
>> drivers/net/ethernet/intel/fm10k/fm10k_main.c:886:16: warning: division by zero is undefined [-Wdivision-by-zero]
           desc_flags |= FM10K_SET_FLAG(tx_flags, FM10K_TX_FLAGS_CSUM,
                         ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   drivers/net/ethernet/intel/fm10k/fm10k_main.c:878:26: note: expanded from macro 'FM10K_SET_FLAG'
            ((u32)(_input & _flag) / (_flag / _result)))
                                   ^ ~~~~~~~~~~~~~~~~~
   1 warning and 1 error generated.
--
   In file included from drivers/net/ethernet/intel/ixgbe/ixgbe_main.c:27:
   include/linux/bpf.h:2388:19: error: redefinition of 'bpf_struct_ops_link_create'
   static inline int bpf_struct_ops_link_create(union bpf_attr *attr)
                     ^
   include/linux/bpf.h:1592:19: note: previous definition is here
   static inline int bpf_struct_ops_link_create(union bpf_attr *attr)
                     ^
>> drivers/net/ethernet/intel/ixgbe/ixgbe_main.c:8223:14: warning: division by zero is undefined [-Wdivision-by-zero]
           cmd_type |= IXGBE_SET_FLAG(tx_flags, IXGBE_TX_FLAGS_HW_VLAN,
                       ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   drivers/net/ethernet/intel/ixgbe/ixgbe_main.c:8213:26: note: expanded from macro 'IXGBE_SET_FLAG'
            ((u32)(_input & _flag) / (_flag / _result)))
                                   ^ ~~~~~~~~~~~~~~~~~
   drivers/net/ethernet/intel/ixgbe/ixgbe_main.c:8227:14: warning: division by zero is undefined [-Wdivision-by-zero]
           cmd_type |= IXGBE_SET_FLAG(tx_flags, IXGBE_TX_FLAGS_TSO,
                       ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   drivers/net/ethernet/intel/ixgbe/ixgbe_main.c:8213:26: note: expanded from macro 'IXGBE_SET_FLAG'
            ((u32)(_input & _flag) / (_flag / _result)))
                                   ^ ~~~~~~~~~~~~~~~~~
   drivers/net/ethernet/intel/ixgbe/ixgbe_main.c:8231:14: warning: division by zero is undefined [-Wdivision-by-zero]
           cmd_type |= IXGBE_SET_FLAG(tx_flags, IXGBE_TX_FLAGS_TSTAMP,
                       ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   drivers/net/ethernet/intel/ixgbe/ixgbe_main.c:8213:26: note: expanded from macro 'IXGBE_SET_FLAG'
            ((u32)(_input & _flag) / (_flag / _result)))
                                   ^ ~~~~~~~~~~~~~~~~~
   drivers/net/ethernet/intel/ixgbe/ixgbe_main.c:8235:14: warning: division by zero is undefined [-Wdivision-by-zero]
           cmd_type ^= IXGBE_SET_FLAG(skb->no_fcs, 1, IXGBE_ADVTXD_DCMD_IFCS);
                       ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   drivers/net/ethernet/intel/ixgbe/ixgbe_main.c:8213:26: note: expanded from macro 'IXGBE_SET_FLAG'
            ((u32)(_input & _flag) / (_flag / _result)))
                                   ^ ~~~~~~~~~~~~~~~~~
   drivers/net/ethernet/intel/ixgbe/ixgbe_main.c:8246:19: warning: division by zero is undefined [-Wdivision-by-zero]
           olinfo_status |= IXGBE_SET_FLAG(tx_flags,
                            ^~~~~~~~~~~~~~~~~~~~~~~~
   drivers/net/ethernet/intel/ixgbe/ixgbe_main.c:8213:26: note: expanded from macro 'IXGBE_SET_FLAG'
            ((u32)(_input & _flag) / (_flag / _result)))
                                   ^ ~~~~~~~~~~~~~~~~~
   drivers/net/ethernet/intel/ixgbe/ixgbe_main.c:8251:19: warning: division by zero is undefined [-Wdivision-by-zero]
           olinfo_status |= IXGBE_SET_FLAG(tx_flags,
                            ^~~~~~~~~~~~~~~~~~~~~~~~
   drivers/net/ethernet/intel/ixgbe/ixgbe_main.c:8213:26: note: expanded from macro 'IXGBE_SET_FLAG'
            ((u32)(_input & _flag) / (_flag / _result)))
                                   ^ ~~~~~~~~~~~~~~~~~
   drivers/net/ethernet/intel/ixgbe/ixgbe_main.c:8256:19: warning: division by zero is undefined [-Wdivision-by-zero]
           olinfo_status |= IXGBE_SET_FLAG(tx_flags,
                            ^~~~~~~~~~~~~~~~~~~~~~~~
   drivers/net/ethernet/intel/ixgbe/ixgbe_main.c:8213:26: note: expanded from macro 'IXGBE_SET_FLAG'
            ((u32)(_input & _flag) / (_flag / _result)))
                                   ^ ~~~~~~~~~~~~~~~~~
   drivers/net/ethernet/intel/ixgbe/ixgbe_main.c:8264:19: warning: division by zero is undefined [-Wdivision-by-zero]
           olinfo_status |= IXGBE_SET_FLAG(tx_flags,
                            ^~~~~~~~~~~~~~~~~~~~~~~~
   drivers/net/ethernet/intel/ixgbe/ixgbe_main.c:8213:26: note: expanded from macro 'IXGBE_SET_FLAG'
            ((u32)(_input & _flag) / (_flag / _result)))
                                   ^ ~~~~~~~~~~~~~~~~~
   drivers/net/ethernet/intel/ixgbe/ixgbe_main.c:10807:46: warning: shift count >= width of type [-Wshift-count-overflow]
           err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
                                                       ^~~~~~~~~~~~~~~~
   include/linux/dma-mapping.h:76:54: note: expanded from macro 'DMA_BIT_MASK'
   #define DMA_BIT_MASK(n) (((n) == 64) ? ~0ULL : ((1ULL<<(n))-1))
                                                        ^ ~~~
   9 warnings and 1 error generated.


vim +886 drivers/net/ethernet/intel/fm10k/fm10k_main.c

76a540d4728a37 Alexander Duyck 2014-09-20  874  
76a540d4728a37 Alexander Duyck 2014-09-20  875  #define FM10K_SET_FLAG(_input, _flag, _result) \
76a540d4728a37 Alexander Duyck 2014-09-20  876  	((_flag <= _result) ? \
76a540d4728a37 Alexander Duyck 2014-09-20  877  	 ((u32)(_input & _flag) * (_result / _flag)) : \
76a540d4728a37 Alexander Duyck 2014-09-20  878  	 ((u32)(_input & _flag) / (_flag / _result)))
76a540d4728a37 Alexander Duyck 2014-09-20  879  
76a540d4728a37 Alexander Duyck 2014-09-20  880  static u8 fm10k_tx_desc_flags(struct sk_buff *skb, u32 tx_flags)
76a540d4728a37 Alexander Duyck 2014-09-20  881  {
76a540d4728a37 Alexander Duyck 2014-09-20  882  	/* set type for advanced descriptor with frame checksum insertion */
76a540d4728a37 Alexander Duyck 2014-09-20  883  	u32 desc_flags = 0;
76a540d4728a37 Alexander Duyck 2014-09-20  884  
76a540d4728a37 Alexander Duyck 2014-09-20  885  	/* set checksum offload bits */
76a540d4728a37 Alexander Duyck 2014-09-20 @886  	desc_flags |= FM10K_SET_FLAG(tx_flags, FM10K_TX_FLAGS_CSUM,
76a540d4728a37 Alexander Duyck 2014-09-20  887  				     FM10K_TXD_FLAG_CSUM);
76a540d4728a37 Alexander Duyck 2014-09-20  888  
76a540d4728a37 Alexander Duyck 2014-09-20  889  	return desc_flags;
76a540d4728a37 Alexander Duyck 2014-09-20  890  }
76a540d4728a37 Alexander Duyck 2014-09-20  891
kernel test robot March 8, 2023, 4:03 p.m. UTC | #3
Hi Kui-Feng,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on bpf-next/master]

url:    https://github.com/intel-lab-lkp/linux/commits/Kui-Feng-Lee/bpf-Retire-the-struct_ops-map-kvalue-refcnt/20230308-085434
base:   https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git master
patch link:    https://lore.kernel.org/r/20230308005050.255859-4-kuifeng%40meta.com
patch subject: [PATCH bpf-next v5 3/8] bpf: Create links for BPF struct_ops maps.
config: x86_64-defconfig (https://download.01.org/0day-ci/archive/20230308/202303082340.qYFHo45I-lkp@intel.com/config)
compiler: gcc-11 (Debian 11.3.0-8) 11.3.0
reproduce (this is a W=1 build):
        # https://github.com/intel-lab-lkp/linux/commit/de9e43a5ac82dde718d80d8347e867a8fc935e0a
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review Kui-Feng-Lee/bpf-Retire-the-struct_ops-map-kvalue-refcnt/20230308-085434
        git checkout de9e43a5ac82dde718d80d8347e867a8fc935e0a
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        make W=1 O=build_dir ARCH=x86_64 olddefconfig
        make W=1 O=build_dir ARCH=x86_64 SHELL=/bin/bash

If you fix the issue, kindly add following tag where applicable
| Reported-by: kernel test robot <lkp@intel.com>
| Link: https://lore.kernel.org/oe-kbuild-all/202303082340.qYFHo45I-lkp@intel.com/

All errors (new ones prefixed by >>):

   In file included from drivers/net/virtio_net.c:13:
>> include/linux/bpf.h:2388:19: error: redefinition of 'bpf_struct_ops_link_create'
    2388 | static inline int bpf_struct_ops_link_create(union bpf_attr *attr)
         |                   ^~~~~~~~~~~~~~~~~~~~~~~~~~
   include/linux/bpf.h:1592:19: note: previous definition of 'bpf_struct_ops_link_create' with type 'int(union bpf_attr *)'
    1592 | static inline int bpf_struct_ops_link_create(union bpf_attr *attr)
         |                   ^~~~~~~~~~~~~~~~~~~~~~~~~~
--
   In file included from include/linux/filter.h:9,
                    from include/net/sock_reuseport.h:5,
                    from include/net/tcp.h:35,
                    from net/ipv4/netfilter/nf_reject_ipv4.c:8:
>> include/linux/bpf.h:2388:19: error: redefinition of 'bpf_struct_ops_link_create'
    2388 | static inline int bpf_struct_ops_link_create(union bpf_attr *attr)
         |                   ^~~~~~~~~~~~~~~~~~~~~~~~~~
   include/linux/bpf.h:1592:19: note: previous definition of 'bpf_struct_ops_link_create' with type 'int(union bpf_attr *)'
    1592 | static inline int bpf_struct_ops_link_create(union bpf_attr *attr)
         |                   ^~~~~~~~~~~~~~~~~~~~~~~~~~
   net/ipv4/netfilter/nf_reject_ipv4.c: In function 'nf_send_reset':
   net/ipv4/netfilter/nf_reject_ipv4.c:244:23: warning: variable 'niph' set but not used [-Wunused-but-set-variable]
     244 |         struct iphdr *niph;
         |                       ^~~~
--
   In file included from include/linux/filter.h:9,
                    from kernel/bpf/core.c:21:
>> include/linux/bpf.h:2388:19: error: redefinition of 'bpf_struct_ops_link_create'
    2388 | static inline int bpf_struct_ops_link_create(union bpf_attr *attr)
         |                   ^~~~~~~~~~~~~~~~~~~~~~~~~~
   include/linux/bpf.h:1592:19: note: previous definition of 'bpf_struct_ops_link_create' with type 'int(union bpf_attr *)'
    1592 | static inline int bpf_struct_ops_link_create(union bpf_attr *attr)
         |                   ^~~~~~~~~~~~~~~~~~~~~~~~~~
   kernel/bpf/core.c:1632:12: warning: no previous prototype for 'bpf_probe_read_kernel' [-Wmissing-prototypes]
    1632 | u64 __weak bpf_probe_read_kernel(void *dst, u32 size, const void *unsafe_ptr)
         |            ^~~~~~~~~~~~~~~~~~~~~
   kernel/bpf/core.c:2069:6: warning: no previous prototype for 'bpf_patch_call_args' [-Wmissing-prototypes]
    2069 | void bpf_patch_call_args(struct bpf_insn *insn, u32 stack_depth)
         |      ^~~~~~~~~~~~~~~~~~~
--
   In file included from include/linux/filter.h:9,
                    from kernel/kallsyms.c:25:
>> include/linux/bpf.h:2388:19: error: redefinition of 'bpf_struct_ops_link_create'
    2388 | static inline int bpf_struct_ops_link_create(union bpf_attr *attr)
         |                   ^~~~~~~~~~~~~~~~~~~~~~~~~~
   include/linux/bpf.h:1592:19: note: previous definition of 'bpf_struct_ops_link_create' with type 'int(union bpf_attr *)'
    1592 | static inline int bpf_struct_ops_link_create(union bpf_attr *attr)
         |                   ^~~~~~~~~~~~~~~~~~~~~~~~~~
   kernel/kallsyms.c:663:12: warning: no previous prototype for 'arch_get_kallsym' [-Wmissing-prototypes]
     663 | int __weak arch_get_kallsym(unsigned int symnum, unsigned long *value,
         |            ^~~~~~~~~~~~~~~~
--
   In file included from include/linux/bpf-cgroup.h:5,
                    from net/socket.c:55:
>> include/linux/bpf.h:2388:19: error: redefinition of 'bpf_struct_ops_link_create'
    2388 | static inline int bpf_struct_ops_link_create(union bpf_attr *attr)
         |                   ^~~~~~~~~~~~~~~~~~~~~~~~~~
   include/linux/bpf.h:1592:19: note: previous definition of 'bpf_struct_ops_link_create' with type 'int(union bpf_attr *)'
    1592 | static inline int bpf_struct_ops_link_create(union bpf_attr *attr)
         |                   ^~~~~~~~~~~~~~~~~~~~~~~~~~
   net/socket.c: In function '__sys_getsockopt':
   net/socket.c:2300:13: warning: variable 'max_optlen' set but not used [-Wunused-but-set-variable]
    2300 |         int max_optlen;
         |             ^~~~~~~~~~
--
   In file included from net/ipv6/ip6_fib.c:18:
>> include/linux/bpf.h:2388:19: error: redefinition of 'bpf_struct_ops_link_create'
    2388 | static inline int bpf_struct_ops_link_create(union bpf_attr *attr)
         |                   ^~~~~~~~~~~~~~~~~~~~~~~~~~
   include/linux/bpf.h:1592:19: note: previous definition of 'bpf_struct_ops_link_create' with type 'int(union bpf_attr *)'
    1592 | static inline int bpf_struct_ops_link_create(union bpf_attr *attr)
         |                   ^~~~~~~~~~~~~~~~~~~~~~~~~~
   net/ipv6/ip6_fib.c: In function 'fib6_add':
   net/ipv6/ip6_fib.c:1378:32: warning: variable 'pn' set but not used [-Wunused-but-set-variable]
    1378 |         struct fib6_node *fn, *pn = NULL;
         |                                ^~
--
   In file included from include/linux/filter.h:9,
                    from include/net/sock_reuseport.h:5,
                    from include/net/tcp.h:35,
                    from include/linux/netfilter_ipv6.h:11,
                    from net/ipv6/netfilter/nf_reject_ipv6.c:12:
>> include/linux/bpf.h:2388:19: error: redefinition of 'bpf_struct_ops_link_create'
    2388 | static inline int bpf_struct_ops_link_create(union bpf_attr *attr)
         |                   ^~~~~~~~~~~~~~~~~~~~~~~~~~
   include/linux/bpf.h:1592:19: note: previous definition of 'bpf_struct_ops_link_create' with type 'int(union bpf_attr *)'
    1592 | static inline int bpf_struct_ops_link_create(union bpf_attr *attr)
         |                   ^~~~~~~~~~~~~~~~~~~~~~~~~~
   net/ipv6/netfilter/nf_reject_ipv6.c: In function 'nf_send_reset6':
   net/ipv6/netfilter/nf_reject_ipv6.c:287:25: warning: variable 'ip6h' set but not used [-Wunused-but-set-variable]
     287 |         struct ipv6hdr *ip6h;
         |                         ^~~~


vim +/bpf_struct_ops_link_create +2388 include/linux/bpf.h

  2387	
> 2388	static inline int bpf_struct_ops_link_create(union bpf_attr *attr)
  2389	{
  2390		return -EOPNOTSUPP;
  2391	}
  2392
Martin KaFai Lau March 8, 2023, 8:04 p.m. UTC | #4
On 3/7/23 4:50 PM, Kui-Feng Lee wrote:
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index 00b6e1a2edaf..afca6c526fe4 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -1548,6 +1548,7 @@ static inline void bpf_module_put(const void *data, struct module *owner)
>   	else
>   		module_put(owner);
>   }
> +int bpf_struct_ops_link_create(union bpf_attr *attr);
>   
>   #ifdef CONFIG_NET
>   /* Define it here to avoid the use of forward declaration */
> @@ -1588,6 +1589,11 @@ static inline int bpf_struct_ops_map_sys_lookup_elem(struct bpf_map *map,
>   {
>   	return -EINVAL;
>   }
> +static inline int bpf_struct_ops_link_create(union bpf_attr *attr)
> +{
> +	return -EOPNOTSUPP;
> +}
> +
>   #endif
>   
>   #if defined(CONFIG_CGROUP_BPF) && defined(CONFIG_BPF_LSM)
> @@ -2379,6 +2385,11 @@ static inline void bpf_link_put(struct bpf_link *link)
>   {
>   }
>   
> +static inline int bpf_struct_ops_link_create(union bpf_attr *attr)
> +{
> +	return -EOPNOTSUPP;
> +}

The inline version is double defined. It does not look right. Please double check.

> +
>   static inline int bpf_obj_get_user(const char __user *pathname, int flags)
>   {
>   	return -EOPNOTSUPP;
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index 976b194eb775..f9fc7b8af3c4 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -1033,6 +1033,7 @@ enum bpf_attach_type {
>   	BPF_PERF_EVENT,
>   	BPF_TRACE_KPROBE_MULTI,
>   	BPF_LSM_CGROUP,
> +	BPF_STRUCT_OPS,
>   	__MAX_BPF_ATTACH_TYPE
>   };
>   
> @@ -1266,6 +1267,9 @@ enum {
>   
>   /* Create a map that is suitable to be an inner map with dynamic max entries */
>   	BPF_F_INNER_MAP		= (1U << 12),
> +
> +/* Create a map that will be registered/unregistered by the backing bpf_link */
> +	BPF_F_LINK		= (1U << 13),
>   };
>   
>   /* Flags for BPF_PROG_QUERY. */
> @@ -1507,7 +1511,10 @@ union bpf_attr {
>   	} task_fd_query;
>   
>   	struct { /* struct used by BPF_LINK_CREATE command */
> -		__u32		prog_fd;	/* eBPF program to attach */
> +		union {
> +			__u32		prog_fd;	/* eBPF program to attach */
> +			__u32		map_fd;		/* struct_ops to attach */
> +		};
>   		union {
>   			__u32		target_fd;	/* object to attach to */
>   			__u32		target_ifindex; /* target ifindex */
> @@ -6379,6 +6386,9 @@ struct bpf_link_info {
>   		struct {
>   			__u32 ifindex;
>   		} xdp;
> +		struct {
> +			__u32 map_id;
> +		} struct_ops;
>   	};
>   } __attribute__((aligned(8)));
>   
> diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c
> index 9e097fcc9cf4..5a7e86cf67b5 100644
> --- a/kernel/bpf/bpf_struct_ops.c
> +++ b/kernel/bpf/bpf_struct_ops.c
> @@ -16,6 +16,7 @@ enum bpf_struct_ops_state {
>   	BPF_STRUCT_OPS_STATE_INIT,
>   	BPF_STRUCT_OPS_STATE_INUSE,
>   	BPF_STRUCT_OPS_STATE_TOBEFREE,
> +	BPF_STRUCT_OPS_STATE_READY,
>   };
>   
>   #define BPF_STRUCT_OPS_COMMON_VALUE			\
> @@ -58,6 +59,11 @@ struct bpf_struct_ops_map {
>   	struct bpf_struct_ops_value kvalue;
>   };
>   
> +struct bpf_struct_ops_link {
> +	struct bpf_link link;
> +	struct bpf_map __rcu *map;
> +};
> +
>   static DEFINE_MUTEX(update_mutex);
>   
>   #define VALUE_PREFIX "bpf_struct_ops_"
> @@ -496,11 +502,24 @@ static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
>   		*(unsigned long *)(udata + moff) = prog->aux->id;
>   	}
>   
> -	bpf_map_inc(map);
> -
>   	set_memory_rox((long)st_map->image, 1);
> +	if (st_map->map.map_flags & BPF_F_LINK) {
> +		if (st_ops->validate) {
> +			err = st_ops->validate(kdata);
> +			if (err)
> +				goto unlock;

This should at least be 'goto reset_unlock' to release the progs.

Should set_memory_rox(..., 1) also be done after validate()?

> +		}
> +		/* Let bpf_link handle registration & unregistration.
> +		 *
> +		 * Pair with smp_load_acquire() during lookup_elem().
> +		 */
> +		smp_store_release(&kvalue->state, BPF_STRUCT_OPS_STATE_READY);
> +		goto unlock;
> +	}
> +
>   	err = st_ops->reg(kdata);
>   	if (likely(!err)) {
> +		bpf_map_inc(map);
>   		/* Pair with smp_load_acquire() during lookup_elem().
>   		 * It ensures the above udata updates (e.g. prog->aux->id)
>   		 * can be seen once BPF_STRUCT_OPS_STATE_INUSE is set.
> @@ -516,7 +535,6 @@ static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
>   	 */
>   	set_memory_nx((long)st_map->image, 1);
>   	set_memory_rw((long)st_map->image, 1);
> -	bpf_map_put(map);
>   
>   reset_unlock:
>   	bpf_struct_ops_map_put_progs(st_map);
> @@ -534,6 +552,9 @@ static int bpf_struct_ops_map_delete_elem(struct bpf_map *map, void *key)
>   	struct bpf_struct_ops_map *st_map;
>   
>   	st_map = (struct bpf_struct_ops_map *)map;
> +	if (st_map->map.map_flags & BPF_F_LINK)
> +		return -EOPNOTSUPP;
> +
>   	prev_state = cmpxchg(&st_map->kvalue.state,
>   			     BPF_STRUCT_OPS_STATE_INUSE,
>   			     BPF_STRUCT_OPS_STATE_TOBEFREE);
> @@ -601,7 +622,7 @@ static void bpf_struct_ops_map_free(struct bpf_map *map)
>   static int bpf_struct_ops_map_alloc_check(union bpf_attr *attr)
>   {
>   	if (attr->key_size != sizeof(unsigned int) || attr->max_entries != 1 ||
> -	    attr->map_flags || !attr->btf_vmlinux_value_type_id)
> +	    (attr->map_flags & ~BPF_F_LINK) || !attr->btf_vmlinux_value_type_id)
>   		return -EINVAL;
>   	return 0;
>   }
> @@ -712,3 +733,98 @@ void bpf_struct_ops_put(const void *kdata)
>   
>   	bpf_map_put(&st_map->map);
>   }
> +
> +static void bpf_struct_ops_map_link_dealloc(struct bpf_link *link)
> +{
> +	struct bpf_struct_ops_link *st_link;
> +	struct bpf_struct_ops_map *st_map;
> +
> +	st_link = container_of(link, struct bpf_struct_ops_link, link);
> +	st_map = (struct bpf_struct_ops_map *)st_link->map;

/* protected by refcnt and no one is replacing it */
rcu_dereference_protected(st_link->map, true);

st_link->map is annotated with __rcu, so reading it directly should trigger a
sparse warning when compiling with 'make C=1 ...'. Patchwork also reports this:
https://patchwork.kernel.org/project/netdevbpf/patch/20230308005050.255859-4-kuifeng@meta.com/
Please pay attention to patchwork for errors.

> +	st_map->st_ops->unreg(&st_map->kvalue.data);
> +	bpf_map_put(st_link->map);

Same here: this reads an __rcu pointer without rcu_dereference_xxx().

Or simply use &st_map->map here. Otherwise, it will also trigger a type mismatch warning.

> +	kfree(st_link);
> +}
> +
> +static void bpf_struct_ops_map_link_show_fdinfo(const struct bpf_link *link,
> +					    struct seq_file *seq)
> +{
> +	struct bpf_struct_ops_link *st_link;
> +	struct bpf_map *map;
> +
> +	st_link = container_of(link, struct bpf_struct_ops_link, link);
> +	rcu_read_lock();
> +	map = rcu_dereference(st_link->map);
> +	if (map)

map cannot be NULL?

> +		seq_printf(seq, "map_id:\t%d\n", map->id);
> +	rcu_read_unlock();
> +}
> +
> +static int bpf_struct_ops_map_link_fill_link_info(const struct bpf_link *link,
> +					       struct bpf_link_info *info)
> +{
> +	struct bpf_struct_ops_link *st_link;
> +	struct bpf_map *map;
> +
> +	st_link = container_of(link, struct bpf_struct_ops_link, link);
> +	rcu_read_lock();
> +	map = rcu_dereference(st_link->map);
> +	if (map)

Same here.

> +		info->struct_ops.map_id = map->id;
> +	rcu_read_unlock();
> +	return 0;
> +}
> +
> +static const struct bpf_link_ops bpf_struct_ops_map_lops = {
> +	.dealloc = bpf_struct_ops_map_link_dealloc,
> +	.show_fdinfo = bpf_struct_ops_map_link_show_fdinfo,
> +	.fill_link_info = bpf_struct_ops_map_link_fill_link_info,
> +};
> +
> +int bpf_struct_ops_link_create(union bpf_attr *attr)
> +{
> +	struct bpf_struct_ops_link *link = NULL;
> +	struct bpf_link_primer link_primer;
> +	struct bpf_struct_ops_map *st_map;
> +	struct bpf_map *map;
> +	int err;
> +
> +	map = bpf_map_get(attr->link_create.map_fd);
> +	if (!map)
> +		return -EINVAL;
> +
> +	st_map = (struct bpf_struct_ops_map *)map;
> +
> +	if (map->map_type != BPF_MAP_TYPE_STRUCT_OPS || !(map->map_flags & BPF_F_LINK) ||
> +	    /* Pair with smp_store_release() during map_update */
> +	    smp_load_acquire(&st_map->kvalue.state) != BPF_STRUCT_OPS_STATE_READY) {
> +		err = -EINVAL;
> +		goto err_out;
> +	}
> +
> +	link = kzalloc(sizeof(*link), GFP_USER);
> +	if (!link) {
> +		err = -ENOMEM;
> +		goto err_out;
> +	}
> +	bpf_link_init(&link->link, BPF_LINK_TYPE_STRUCT_OPS, &bpf_struct_ops_map_lops, NULL);
> +	RCU_INIT_POINTER(link->map, map);
> +
> +	err = bpf_link_prime(&link->link, &link_primer);
> +	if (err)
> +		goto err_out;
> +
> +	err = st_map->st_ops->reg(st_map->kvalue.data);
> +	if (err) {
> +		bpf_link_cleanup(&link_primer);
> +		goto err_out;
> +	}
> +
> +	return bpf_link_settle(&link_primer);
> +
> +err_out:
> +	bpf_map_put(map);
> +	kfree(link);
> +	return err;
> +}
> +
Kui-Feng Lee March 8, 2023, 11:46 p.m. UTC | #5
On 3/8/23 12:04, Martin KaFai Lau wrote:
> On 3/7/23 4:50 PM, Kui-Feng Lee wrote:
>> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
>> index 00b6e1a2edaf..afca6c526fe4 100644
>> --- a/include/linux/bpf.h
>> +++ b/include/linux/bpf.h
>> @@ -1548,6 +1548,7 @@ static inline void bpf_module_put(const void 
>> *data, struct module *owner)
>>       else
>>           module_put(owner);
>>   }
>> +int bpf_struct_ops_link_create(union bpf_attr *attr);
>>   #ifdef CONFIG_NET
>>   /* Define it here to avoid the use of forward declaration */
>> @@ -1588,6 +1589,11 @@ static inline int 
>> bpf_struct_ops_map_sys_lookup_elem(struct bpf_map *map,
>>   {
>>       return -EINVAL;
>>   }
>> +static inline int bpf_struct_ops_link_create(union bpf_attr *attr)
>> +{
>> +    return -EOPNOTSUPP;
>> +}
>> +
>>   #endif
>>   #if defined(CONFIG_CGROUP_BPF) && defined(CONFIG_BPF_LSM)
>> @@ -2379,6 +2385,11 @@ static inline void bpf_link_put(struct bpf_link 
>> *link)
>>   {
>>   }
>> +static inline int bpf_struct_ops_link_create(union bpf_attr *attr)
>> +{
>> +    return -EOPNOTSUPP;
>> +}
> 
> The inline version is double defined. It does not look right. Please 
> double check.

Removed!

> 
>> +
>>   static inline int bpf_obj_get_user(const char __user *pathname, int 
>> flags)
>>   {
>>       return -EOPNOTSUPP;
>> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
>> index 976b194eb775..f9fc7b8af3c4 100644
>> --- a/include/uapi/linux/bpf.h
>> +++ b/include/uapi/linux/bpf.h
>> @@ -1033,6 +1033,7 @@ enum bpf_attach_type {
>>       BPF_PERF_EVENT,
>>       BPF_TRACE_KPROBE_MULTI,
>>       BPF_LSM_CGROUP,
>> +    BPF_STRUCT_OPS,
>>       __MAX_BPF_ATTACH_TYPE
>>   };
>> @@ -1266,6 +1267,9 @@ enum {
>>   /* Create a map that is suitable to be an inner map with dynamic max 
>> entries */
>>       BPF_F_INNER_MAP        = (1U << 12),
>> +
>> +/* Create a map that will be registered/unregistered by the backing 
>> bpf_link */
>> +    BPF_F_LINK        = (1U << 13),
>>   };
>>   /* Flags for BPF_PROG_QUERY. */
>> @@ -1507,7 +1511,10 @@ union bpf_attr {
>>       } task_fd_query;
>>       struct { /* struct used by BPF_LINK_CREATE command */
>> -        __u32        prog_fd;    /* eBPF program to attach */
>> +        union {
>> +            __u32        prog_fd;    /* eBPF program to attach */
>> +            __u32        map_fd;        /* struct_ops to attach */
>> +        };
>>           union {
>>               __u32        target_fd;    /* object to attach to */
>>               __u32        target_ifindex; /* target ifindex */
>> @@ -6379,6 +6386,9 @@ struct bpf_link_info {
>>           struct {
>>               __u32 ifindex;
>>           } xdp;
>> +        struct {
>> +            __u32 map_id;
>> +        } struct_ops;
>>       };
>>   } __attribute__((aligned(8)));
>> diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c
>> index 9e097fcc9cf4..5a7e86cf67b5 100644
>> --- a/kernel/bpf/bpf_struct_ops.c
>> +++ b/kernel/bpf/bpf_struct_ops.c
>> @@ -16,6 +16,7 @@ enum bpf_struct_ops_state {
>>       BPF_STRUCT_OPS_STATE_INIT,
>>       BPF_STRUCT_OPS_STATE_INUSE,
>>       BPF_STRUCT_OPS_STATE_TOBEFREE,
>> +    BPF_STRUCT_OPS_STATE_READY,
>>   };
>>   #define BPF_STRUCT_OPS_COMMON_VALUE            \
>> @@ -58,6 +59,11 @@ struct bpf_struct_ops_map {
>>       struct bpf_struct_ops_value kvalue;
>>   };
>> +struct bpf_struct_ops_link {
>> +    struct bpf_link link;
>> +    struct bpf_map __rcu *map;
>> +};
>> +
>>   static DEFINE_MUTEX(update_mutex);
>>   #define VALUE_PREFIX "bpf_struct_ops_"
>> @@ -496,11 +502,24 @@ static int bpf_struct_ops_map_update_elem(struct 
>> bpf_map *map, void *key,
>>           *(unsigned long *)(udata + moff) = prog->aux->id;
>>       }
>> -    bpf_map_inc(map);
>> -
>>       set_memory_rox((long)st_map->image, 1);
>> +    if (st_map->map.map_flags & BPF_F_LINK) {
>> +        if (st_ops->validate) {
>> +            err = st_ops->validate(kdata);
>> +            if (err)
>> +                goto unlock;
> 
> This should at least be 'goto reset_unlock' to release the progs.
> 
> set_memory_rox(..., 1) should also be done after validate?

Yes, it should go to reset_unlock.  In that case, set_memory_rox()
should be called after the check.


> 
>> +        }
>> +        /* Let bpf_link handle registration & unregistration.
>> +         *
>> +         * Pair with smp_load_acquire() during lookup_elem().
>> +         */
>> +        smp_store_release(&kvalue->state, BPF_STRUCT_OPS_STATE_READY);
>> +        goto unlock;
>> +    }
>> +
>>       err = st_ops->reg(kdata);
>>       if (likely(!err)) {
>> +        bpf_map_inc(map);
>>           /* Pair with smp_load_acquire() during lookup_elem().
>>            * It ensures the above udata updates (e.g. prog->aux->id)
>>            * can be seen once BPF_STRUCT_OPS_STATE_INUSE is set.
>> @@ -516,7 +535,6 @@ static int bpf_struct_ops_map_update_elem(struct 
>> bpf_map *map, void *key,
>>        */
>>       set_memory_nx((long)st_map->image, 1);
>>       set_memory_rw((long)st_map->image, 1);
>> -    bpf_map_put(map);
>>   reset_unlock:
>>       bpf_struct_ops_map_put_progs(st_map);
>> @@ -534,6 +552,9 @@ static int bpf_struct_ops_map_delete_elem(struct 
>> bpf_map *map, void *key)
>>       struct bpf_struct_ops_map *st_map;
>>       st_map = (struct bpf_struct_ops_map *)map;
>> +    if (st_map->map.map_flags & BPF_F_LINK)
>> +        return -EOPNOTSUPP;
>> +
>>       prev_state = cmpxchg(&st_map->kvalue.state,
>>                    BPF_STRUCT_OPS_STATE_INUSE,
>>                    BPF_STRUCT_OPS_STATE_TOBEFREE);
>> @@ -601,7 +622,7 @@ static void bpf_struct_ops_map_free(struct bpf_map 
>> *map)
>>   static int bpf_struct_ops_map_alloc_check(union bpf_attr *attr)
>>   {
>>       if (attr->key_size != sizeof(unsigned int) || attr->max_entries 
>> != 1 ||
>> -        attr->map_flags || !attr->btf_vmlinux_value_type_id)
>> +        (attr->map_flags & ~BPF_F_LINK) || 
>> !attr->btf_vmlinux_value_type_id)
>>           return -EINVAL;
>>       return 0;
>>   }
>> @@ -712,3 +733,98 @@ void bpf_struct_ops_put(const void *kdata)
>>       bpf_map_put(&st_map->map);
>>   }
>> +
>> +static void bpf_struct_ops_map_link_dealloc(struct bpf_link *link)
>> +{
>> +    struct bpf_struct_ops_link *st_link;
>> +    struct bpf_struct_ops_map *st_map;
>> +
>> +    st_link = container_of(link, struct bpf_struct_ops_link, link);
>> +    st_map = (struct bpf_struct_ops_map *)st_link->map;
> 
> /* protected by refcnt and no one is replacing it */
> rcu_dereference_protected(st_link->map, true);
> 
> st_link->map is with __rcu. It should have warning when compile with 
> 'make C=1 ...'. Patchwork also reports this: 
> https://patchwork.kernel.org/project/netdevbpf/patch/20230308005050.255859-4-kuifeng@meta.com/. Please pay attention to patchwork for errors.
> 
>> +    st_map->st_ops->unreg(&st_map->kvalue.data);
>> +    bpf_map_put(st_link->map);
> 
> Same here. Reading __rcu pointer without rcu_dereference_xxx.

As discussed offline, rcu_dereference_protected() will be used here.

> 
> or simply use &st_map->map here. Otherwise, it will also have type 
> mismatch warning.
> 
>> +    kfree(st_link);
>> +}
>> +
>> +static void bpf_struct_ops_map_link_show_fdinfo(const struct bpf_link 
>> *link,
>> +                        struct seq_file *seq)
>> +{
>> +    struct bpf_struct_ops_link *st_link;
>> +    struct bpf_map *map;
>> +
>> +    st_link = container_of(link, struct bpf_struct_ops_link, link);
>> +    rcu_read_lock();
>> +    map = rcu_dereference(st_link->map);
>> +    if (map)
> 
> map cannot be NULL?

It should not be NULL now that the detach feature has been removed.

> 
>> +        seq_printf(seq, "map_id:\t%d\n", map->id);
>> +    rcu_read_unlock();
>> +}
>> +
>> +static int bpf_struct_ops_map_link_fill_link_info(const struct 
>> bpf_link *link,
>> +                           struct bpf_link_info *info)
>> +{
>> +    struct bpf_struct_ops_link *st_link;
>> +    struct bpf_map *map;
>> +
>> +    st_link = container_of(link, struct bpf_struct_ops_link, link);
>> +    rcu_read_lock();
>> +    map = rcu_dereference(st_link->map);
>> +    if (map)
> 
> Same here.

Ack.

> 
>> +        info->struct_ops.map_id = map->id;
>> +    rcu_read_unlock();
>> +    return 0;
>> +}
>> +
>> +static const struct bpf_link_ops bpf_struct_ops_map_lops = {
>> +    .dealloc = bpf_struct_ops_map_link_dealloc,
>> +    .show_fdinfo = bpf_struct_ops_map_link_show_fdinfo,
>> +    .fill_link_info = bpf_struct_ops_map_link_fill_link_info,
>> +};
>> +
>> +int bpf_struct_ops_link_create(union bpf_attr *attr)
>> +{
>> +    struct bpf_struct_ops_link *link = NULL;
>> +    struct bpf_link_primer link_primer;
>> +    struct bpf_struct_ops_map *st_map;
>> +    struct bpf_map *map;
>> +    int err;
>> +
>> +    map = bpf_map_get(attr->link_create.map_fd);
>> +    if (!map)
>> +        return -EINVAL;
>> +
>> +    st_map = (struct bpf_struct_ops_map *)map;
>> +
>> +    if (map->map_type != BPF_MAP_TYPE_STRUCT_OPS || !(map->map_flags 
>> & BPF_F_LINK) ||
>> +        /* Pair with smp_store_release() during map_update */
>> +        smp_load_acquire(&st_map->kvalue.state) != 
>> BPF_STRUCT_OPS_STATE_READY) {
>> +        err = -EINVAL;
>> +        goto err_out;
>> +    }
>> +
>> +    link = kzalloc(sizeof(*link), GFP_USER);
>> +    if (!link) {
>> +        err = -ENOMEM;
>> +        goto err_out;
>> +    }
>> +    bpf_link_init(&link->link, BPF_LINK_TYPE_STRUCT_OPS, 
>> &bpf_struct_ops_map_lops, NULL);
>> +    RCU_INIT_POINTER(link->map, map);
>> +
>> +    err = bpf_link_prime(&link->link, &link_primer);
>> +    if (err)
>> +        goto err_out;
>> +
>> +    err = st_map->st_ops->reg(st_map->kvalue.data);
>> +    if (err) {
>> +        bpf_link_cleanup(&link_primer);
>> +        goto err_out;
>> +    }
>> +
>> +    return bpf_link_settle(&link_primer);
>> +
>> +err_out:
>> +    bpf_map_put(map);
>> +    kfree(link);
>> +    return err;
>> +}
>> +
>
diff mbox series

Patch

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 00b6e1a2edaf..afca6c526fe4 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1548,6 +1548,7 @@  static inline void bpf_module_put(const void *data, struct module *owner)
 	else
 		module_put(owner);
 }
+int bpf_struct_ops_link_create(union bpf_attr *attr);
 
 #ifdef CONFIG_NET
 /* Define it here to avoid the use of forward declaration */
@@ -1588,6 +1589,11 @@  static inline int bpf_struct_ops_map_sys_lookup_elem(struct bpf_map *map,
 {
 	return -EINVAL;
 }
+static inline int bpf_struct_ops_link_create(union bpf_attr *attr)
+{
+	return -EOPNOTSUPP;
+}
+
 #endif
 
 #if defined(CONFIG_CGROUP_BPF) && defined(CONFIG_BPF_LSM)
@@ -2379,6 +2385,11 @@  static inline void bpf_link_put(struct bpf_link *link)
 {
 }
 
+static inline int bpf_struct_ops_link_create(union bpf_attr *attr)
+{
+	return -EOPNOTSUPP;
+}
+
 static inline int bpf_obj_get_user(const char __user *pathname, int flags)
 {
 	return -EOPNOTSUPP;
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 976b194eb775..f9fc7b8af3c4 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -1033,6 +1033,7 @@  enum bpf_attach_type {
 	BPF_PERF_EVENT,
 	BPF_TRACE_KPROBE_MULTI,
 	BPF_LSM_CGROUP,
+	BPF_STRUCT_OPS,
 	__MAX_BPF_ATTACH_TYPE
 };
 
@@ -1266,6 +1267,9 @@  enum {
 
 /* Create a map that is suitable to be an inner map with dynamic max entries */
 	BPF_F_INNER_MAP		= (1U << 12),
+
+/* Create a map that will be registered/unregistered by the backing bpf_link */
+	BPF_F_LINK		= (1U << 13),
 };
 
 /* Flags for BPF_PROG_QUERY. */
@@ -1507,7 +1511,10 @@  union bpf_attr {
 	} task_fd_query;
 
 	struct { /* struct used by BPF_LINK_CREATE command */
-		__u32		prog_fd;	/* eBPF program to attach */
+		union {
+			__u32		prog_fd;	/* eBPF program to attach */
+			__u32		map_fd;		/* struct_ops to attach */
+		};
 		union {
 			__u32		target_fd;	/* object to attach to */
 			__u32		target_ifindex; /* target ifindex */
@@ -6379,6 +6386,9 @@  struct bpf_link_info {
 		struct {
 			__u32 ifindex;
 		} xdp;
+		struct {
+			__u32 map_id;
+		} struct_ops;
 	};
 } __attribute__((aligned(8)));
 
diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c
index 9e097fcc9cf4..5a7e86cf67b5 100644
--- a/kernel/bpf/bpf_struct_ops.c
+++ b/kernel/bpf/bpf_struct_ops.c
@@ -16,6 +16,7 @@  enum bpf_struct_ops_state {
 	BPF_STRUCT_OPS_STATE_INIT,
 	BPF_STRUCT_OPS_STATE_INUSE,
 	BPF_STRUCT_OPS_STATE_TOBEFREE,
+	BPF_STRUCT_OPS_STATE_READY,
 };
 
 #define BPF_STRUCT_OPS_COMMON_VALUE			\
@@ -58,6 +59,11 @@  struct bpf_struct_ops_map {
 	struct bpf_struct_ops_value kvalue;
 };
 
+struct bpf_struct_ops_link {
+	struct bpf_link link;
+	struct bpf_map __rcu *map;
+};
+
 static DEFINE_MUTEX(update_mutex);
 
 #define VALUE_PREFIX "bpf_struct_ops_"
@@ -496,11 +502,24 @@  static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
 		*(unsigned long *)(udata + moff) = prog->aux->id;
 	}
 
-	bpf_map_inc(map);
-
 	set_memory_rox((long)st_map->image, 1);
+	if (st_map->map.map_flags & BPF_F_LINK) {
+		if (st_ops->validate) {
+			err = st_ops->validate(kdata);
+			if (err)
+				goto unlock;
+		}
+		/* Let bpf_link handle registration & unregistration.
+		 *
+		 * Pair with smp_load_acquire() during lookup_elem().
+		 */
+		smp_store_release(&kvalue->state, BPF_STRUCT_OPS_STATE_READY);
+		goto unlock;
+	}
+
 	err = st_ops->reg(kdata);
 	if (likely(!err)) {
+		bpf_map_inc(map);
 		/* Pair with smp_load_acquire() during lookup_elem().
 		 * It ensures the above udata updates (e.g. prog->aux->id)
 		 * can be seen once BPF_STRUCT_OPS_STATE_INUSE is set.
@@ -516,7 +535,6 @@  static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
 	 */
 	set_memory_nx((long)st_map->image, 1);
 	set_memory_rw((long)st_map->image, 1);
-	bpf_map_put(map);
 
 reset_unlock:
 	bpf_struct_ops_map_put_progs(st_map);
@@ -534,6 +552,9 @@  static int bpf_struct_ops_map_delete_elem(struct bpf_map *map, void *key)
 	struct bpf_struct_ops_map *st_map;
 
 	st_map = (struct bpf_struct_ops_map *)map;
+	if (st_map->map.map_flags & BPF_F_LINK)
+		return -EOPNOTSUPP;
+
 	prev_state = cmpxchg(&st_map->kvalue.state,
 			     BPF_STRUCT_OPS_STATE_INUSE,
 			     BPF_STRUCT_OPS_STATE_TOBEFREE);
@@ -601,7 +622,7 @@  static void bpf_struct_ops_map_free(struct bpf_map *map)
 static int bpf_struct_ops_map_alloc_check(union bpf_attr *attr)
 {
 	if (attr->key_size != sizeof(unsigned int) || attr->max_entries != 1 ||
-	    attr->map_flags || !attr->btf_vmlinux_value_type_id)
+	    (attr->map_flags & ~BPF_F_LINK) || !attr->btf_vmlinux_value_type_id)
 		return -EINVAL;
 	return 0;
 }
@@ -712,3 +733,98 @@  void bpf_struct_ops_put(const void *kdata)
 
 	bpf_map_put(&st_map->map);
 }
+
+static void bpf_struct_ops_map_link_dealloc(struct bpf_link *link)
+{
+	struct bpf_struct_ops_link *st_link;
+	struct bpf_struct_ops_map *st_map;
+
+	st_link = container_of(link, struct bpf_struct_ops_link, link);
+	st_map = (struct bpf_struct_ops_map *)st_link->map;
+	st_map->st_ops->unreg(&st_map->kvalue.data);
+	bpf_map_put(st_link->map);
+	kfree(st_link);
+}
+
+static void bpf_struct_ops_map_link_show_fdinfo(const struct bpf_link *link,
+					    struct seq_file *seq)
+{
+	struct bpf_struct_ops_link *st_link;
+	struct bpf_map *map;
+
+	st_link = container_of(link, struct bpf_struct_ops_link, link);
+	rcu_read_lock();
+	map = rcu_dereference(st_link->map);
+	if (map)
+		seq_printf(seq, "map_id:\t%d\n", map->id);
+	rcu_read_unlock();
+}
+
+static int bpf_struct_ops_map_link_fill_link_info(const struct bpf_link *link,
+					       struct bpf_link_info *info)
+{
+	struct bpf_struct_ops_link *st_link;
+	struct bpf_map *map;
+
+	st_link = container_of(link, struct bpf_struct_ops_link, link);
+	rcu_read_lock();
+	map = rcu_dereference(st_link->map);
+	if (map)
+		info->struct_ops.map_id = map->id;
+	rcu_read_unlock();
+	return 0;
+}
+
+static const struct bpf_link_ops bpf_struct_ops_map_lops = {
+	.dealloc = bpf_struct_ops_map_link_dealloc,
+	.show_fdinfo = bpf_struct_ops_map_link_show_fdinfo,
+	.fill_link_info = bpf_struct_ops_map_link_fill_link_info,
+};
+
+int bpf_struct_ops_link_create(union bpf_attr *attr)
+{
+	struct bpf_struct_ops_link *link = NULL;
+	struct bpf_link_primer link_primer;
+	struct bpf_struct_ops_map *st_map;
+	struct bpf_map *map;
+	int err;
+
+	map = bpf_map_get(attr->link_create.map_fd);
+	if (!map)
+		return -EINVAL;
+
+	st_map = (struct bpf_struct_ops_map *)map;
+
+	if (map->map_type != BPF_MAP_TYPE_STRUCT_OPS || !(map->map_flags & BPF_F_LINK) ||
+	    /* Pair with smp_store_release() during map_update */
+	    smp_load_acquire(&st_map->kvalue.state) != BPF_STRUCT_OPS_STATE_READY) {
+		err = -EINVAL;
+		goto err_out;
+	}
+
+	link = kzalloc(sizeof(*link), GFP_USER);
+	if (!link) {
+		err = -ENOMEM;
+		goto err_out;
+	}
+	bpf_link_init(&link->link, BPF_LINK_TYPE_STRUCT_OPS, &bpf_struct_ops_map_lops, NULL);
+	RCU_INIT_POINTER(link->map, map);
+
+	err = bpf_link_prime(&link->link, &link_primer);
+	if (err)
+		goto err_out;
+
+	err = st_map->st_ops->reg(st_map->kvalue.data);
+	if (err) {
+		bpf_link_cleanup(&link_primer);
+		goto err_out;
+	}
+
+	return bpf_link_settle(&link_primer);
+
+err_out:
+	bpf_map_put(map);
+	kfree(link);
+	return err;
+}
+
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 03273cddd6bd..3a4503987a48 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -2806,16 +2806,19 @@  static void bpf_link_show_fdinfo(struct seq_file *m, struct file *filp)
 	const struct bpf_prog *prog = link->prog;
 	char prog_tag[sizeof(prog->tag) * 2 + 1] = { };
 
-	bin2hex(prog_tag, prog->tag, sizeof(prog->tag));
 	seq_printf(m,
 		   "link_type:\t%s\n"
-		   "link_id:\t%u\n"
-		   "prog_tag:\t%s\n"
-		   "prog_id:\t%u\n",
+		   "link_id:\t%u\n",
 		   bpf_link_type_strs[link->type],
-		   link->id,
-		   prog_tag,
-		   prog->aux->id);
+		   link->id);
+	if (prog) {
+		bin2hex(prog_tag, prog->tag, sizeof(prog->tag));
+		seq_printf(m,
+			   "prog_tag:\t%s\n"
+			   "prog_id:\t%u\n",
+			   prog_tag,
+			   prog->aux->id);
+	}
 	if (link->ops->show_fdinfo)
 		link->ops->show_fdinfo(link, m);
 }
@@ -4290,7 +4293,8 @@  static int bpf_link_get_info_by_fd(struct file *file,
 
 	info.type = link->type;
 	info.id = link->id;
-	info.prog_id = link->prog->aux->id;
+	if (link->prog)
+		info.prog_id = link->prog->aux->id;
 
 	if (link->ops->fill_link_info) {
 		err = link->ops->fill_link_info(link, &info);
@@ -4553,6 +4557,9 @@  static int link_create(union bpf_attr *attr, bpfptr_t uattr)
 	if (CHECK_ATTR(BPF_LINK_CREATE))
 		return -EINVAL;
 
+	if (attr->link_create.attach_type == BPF_STRUCT_OPS)
+		return bpf_struct_ops_link_create(attr);
+
 	prog = bpf_prog_get(attr->link_create.prog_fd);
 	if (IS_ERR(prog))
 		return PTR_ERR(prog);
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 976b194eb775..051b85525302 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -1033,6 +1033,7 @@  enum bpf_attach_type {
 	BPF_PERF_EVENT,
 	BPF_TRACE_KPROBE_MULTI,
 	BPF_LSM_CGROUP,
+	BPF_STRUCT_OPS,
 	__MAX_BPF_ATTACH_TYPE
 };
 
@@ -1266,6 +1267,9 @@  enum {
 
 /* Create a map that is suitable to be an inner map with dynamic max entries */
 	BPF_F_INNER_MAP		= (1U << 12),
+
+/* Create a map that will be registered/unregistered by the backing bpf_link */
+	BPF_F_LINK		= (1U << 13),
 };
 
 /* Flags for BPF_PROG_QUERY. */
@@ -1507,7 +1511,10 @@  union bpf_attr {
 	} task_fd_query;
 
 	struct { /* struct used by BPF_LINK_CREATE command */
-		__u32		prog_fd;	/* eBPF program to attach */
+		union {
+			__u32		prog_fd;	/* eBPF program to attach */
+			__u32		map_fd;		/* eBPF struct_ops to attach */
+		};
 		union {
 			__u32		target_fd;	/* object to attach to */
 			__u32		target_ifindex; /* target ifindex */
@@ -6379,6 +6386,9 @@  struct bpf_link_info {
 		struct {
 			__u32 ifindex;
 		} xdp;
+		struct {
+			__u32 map_id;
+		} struct_ops;
 	};
 } __attribute__((aligned(8)));