diff mbox series

[bpf-next,v13,08/14] bpf: hold module for bpf_struct_ops_map.

Message ID 20231209002709.535966-9-thinker.li@gmail.com (mailing list archive)
State Superseded
Delegated to: BPF
Headers show
Series Registrating struct_ops types from modules | expand

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for bpf-next, async
netdev/ynl success SINGLE THREAD; Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 7875 this patch: 7875
netdev/cc_maintainers warning 8 maintainers not CCed: kpsingh@kernel.org daniel@iogearbox.net netdev@vger.kernel.org yonghong.song@linux.dev sdf@google.com haoluo@google.com jolsa@kernel.org john.fastabend@gmail.com
netdev/build_clang success Errors and warnings before: 2579 this patch: 2579
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 8410 this patch: 8410
netdev/checkpatch warning CHECK: Unbalanced braces around else statement WARNING: ENOTSUPP is not a SUSV4 error code, prefer EOPNOTSUPP WARNING: line length of 87 exceeds 80 columns
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc fail Errors and warnings before: 0 this patch: 1
netdev/source_inline success Was 0 now: 0
bpf/vmtest-bpf-next-PR success PR summary
bpf/vmtest-bpf-next-VM_Test-16 success Logs for s390x-gcc / test (test_verifier, false, 360) / test_verifier on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-3 success Logs for Validate matrix.py
bpf/vmtest-bpf-next-VM_Test-2 success Logs for Unittests
bpf/vmtest-bpf-next-VM_Test-5 success Logs for aarch64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-0 success Logs for Lint
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-4 success Logs for aarch64-gcc / build / build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-10 success Logs for aarch64-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-12 success Logs for s390x-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-13 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-15 success Logs for x86_64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-6 success Logs for aarch64-gcc / test (test_maps, false, 360) / test_maps on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-9 success Logs for aarch64-gcc / test (test_verifier, false, 360) / test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-7 success Logs for aarch64-gcc / test (test_progs, false, 360) / test_progs on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-8 success Logs for aarch64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-14 success Logs for x86_64-gcc / build / build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-11 success Logs for s390x-gcc / build / build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-17 success Logs for s390x-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-18 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-19 success Logs for x86_64-gcc / build / build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-20 success Logs for x86_64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-28 success Logs for x86_64-llvm-17 / build / build for x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-29 success Logs for x86_64-llvm-17 / build-release / build for x86_64 with llvm-17 and -O2 optimization
bpf/vmtest-bpf-next-VM_Test-34 success Logs for x86_64-llvm-17 / veristat
bpf/vmtest-bpf-next-VM_Test-35 success Logs for x86_64-llvm-18 / build / build for x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-36 success Logs for x86_64-llvm-18 / build-release / build for x86_64 with llvm-18 and -O2 optimization
bpf/vmtest-bpf-next-VM_Test-42 success Logs for x86_64-llvm-18 / veristat
bpf/vmtest-bpf-next-VM_Test-21 success Logs for x86_64-gcc / test (test_maps, false, 360) / test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-24 success Logs for x86_64-gcc / test (test_progs_no_alu32_parallel, true, 30) / test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-26 success Logs for x86_64-gcc / test (test_verifier, false, 360) / test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-27 success Logs for x86_64-gcc / veristat / veristat on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-33 success Logs for x86_64-llvm-17 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-41 success Logs for x86_64-llvm-18 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-22 success Logs for x86_64-gcc / test (test_progs, false, 360) / test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-23 success Logs for x86_64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-25 success Logs for x86_64-gcc / test (test_progs_parallel, true, 30) / test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-30 success Logs for x86_64-llvm-17 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-31 success Logs for x86_64-llvm-17 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-32 success Logs for x86_64-llvm-17 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-37 success Logs for x86_64-llvm-18 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-38 success Logs for x86_64-llvm-18 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-39 success Logs for x86_64-llvm-18 / test (test_progs_cpuv4, false, 360) / test_progs_cpuv4 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-40 success Logs for x86_64-llvm-18 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-18

Commit Message

Kui-Feng Lee Dec. 9, 2023, 12:27 a.m. UTC
From: Kui-Feng Lee <thinker.li@gmail.com>

To ensure that a module remains accessible whenever a struct_ops object of
a struct_ops type provided by the module is still in use.

struct bpf_strct_ops_map doesn't hold a refcnt to btf anymore sicne a
module will hold a refcnt to it's btf already. But, struct_ops programs are
different. They hold their associated btf, not the module since they need
only btf to assure their types (signatures).

Signed-off-by: Kui-Feng Lee <thinker.li@gmail.com>
---
 include/linux/bpf.h          |  1 +
 include/linux/bpf_verifier.h |  1 +
 kernel/bpf/bpf_struct_ops.c  | 28 +++++++++++++++++++++++-----
 kernel/bpf/verifier.c        | 10 ++++++++++
 4 files changed, 35 insertions(+), 5 deletions(-)

Comments

Martin KaFai Lau Dec. 15, 2023, 5:54 a.m. UTC | #1
On 12/8/23 4:27 PM, thinker.li@gmail.com wrote:
> From: Kui-Feng Lee <thinker.li@gmail.com>
> 
> To ensure that a module remains accessible whenever a struct_ops object of
> a struct_ops type provided by the module is still in use.
> 
> struct bpf_strct_ops_map doesn't hold a refcnt to btf anymore sicne a

s/bpf_strct_/bpf_struct_/

s/sicne/since/

> module will hold a refcnt to it's btf already. But, struct_ops programs are
> different. They hold their associated btf, not the module since they need
> only btf to assure their types (signatures).

The patch subject is not accurate. The patch also holds the module refcnt when
verifying the bpf prog. Maybe "hold module refcnt in struct_ops map
creation and prog verification".

The commit message is also inaccurate about the prog load. It does not mention
that the module is also held when loading a struct_ops prog, but only during
verification. Please explain why it is only needed during
verification.

> 
> Signed-off-by: Kui-Feng Lee <thinker.li@gmail.com>
> ---
>   include/linux/bpf.h          |  1 +
>   include/linux/bpf_verifier.h |  1 +
>   kernel/bpf/bpf_struct_ops.c  | 28 +++++++++++++++++++++++-----
>   kernel/bpf/verifier.c        | 10 ++++++++++
>   4 files changed, 35 insertions(+), 5 deletions(-)
> 
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index 91bcd62d6fcf..c5c7cc4552f5 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -1681,6 +1681,7 @@ struct bpf_struct_ops {
>   	void (*unreg)(void *kdata);
>   	int (*update)(void *kdata, void *old_kdata);
>   	int (*validate)(void *kdata);
> +	struct module *owner;
>   	const char *name;
>   	struct btf_func_model func_models[BPF_STRUCT_OPS_MAX_NR_MEMBERS];
>   };
> diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
> index 314b679fb494..01113bcdd479 100644
> --- a/include/linux/bpf_verifier.h
> +++ b/include/linux/bpf_verifier.h
> @@ -651,6 +651,7 @@ struct bpf_verifier_env {
>   	u32 prev_insn_idx;
>   	struct bpf_prog *prog;		/* eBPF program being verified */
>   	const struct bpf_verifier_ops *ops;
> +	struct module *attach_btf_mod;	/* The owner module of prog->aux->attach_btf */
>   	struct bpf_verifier_stack_elem *head; /* stack of verifier states to be processed */
>   	int stack_size;			/* number of states to be processed */
>   	bool strict_alignment;		/* perform strict pointer alignment checks */
> diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c
> index f943f8378e76..a838f7c7d583 100644
> --- a/kernel/bpf/bpf_struct_ops.c
> +++ b/kernel/bpf/bpf_struct_ops.c
> @@ -641,12 +641,15 @@ static void __bpf_struct_ops_map_free(struct bpf_map *map)
>   		bpf_jit_uncharge_modmem(PAGE_SIZE);
>   	}
>   	bpf_map_area_free(st_map->uvalue);
> -	btf_put(st_map->btf);
>   	bpf_map_area_free(st_map);
>   }
>   
>   static void bpf_struct_ops_map_free(struct bpf_map *map)
>   {
> +	struct bpf_struct_ops_map *st_map = (struct bpf_struct_ops_map *)map;
> +
> +	module_put(st_map->st_ops_desc->st_ops->owner);

The module_get was not done on st_ops->owner when st_map->btf is btf_vmlinux 
(i.e. not module). Although it probably does not matter, I would feel more 
comfortable if it only releases for the things that it did acquire earlier.

	/* st_ops->owner was acquired during map_alloc to implicitly hold
	 * the btf's refcnt. The acquire was only done when btf_is_module().
	 * st_map->btf cannot be NULL here.
	 */
	if (btf_is_module(st_map->btf))
		module_put(st_map->st_ops_desc->st_ops->owner);

> +
>   	/* The struct_ops's function may switch to another struct_ops.
>   	 *
>   	 * For example, bpf_tcp_cc_x->init() may switch to
> @@ -681,6 +684,7 @@ static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr)
>   	size_t st_map_size;
>   	struct bpf_struct_ops_map *st_map;
>   	const struct btf_type *t, *vt;
> +	struct module *mod = NULL;
>   	struct bpf_map *map;
>   	struct btf *btf;
>   	int ret;
> @@ -690,10 +694,20 @@ static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr)
>   		btf = btf_get_by_fd(attr->value_type_btf_obj_fd);
>   		if (IS_ERR(btf))
>   			return ERR_PTR(PTR_ERR(btf));
> -	} else {
> +
> +		if (btf != btf_vmlinux) {
> +			mod = btf_try_get_module(btf);
> +			if (!mod) {
> +				btf_put(btf);
> +				return ERR_PTR(-EINVAL);
> +			}
> +		}
> +		/* mod (NULL for btf_vmlinux) holds a refcnt to btf. We
> +		 * don't need an extra refcnt here.
> +		 */
> +		btf_put(btf);
> +	} else
>   		btf = btf_vmlinux;
> -		btf_get(btf);
> -	}
>   
>   	st_ops_desc = bpf_struct_ops_find_value(btf, attr->btf_vmlinux_value_type_id);
>   	if (!st_ops_desc) {
> @@ -756,7 +770,7 @@ static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr)
>   errout_free:
>   	__bpf_struct_ops_map_free(map);
>   errout:
> -	btf_put(btf);
> +	module_put(mod);
>   
>   	return ERR_PTR(ret);
>   }
> @@ -886,6 +900,10 @@ static int bpf_struct_ops_map_link_update(struct bpf_link *link, struct bpf_map
>   	if (!bpf_struct_ops_valid_to_reg(new_map))
>   		return -EINVAL;
>   
> +	/* The old map is holding the refcount for the owner module.  The
> +	 * ownership of the owner module refcount is going to be
> +	 * transferred from the old map to the new map.
> +	 */

This part I don't understand. Both the old and the new map hold their own
module's refcount at map_alloc time and release it during map_free().
Where does the module refcount transfer happen?

>   	if (!st_map->st_ops_desc->st_ops->update)
>   		return -EOPNOTSUPP;
>   
> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> index 795c16f9cf57..c303cf2fb5ff 100644
> --- a/kernel/bpf/verifier.c
> +++ b/kernel/bpf/verifier.c
> @@ -20079,6 +20079,14 @@ static int check_struct_ops_btf_id(struct bpf_verifier_env *env)
>   	}
>   
>   	btf = prog->aux->attach_btf;
> +	if (btf != btf_vmlinux) {

	if (btf_is_module(btf)) {

> +		/* Make sure st_ops is valid through the lifetime of env */
> +		env->attach_btf_mod = btf_try_get_module(btf);
> +		if (!env->attach_btf_mod) {
> +			verbose(env, "owner module of btf is not found\n");
> +			return -ENOTSUPP;
> +		}
> +	}
>   
>   	btf_id = prog->aux->attach_btf_id;
>   	st_ops_desc = bpf_struct_ops_find(btf, btf_id);
> @@ -20792,6 +20800,8 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3
>   		env->prog->expected_attach_type = 0;
>   
>   	*prog = env->prog;
> +
> +	module_put(env->attach_btf_mod);
>   err_unlock:
>   	if (!is_priv)
>   		mutex_unlock(&bpf_verifier_lock);
Kui-Feng Lee Dec. 15, 2023, 11:25 p.m. UTC | #2
On 12/14/23 21:54, Martin KaFai Lau wrote:
> On 12/8/23 4:27 PM, thinker.li@gmail.com wrote:
>> From: Kui-Feng Lee <thinker.li@gmail.com>
>>
>> To ensure that a module remains accessible whenever a struct_ops 
>> object of
>> a struct_ops type provided by the module is still in use.
>>
>> struct bpf_strct_ops_map doesn't hold a refcnt to btf anymore sicne a
> 
> s /bpf_strct_/bpf_struct_/
> 
> s/sicne/since/
> 
>> module will hold a refcnt to it's btf already. But, struct_ops 
>> programs are
>> different. They hold their associated btf, not the module since they need
>> only btf to assure their types (signatures).
> 
> The patch subject is not accurate. The patch holds the module refcnt 
> when verifying the bpf prog also. May be "hold module refcnt in 
> struct_ops map creation and prog verification".
> 
> The commit message also is inaccurate on the prog load. It did not 
> mention the module is also held when loading struct_ops prog but it is 
> only held during the verification time. Please explain why it is only 
> needed during the verification time.
> 
>>
>> Signed-off-by: Kui-Feng Lee <thinker.li@gmail.com>
>> ---
>>   include/linux/bpf.h          |  1 +
>>   include/linux/bpf_verifier.h |  1 +
>>   kernel/bpf/bpf_struct_ops.c  | 28 +++++++++++++++++++++++-----
>>   kernel/bpf/verifier.c        | 10 ++++++++++
>>   4 files changed, 35 insertions(+), 5 deletions(-)
>>
>> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
>> index 91bcd62d6fcf..c5c7cc4552f5 100644
>> --- a/include/linux/bpf.h
>> +++ b/include/linux/bpf.h
>> @@ -1681,6 +1681,7 @@ struct bpf_struct_ops {
>>       void (*unreg)(void *kdata);
>>       int (*update)(void *kdata, void *old_kdata);
>>       int (*validate)(void *kdata);
>> +    struct module *owner;
>>       const char *name;
>>       struct btf_func_model func_models[BPF_STRUCT_OPS_MAX_NR_MEMBERS];
>>   };
>> diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
>> index 314b679fb494..01113bcdd479 100644
>> --- a/include/linux/bpf_verifier.h
>> +++ b/include/linux/bpf_verifier.h
>> @@ -651,6 +651,7 @@ struct bpf_verifier_env {
>>       u32 prev_insn_idx;
>>       struct bpf_prog *prog;        /* eBPF program being verified */
>>       const struct bpf_verifier_ops *ops;
>> +    struct module *attach_btf_mod;    /* The owner module of 
>> prog->aux->attach_btf */
>>       struct bpf_verifier_stack_elem *head; /* stack of verifier 
>> states to be processed */
>>       int stack_size;            /* number of states to be processed */
>>       bool strict_alignment;        /* perform strict pointer 
>> alignment checks */
>> diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c
>> index f943f8378e76..a838f7c7d583 100644
>> --- a/kernel/bpf/bpf_struct_ops.c
>> +++ b/kernel/bpf/bpf_struct_ops.c
>> @@ -641,12 +641,15 @@ static void __bpf_struct_ops_map_free(struct 
>> bpf_map *map)
>>           bpf_jit_uncharge_modmem(PAGE_SIZE);
>>       }
>>       bpf_map_area_free(st_map->uvalue);
>> -    btf_put(st_map->btf);
>>       bpf_map_area_free(st_map);
>>   }
>>   static void bpf_struct_ops_map_free(struct bpf_map *map)
>>   {
>> +    struct bpf_struct_ops_map *st_map = (struct bpf_struct_ops_map 
>> *)map;
>> +
>> +    module_put(st_map->st_ops_desc->st_ops->owner);
> 
> The module_get was not done on st_ops->owner when st_map->btf is 
> btf_vmlinux (i.e. not module). Although it probably does not matter, I 
> would feel more comfortable if it only releases for the things that it 
> did acquire earlier.
> 
>      /* st_ops->owner was acquired during map_alloc to implicitly holds
>       * the btf's refcnt. The acquire was only done when btf_is_module()
>       * st_map->btf cannot be NULL here.
>       */
>      if (btf_is_module(st_map->btf))
>          module_put(st_map->st_ops_desc->st_ops->owner);

Sure! I will update it.

> 
>> +
>>       /* The struct_ops's function may switch to another struct_ops.
>>        *
>>        * For example, bpf_tcp_cc_x->init() may switch to
>> @@ -681,6 +684,7 @@ static struct bpf_map 
>> *bpf_struct_ops_map_alloc(union bpf_attr *attr)
>>       size_t st_map_size;
>>       struct bpf_struct_ops_map *st_map;
>>       const struct btf_type *t, *vt;
>> +    struct module *mod = NULL;
>>       struct bpf_map *map;
>>       struct btf *btf;
>>       int ret;
>> @@ -690,10 +694,20 @@ static struct bpf_map 
>> *bpf_struct_ops_map_alloc(union bpf_attr *attr)
>>           btf = btf_get_by_fd(attr->value_type_btf_obj_fd);
>>           if (IS_ERR(btf))
>>               return ERR_PTR(PTR_ERR(btf));
>> -    } else {
>> +
>> +        if (btf != btf_vmlinux) {
>> +            mod = btf_try_get_module(btf);
>> +            if (!mod) {
>> +                btf_put(btf);
>> +                return ERR_PTR(-EINVAL);
>> +            }
>> +        }
>> +        /* mod (NULL for btf_vmlinux) holds a refcnt to btf. We
>> +         * don't need an extra refcnt here.
>> +         */
>> +        btf_put(btf);
>> +    } else
>>           btf = btf_vmlinux;
>> -        btf_get(btf);
>> -    }
>>       st_ops_desc = bpf_struct_ops_find_value(btf, 
>> attr->btf_vmlinux_value_type_id);
>>       if (!st_ops_desc) {
>> @@ -756,7 +770,7 @@ static struct bpf_map 
>> *bpf_struct_ops_map_alloc(union bpf_attr *attr)
>>   errout_free:
>>       __bpf_struct_ops_map_free(map);
>>   errout:
>> -    btf_put(btf);
>> +    module_put(mod);
>>       return ERR_PTR(ret);
>>   }
>> @@ -886,6 +900,10 @@ static int bpf_struct_ops_map_link_update(struct 
>> bpf_link *link, struct bpf_map
>>       if (!bpf_struct_ops_valid_to_reg(new_map))
>>           return -EINVAL;
>> +    /* The old map is holding the refcount for the owner module.  The
>> +     * ownership of the owner module refcount is going to be
>> +     * transferred from the old map to the new map.
>> +     */
> 
> This part I don't understand. Both old and new map hold its own module's 
> refcount at map_alloc time and release its own module refcnt during 
> map_free().
> Where the module refcount transfer happened?

Sorry! This comment is no longer valid. I will remove it.

> 
>>       if (!st_map->st_ops_desc->st_ops->update)
>>           return -EOPNOTSUPP;
>> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
>> index 795c16f9cf57..c303cf2fb5ff 100644
>> --- a/kernel/bpf/verifier.c
>> +++ b/kernel/bpf/verifier.c
>> @@ -20079,6 +20079,14 @@ static int check_struct_ops_btf_id(struct 
>> bpf_verifier_env *env)
>>       }
>>       btf = prog->aux->attach_btf;
>> +    if (btf != btf_vmlinux) {
> 
>      if (btf_is_module(btf)) {
> 

Got it!

>> +        /* Make sure st_ops is valid through the lifetime of env */
>> +        env->attach_btf_mod = btf_try_get_module(btf);
>> +        if (!env->attach_btf_mod) {
>> +            verbose(env, "owner module of btf is not found\n");
>> +            return -ENOTSUPP;
>> +        }
>> +    }
>>       btf_id = prog->aux->attach_btf_id;
>>       st_ops_desc = bpf_struct_ops_find(btf, btf_id);
>> @@ -20792,6 +20800,8 @@ int bpf_check(struct bpf_prog **prog, union 
>> bpf_attr *attr, bpfptr_t uattr, __u3
>>           env->prog->expected_attach_type = 0;
>>       *prog = env->prog;
>> +
>> +    module_put(env->attach_btf_mod);
>>   err_unlock:
>>       if (!is_priv)
>>           mutex_unlock(&bpf_verifier_lock);
>
diff mbox series

Patch

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 91bcd62d6fcf..c5c7cc4552f5 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1681,6 +1681,7 @@  struct bpf_struct_ops {
 	void (*unreg)(void *kdata);
 	int (*update)(void *kdata, void *old_kdata);
 	int (*validate)(void *kdata);
+	struct module *owner;
 	const char *name;
 	struct btf_func_model func_models[BPF_STRUCT_OPS_MAX_NR_MEMBERS];
 };
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 314b679fb494..01113bcdd479 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -651,6 +651,7 @@  struct bpf_verifier_env {
 	u32 prev_insn_idx;
 	struct bpf_prog *prog;		/* eBPF program being verified */
 	const struct bpf_verifier_ops *ops;
+	struct module *attach_btf_mod;	/* The owner module of prog->aux->attach_btf */
 	struct bpf_verifier_stack_elem *head; /* stack of verifier states to be processed */
 	int stack_size;			/* number of states to be processed */
 	bool strict_alignment;		/* perform strict pointer alignment checks */
diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c
index f943f8378e76..a838f7c7d583 100644
--- a/kernel/bpf/bpf_struct_ops.c
+++ b/kernel/bpf/bpf_struct_ops.c
@@ -641,12 +641,15 @@  static void __bpf_struct_ops_map_free(struct bpf_map *map)
 		bpf_jit_uncharge_modmem(PAGE_SIZE);
 	}
 	bpf_map_area_free(st_map->uvalue);
-	btf_put(st_map->btf);
 	bpf_map_area_free(st_map);
 }
 
 static void bpf_struct_ops_map_free(struct bpf_map *map)
 {
+	struct bpf_struct_ops_map *st_map = (struct bpf_struct_ops_map *)map;
+
+	module_put(st_map->st_ops_desc->st_ops->owner);
+
 	/* The struct_ops's function may switch to another struct_ops.
 	 *
 	 * For example, bpf_tcp_cc_x->init() may switch to
@@ -681,6 +684,7 @@  static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr)
 	size_t st_map_size;
 	struct bpf_struct_ops_map *st_map;
 	const struct btf_type *t, *vt;
+	struct module *mod = NULL;
 	struct bpf_map *map;
 	struct btf *btf;
 	int ret;
@@ -690,10 +694,20 @@  static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr)
 		btf = btf_get_by_fd(attr->value_type_btf_obj_fd);
 		if (IS_ERR(btf))
 			return ERR_PTR(PTR_ERR(btf));
-	} else {
+
+		if (btf != btf_vmlinux) {
+			mod = btf_try_get_module(btf);
+			if (!mod) {
+				btf_put(btf);
+				return ERR_PTR(-EINVAL);
+			}
+		}
+		/* mod (NULL for btf_vmlinux) holds a refcnt to btf. We
+		 * don't need an extra refcnt here.
+		 */
+		btf_put(btf);
+	} else
 		btf = btf_vmlinux;
-		btf_get(btf);
-	}
 
 	st_ops_desc = bpf_struct_ops_find_value(btf, attr->btf_vmlinux_value_type_id);
 	if (!st_ops_desc) {
@@ -756,7 +770,7 @@  static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr)
 errout_free:
 	__bpf_struct_ops_map_free(map);
 errout:
-	btf_put(btf);
+	module_put(mod);
 
 	return ERR_PTR(ret);
 }
@@ -886,6 +900,10 @@  static int bpf_struct_ops_map_link_update(struct bpf_link *link, struct bpf_map
 	if (!bpf_struct_ops_valid_to_reg(new_map))
 		return -EINVAL;
 
+	/* The old map is holding the refcount for the owner module.  The
+	 * ownership of the owner module refcount is going to be
+	 * transferred from the old map to the new map.
+	 */
 	if (!st_map->st_ops_desc->st_ops->update)
 		return -EOPNOTSUPP;
 
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 795c16f9cf57..c303cf2fb5ff 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -20079,6 +20079,14 @@  static int check_struct_ops_btf_id(struct bpf_verifier_env *env)
 	}
 
 	btf = prog->aux->attach_btf;
+	if (btf != btf_vmlinux) {
+		/* Make sure st_ops is valid through the lifetime of env */
+		env->attach_btf_mod = btf_try_get_module(btf);
+		if (!env->attach_btf_mod) {
+			verbose(env, "owner module of btf is not found\n");
+			return -ENOTSUPP;
+		}
+	}
 
 	btf_id = prog->aux->attach_btf_id;
 	st_ops_desc = bpf_struct_ops_find(btf, btf_id);
@@ -20792,6 +20800,8 @@  int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3
 		env->prog->expected_attach_type = 0;
 
 	*prog = env->prog;
+
+	module_put(env->attach_btf_mod);
 err_unlock:
 	if (!is_priv)
 		mutex_unlock(&bpf_verifier_lock);