
[v2,bpf-next,2/5] libbpf: Prepare light skeleton for the kernel.

Message ID 20220208191306.6136-3-alexei.starovoitov@gmail.com (mailing list archive)
State Superseded
Delegated to: BPF
Series bpf: Light skeleton for the kernel.

Checks

Context Check Description
bpf/vmtest-bpf-next-PR fail PR summary
netdev/tree_selection success Clearly marked for bpf-next
netdev/fixes_present success Fixes tag not required for -next series
netdev/subject_prefix success Link
netdev/cover_letter success Series has a cover letter
netdev/patch_count success Link
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 0 this patch: 0
netdev/cc_maintainers warning 7 maintainers not CCed: kpsingh@kernel.org john.fastabend@gmail.com kafai@fb.com songliubraving@fb.com ast@kernel.org yhs@fb.com netdev@vger.kernel.org
netdev/build_clang success Errors and warnings before: 0 this patch: 0
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 0 this patch: 0
netdev/checkpatch fail CHECK: No space is necessary after a cast CHECK: multiple assignments should be avoided ERROR: Macros with complex values should be enclosed in parentheses WARNING: line length of 82 exceeds 80 columns WARNING: line length of 83 exceeds 80 columns WARNING: line length of 84 exceeds 80 columns WARNING: line length of 86 exceeds 80 columns WARNING: line length of 87 exceeds 80 columns WARNING: line length of 90 exceeds 80 columns WARNING: line length of 96 exceeds 80 columns
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
bpf/vmtest-bpf-next fail VM_Test

Commit Message

Alexei Starovoitov Feb. 8, 2022, 7:13 p.m. UTC
From: Alexei Starovoitov <ast@kernel.org>

Prepare the light skeleton to be used both in kernel modules and in user space.
The look and feel of lskel.h is mostly the same, with one difference: in user
space skel->rodata is the same pointer before and after the skel_load
operation, while in the kernel skel->rodata after skel_open and skel->rodata
after skel_load are different pointers.
Typical skeleton usage remains the same for kernel and user space:
skel = my_bpf__open();
skel->rodata->my_global_var = init_val;
err = my_bpf__load(skel);
err = my_bpf__attach(skel);
// access skel->rodata->my_global_var;
// access skel->bss->another_var;
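
Because the kernel-mode rodata pointer is replaced during load (it ends up
pointing into the bpf array map), a kernel module must not cache
&skel->rodata->... across the load step. A minimal sketch of the caveat this
implies, reusing the hypothetical my_bpf skeleton from above:

skel = my_bpf__open();
int *var = &skel->rodata->my_global_var; /* in the kernel, valid only until my_bpf__load() */
*var = init_val;
err = my_bpf__load(skel);
var = &skel->rodata->my_global_var;      /* re-read: rodata was re-pointed during load */
// access *var;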

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/lib/bpf/skel_internal.h | 193 +++++++++++++++++++++++++++++++---
 1 file changed, 176 insertions(+), 17 deletions(-)

Comments

Yonghong Song Feb. 9, 2022, 12:13 a.m. UTC | #1
On 2/8/22 11:13 AM, Alexei Starovoitov wrote:
> From: Alexei Starovoitov <ast@kernel.org>
> 
> [...]
> +static inline void *skel_finalize_map_data(__u64 *addr, size_t mmap_sz, int flags, int fd)
> +{
> +	struct bpf_map *map;
> +	void *ptr = NULL;
> +
> +	vm_munmap(*addr, mmap_sz);
> +	*addr = ~0ULL;
> +
> +	map = bpf_map_get(fd);
> +	if (IS_ERR(map))
> +		return NULL;
> +	if (map->map_type != BPF_MAP_TYPE_ARRAY)
> +		goto out;

Should we do more map validation here, e.g., max_entries = 1
and also checking value_size?

> +	ptr = ((struct bpf_array *)map)->value;
> +	/* the ptr stays valid, since FD is not closed */
> +out:
> +	bpf_map_put(map);
> +	return ptr;
> +}
> +
[...]
Alexei Starovoitov Feb. 9, 2022, 12:44 a.m. UTC | #2
On Tue, Feb 08, 2022 at 04:13:01PM -0800, Yonghong Song wrote:
> 
> 
> On 2/8/22 11:13 AM, Alexei Starovoitov wrote:
> > From: Alexei Starovoitov <ast@kernel.org>
> > 
> > [...]
> > +static inline void *skel_finalize_map_data(__u64 *addr, size_t mmap_sz, int flags, int fd)
> > +{
> > +	struct bpf_map *map;
> > +	void *ptr = NULL;
> > +
> > +	vm_munmap(*addr, mmap_sz);
> > +	*addr = ~0ULL;
> > +
> > +	map = bpf_map_get(fd);
> > +	if (IS_ERR(map))
> > +		return NULL;
> > +	if (map->map_type != BPF_MAP_TYPE_ARRAY)
> > +		goto out;
> 
> Should we do more map validation here, e.g., max_entries = 1
> and also checking value_size?

The map_type check is a sanity check.
It should be valid by construction of loader prog.
The map is also mmap-able and when signed progs will come to life it will be
frozen and signature checked.
rodata map should be readonly too, but ((struct bpf_array *)map)->value
direct access assumes that the kernel module won't mess with the values.
imo map_type check is enough. More checks feels like overkill.
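
For reference, the stricter validation being discussed would only be a couple
of extra lines. A sketch (max_entries and value_size are real fields of
struct bpf_map; the expected value size would have to be passed in as an
extra parameter, which the current skel_finalize_map_data() signature does
not carry, so expected_val_sz below is hypothetical):

	map = bpf_map_get(fd);
	if (IS_ERR(map))
		return NULL;
	/* sanity checks: the loader prog creates this map as a
	 * single-element array, so anything else indicates a bug
	 */
	if (map->map_type != BPF_MAP_TYPE_ARRAY ||
	    map->max_entries != 1 ||
	    map->value_size != expected_val_sz)
		goto out;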
Yonghong Song Feb. 9, 2022, 1:27 a.m. UTC | #3
On 2/8/22 4:44 PM, Alexei Starovoitov wrote:
> On Tue, Feb 08, 2022 at 04:13:01PM -0800, Yonghong Song wrote:
>>> [...]
>>> +static inline void *skel_finalize_map_data(__u64 *addr, size_t mmap_sz, int flags, int fd)
>>> +{
>>> +	struct bpf_map *map;
>>> +	void *ptr = NULL;
>>> +
>>> +	vm_munmap(*addr, mmap_sz);
>>> +	*addr = ~0ULL;
>>> +
>>> +	map = bpf_map_get(fd);
>>> +	if (IS_ERR(map))
>>> +		return NULL;
>>> +	if (map->map_type != BPF_MAP_TYPE_ARRAY)
>>> +		goto out;
>>
>> Should we do more map validation here, e.g., max_entries = 1
>> and also checking value_size?
> 
> The map_type check is a sanity check.

Yes, I am aware of this as a sanity check. I am just wondering if we do 
this check whether we should do other checks as well.

> It should be valid by construction of loader prog.
> The map is also mmap-able and when signed progs will come to life it will be
> frozen and signature checked.
> rodata map should be readonly too, but ((struct bpf_array *)map)->value
> direct access assumes that the kernel module won't mess with the values.
> imo map_type check is enough. More checks feels like overkill.

I am okay with this. Maybe add a comment right before
bpf_map_get() to explain map with fd is created by loader prog so
map should be a valid array map with max_entries 1, the IS_ERR
and map_type checks are just sanity checks.
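
A sketch of such a comment (wording is only a suggestion):

	/* The fd comes from the loader prog, which created the map as a
	 * mmap-able single-element BPF_MAP_TYPE_ARRAY, so the IS_ERR and
	 * map_type tests below are sanity checks only.
	 */
	map = bpf_map_get(fd);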

With this,

Acked-by: Yonghong Song <yhs@fb.com>

Patch

diff --git a/tools/lib/bpf/skel_internal.h b/tools/lib/bpf/skel_internal.h
index dcd3336512d4..d16544666341 100644
--- a/tools/lib/bpf/skel_internal.h
+++ b/tools/lib/bpf/skel_internal.h
@@ -3,9 +3,19 @@ 
 #ifndef __SKEL_INTERNAL_H
 #define __SKEL_INTERNAL_H
 
+#ifdef __KERNEL__
+#include <linux/fdtable.h>
+#include <linux/mm.h>
+#include <linux/mman.h>
+#include <linux/slab.h>
+#include <linux/bpf.h>
+#else
 #include <unistd.h>
 #include <sys/syscall.h>
 #include <sys/mman.h>
+#include <stdlib.h>
+#include "bpf.h"
+#endif
 
 #ifndef __NR_bpf
 # if defined(__mips__) && defined(_ABIO32)
@@ -25,17 +35,11 @@ 
  * requested during loader program generation.
  */
 struct bpf_map_desc {
-	union {
-		/* input for the loader prog */
-		struct {
-			__aligned_u64 initial_value;
-			__u32 max_entries;
-		};
-		/* output of the loader prog */
-		struct {
-			int map_fd;
-		};
-	};
+	/* output of the loader prog */
+	int map_fd;
+	/* input for the loader prog */
+	__u32 max_entries;
+	__aligned_u64 initial_value;
 };
 struct bpf_prog_desc {
 	int prog_fd;
@@ -57,12 +61,159 @@  struct bpf_load_and_run_opts {
 	const char *errstr;
 };
 
+long bpf_sys_bpf(__u32 cmd, void *attr, __u32 attr_size);
+
 static inline int skel_sys_bpf(enum bpf_cmd cmd, union bpf_attr *attr,
 			  unsigned int size)
 {
+#ifdef __KERNEL__
+	return bpf_sys_bpf(cmd, attr, size);
+#else
 	return syscall(__NR_bpf, cmd, attr, size);
+#endif
+}
+
+#ifdef __KERNEL__
+static inline int close(int fd)
+{
+	return close_fd(fd);
+}
+
+static inline void *skel_alloc(size_t size)
+{
+	return kcalloc(1, size, GFP_KERNEL);
+}
+
+static inline void skel_free(const void *p)
+{
+	kfree(p);
+}
+
+/* skel->bss/rodata maps are populated in three steps.
+ *
+ * For kernel use:
+ * skel_prep_map_data() allocates kernel memory that the kernel module can directly access.
+ * skel_prep_init_value() allocates a region in the user space process and copies
+ * the potentially modified initial map value into it.
+ * The loader program will perform copy_from_user() from maps.rodata.initial_value.
+ * skel_finalize_map_data() sets skel->rodata to point to the actual value in a bpf map and
+ * does maps.rodata.initial_value = ~0ULL to signal skel_free_map_data() that kvfree
+ * is not necessary.
+ *
+ * For user space:
+ * skel_prep_map_data() mmaps anon memory into skel->rodata that can be accessed directly.
+ * skel_prep_init_value() copies the rodata pointer into map.rodata.initial_value.
+ * The loader program will perform copy_from_user() from maps.rodata.initial_value.
+ * skel_finalize_map_data() remaps the bpf array map value from kernel memory into
+ * the skel->rodata address.
+ *
+ * The "bpftool gen skeleton -L" command generates an lskel.h that is suitable for
+ * both kernel and user space. The generated loader program does
+ * copy_from_user() from initial_value. Therefore the vm_mmap+copy_to_user step
+ * is needed when lskel is used from a kernel module.
+ */
+static inline void skel_free_map_data(void *p, __u64 addr, size_t sz)
+{
+	if (addr && addr != ~0ULL)
+		vm_munmap(addr, sz);
+	if (addr != ~0ULL)
+		kvfree(p);
+	/* When addr == ~0ULL the 'p' points to
+	 * ((struct bpf_array *)map)->value. See skel_finalize_map_data.
+	 */
+}
+
+static inline void *skel_prep_map_data(const void *val, size_t mmap_sz, size_t val_sz)
+{
+	void *addr;
+
+	addr = kvmalloc(val_sz, GFP_KERNEL);
+	if (!addr)
+		return NULL;
+	memcpy(addr, val, val_sz);
+	return addr;
+}
+
+static inline __u64 skel_prep_init_value(void **addr, size_t mmap_sz, size_t val_sz)
+{
+	__u64 ret = 0;
+	void *uaddr;
+
+	uaddr = (void *) vm_mmap(NULL, 0, mmap_sz, PROT_READ | PROT_WRITE,
+				 MAP_SHARED | MAP_ANONYMOUS, 0);
+	if (IS_ERR(uaddr))
+		goto out;
+	if (copy_to_user(uaddr, *addr, val_sz)) {
+		vm_munmap((long) uaddr, mmap_sz);
+		goto out;
+	}
+	ret = (__u64) (long) uaddr;
+out:
+	kvfree(*addr);
+	*addr = NULL;
+	return ret;
 }
 
+static inline void *skel_finalize_map_data(__u64 *addr, size_t mmap_sz, int flags, int fd)
+{
+	struct bpf_map *map;
+	void *ptr = NULL;
+
+	vm_munmap(*addr, mmap_sz);
+	*addr = ~0ULL;
+
+	map = bpf_map_get(fd);
+	if (IS_ERR(map))
+		return NULL;
+	if (map->map_type != BPF_MAP_TYPE_ARRAY)
+		goto out;
+	ptr = ((struct bpf_array *)map)->value;
+	/* the ptr stays valid, since FD is not closed */
+out:
+	bpf_map_put(map);
+	return ptr;
+}
+
+#else
+
+static inline void *skel_alloc(size_t size)
+{
+	return calloc(1, size);
+}
+
+static inline void skel_free(void *p)
+{
+	free(p);
+}
+
+static inline void skel_free_map_data(void *p, __u64 addr, size_t sz)
+{
+	munmap(p, sz);
+}
+
+static inline void *skel_prep_map_data(const void *val, size_t mmap_sz, size_t val_sz)
+{
+	void *addr;
+
+	addr = mmap(NULL, mmap_sz, PROT_READ | PROT_WRITE,
+		    MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+	if (addr == (void *) -1)
+		return NULL;
+	memcpy(addr, val, val_sz);
+	return addr;
+}
+
+static inline __u64 skel_prep_init_value(void **addr, size_t mmap_sz, size_t val_sz)
+{
+	return (__u64) (long) *addr;
+}
+
+static inline void *skel_finalize_map_data(__u64 *addr, size_t mmap_sz, int flags, int fd)
+{
+	return mmap((void *)*addr, mmap_sz, flags, MAP_SHARED | MAP_FIXED, fd, 0);
+}
+#endif
+
 static inline int skel_closenz(int fd)
 {
 	if (fd > 0)
@@ -136,22 +287,28 @@  static inline int skel_link_create(int prog_fd, int target_fd,
 	return skel_sys_bpf(BPF_LINK_CREATE, &attr, attr_sz);
 }
 
+#ifdef __KERNEL__
+#define set_err
+#else
+#define set_err err = -errno
+#endif
+
 static inline int bpf_load_and_run(struct bpf_load_and_run_opts *opts)
 {
 	int map_fd = -1, prog_fd = -1, key = 0, err;
 	union bpf_attr attr;
 
-	map_fd = skel_map_create(BPF_MAP_TYPE_ARRAY, "__loader.map", 4, opts->data_sz, 1);
+	err = map_fd = skel_map_create(BPF_MAP_TYPE_ARRAY, "__loader.map", 4, opts->data_sz, 1);
 	if (map_fd < 0) {
 		opts->errstr = "failed to create loader map";
-		err = -errno;
+		set_err;
 		goto out;
 	}
 
 	err = skel_map_update_elem(map_fd, &key, opts->data, 0);
 	if (err < 0) {
 		opts->errstr = "failed to update loader map";
-		err = -errno;
+		set_err;
 		goto out;
 	}
 
@@ -166,10 +323,10 @@  static inline int bpf_load_and_run(struct bpf_load_and_run_opts *opts)
 	attr.log_size = opts->ctx->log_size;
 	attr.log_buf = opts->ctx->log_buf;
 	attr.prog_flags = BPF_F_SLEEPABLE;
-	prog_fd = skel_sys_bpf(BPF_PROG_LOAD, &attr, sizeof(attr));
+	err = prog_fd = skel_sys_bpf(BPF_PROG_LOAD, &attr, sizeof(attr));
 	if (prog_fd < 0) {
 		opts->errstr = "failed to load loader prog";
-		err = -errno;
+		set_err;
 		goto out;
 	}
 
@@ -181,10 +338,12 @@  static inline int bpf_load_and_run(struct bpf_load_and_run_opts *opts)
 	if (err < 0 || (int)attr.test.retval < 0) {
 		opts->errstr = "failed to execute loader prog";
 		if (err < 0) {
-			err = -errno;
+			set_err;
 		} else {
 			err = (int)attr.test.retval;
+#ifndef __KERNEL__
 			errno = -err;
+#endif
 		}
 		goto out;
 	}