diff mbox series

bpf_task_storage improvement question

Message ID qwinzohs4pwawth5i6g7hfb2376pyfmkurbo2rwvglv77asbkr@mq2goetrtmpu (mailing list archive)
State RFC
Headers show
Series bpf_task_storage improvement question | expand

Checks

Context Check Description
netdev/tree_selection success Guessing tree name failed - patch did not apply

Commit Message

Christian Brauner Dec. 2, 2024, 12:38 p.m. UTC
Hey,

I just had to take a quick look at kernel/bpf/bpf_task_storage.c and
realized that you're doing:


	fd = *(int *)key;
	pid = pidfd_get_pid(fd, &f_flags);

	// something something

	task = pid_task(pid, PIDTYPE_PID);

	bpf_task_storage_lock();
	// something something
	bpf_task_storage_unlock();
	put_pid(pid);

That reference count bump on struct pid seems unnecessary, and I suspect
your lookup routines are supposed to be fast. So why don't you just
open-code this? Something like the patch below.

The struct pid remains pinned by the pidfd anyway.

Comments

Martin KaFai Lau Dec. 2, 2024, 7:37 p.m. UTC | #1
On 12/2/24 4:38 AM, Christian Brauner wrote:
> Hey,
> 
> I just had to take a quick look at kernel/bpf/bpf_task_storage.c and
> realized that you're doing:
> 
> 
> 	fd = *(int *)key;
> 	pid = pidfd_get_pid(fd, &f_flags);
> 
> 	// something something
> 
> 	task = pid_task(pid, PIDTYPE_PID);
> 
> 	bpf_task_storage_lock();
> 	// something something
> 	bpf_task_storage_unlock();
> 	put_pid(pid);
> 
> That reference count bump on struct pid seems unnecessary and I suspect
> your lookup routines are supposed to be fast. So why don't you just
> open-code this. Something like:
> 
> diff --git a/kernel/bpf/bpf_task_storage.c b/kernel/bpf/bpf_task_storage.c
> index bf7fa15fdcc6..dc36a33c7b6d 100644
> --- a/kernel/bpf/bpf_task_storage.c
> +++ b/kernel/bpf/bpf_task_storage.c
> @@ -92,10 +92,12 @@ static void *bpf_pid_task_storage_lookup_elem(struct bpf_map *map, void *key)
>          struct task_struct *task;
>          unsigned int f_flags;
>          struct pid *pid;
> -       int fd, err;
> 
> -       fd = *(int *)key;
> -       pid = pidfd_get_pid(fd, &f_flags);
> +       CLASS(fd, f)(*(int *)key);
> +       if (fd_empty(f))
> +               return -EBADF;
> +
> +       pid = pidfd_pid(f);
>          if (IS_ERR(pid))
>                  return ERR_CAST(pid);
> 
> @@ -104,19 +106,13 @@ static void *bpf_pid_task_storage_lookup_elem(struct bpf_map *map, void *key)
>           */
>          WARN_ON_ONCE(!rcu_read_lock_held());
>          task = pid_task(pid, PIDTYPE_PID);
> -       if (!task) {
> -               err = -ENOENT;
> -               goto out;
> -       }
> +       if (!task)
> +               return ERR_PTR(-ENOENT);
> 
>          bpf_task_storage_lock();
>          sdata = task_storage_lookup(task, map, true);
>          bpf_task_storage_unlock();
> -       put_pid(pid);
>          return sdata ? sdata->data : NULL;
> -out:
> -       put_pid(pid);
> -       return ERR_PTR(err);
>   }
> 
> which avoids the reference count bumps on @pid.
> It remains pinned by the pidfd anyway.

The "bpf_pid_task_storage_lookup_elem()" function is used by the syscall path,
which probably gets less attention. The bpf prog uses a different function,
"__bpf_task_storage_get()", which is handed a task pointer directly.

The change makes sense to me. It is a nice improvement on the syscall path. It
would be great if you could post a patch for it. Thanks.
diff mbox series

Patch

diff --git a/kernel/bpf/bpf_task_storage.c b/kernel/bpf/bpf_task_storage.c
index bf7fa15fdcc6..dc36a33c7b6d 100644
--- a/kernel/bpf/bpf_task_storage.c
+++ b/kernel/bpf/bpf_task_storage.c
@@ -92,10 +92,12 @@  static void *bpf_pid_task_storage_lookup_elem(struct bpf_map *map, void *key)
        struct task_struct *task;
        unsigned int f_flags;
        struct pid *pid;
-       int fd, err;

-       fd = *(int *)key;
-       pid = pidfd_get_pid(fd, &f_flags);
+       CLASS(fd, f)(*(int *)key);
+       if (fd_empty(f))
+               return -EBADF;
+
+       pid = pidfd_pid(f);
        if (IS_ERR(pid))
                return ERR_CAST(pid);

@@ -104,19 +106,13 @@  static void *bpf_pid_task_storage_lookup_elem(struct bpf_map *map, void *key)
         */
        WARN_ON_ONCE(!rcu_read_lock_held());
        task = pid_task(pid, PIDTYPE_PID);
-       if (!task) {
-               err = -ENOENT;
-               goto out;
-       }
+       if (!task)
+               return ERR_PTR(-ENOENT);

        bpf_task_storage_lock();
        sdata = task_storage_lookup(task, map, true);
        bpf_task_storage_unlock();
-       put_pid(pid);
        return sdata ? sdata->data : NULL;
-out:
-       put_pid(pid);
-       return ERR_PTR(err);
 }

This avoids the reference count bumps on @pid.