[v3,bpf-next,02/15] bpf: net: Avoid sk_setsockopt() taking sk lock when called from bpf

Message ID 20220810190736.2693150-1-kafai@fb.com (mailing list archive)
State Superseded
Delegated to: BPF
Series bpf: net: Remove duplicated code from bpf_setsockopt()

Checks

Context Check Description
bpf/vmtest-bpf-next-PR fail PR summary
netdev/tree_selection success Clearly marked for bpf-next, async
netdev/fixes_present success Fixes tag not required for -next series
netdev/subject_prefix success Link
netdev/cover_letter success Series has a cover letter
netdev/patch_count success Link
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 3022 this patch: 3022
netdev/cc_maintainers warning 7 maintainers not CCed: john.fastabend@gmail.com song@kernel.org martin.lau@linux.dev kpsingh@kernel.org jolsa@kernel.org haoluo@google.com yhs@fb.com
netdev/build_clang success Errors and warnings before: 626 this patch: 626
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 3134 this patch: 3134
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 89 lines checked
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
bpf/vmtest-bpf-next-VM_Test-1 fail Logs for Kernel LATEST on ubuntu-latest with gcc
bpf/vmtest-bpf-next-VM_Test-2 fail Logs for Kernel LATEST on ubuntu-latest with llvm-16
bpf/vmtest-bpf-next-VM_Test-3 fail Logs for Kernel LATEST on z15 with gcc
bpf/vmtest-bpf-next-VM_Test-4 success Logs for llvm-toolchain

Commit Message

Martin KaFai Lau Aug. 10, 2022, 7:07 p.m. UTC
Most of the code in bpf_setsockopt(SOL_SOCKET) is duplicated from
sk_setsockopt().  The number of supported optnames keeps
increasing and so does the duplicated code.

One issue in reusing sk_setsockopt() is that the bpf prog
has already acquired the sk lock.  This patch adds a
has_current_bpf_ctx() helper to tell if sk_setsockopt() is called
from a bpf prog.  The bpf prog calling bpf_setsockopt() is either
running in_task() or in_serving_softirq().  Both cases have
current->bpf_ctx initialized.  Thus, has_current_bpf_ctx() only
needs to test !!current->bpf_ctx.
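
For illustration, a hypothetical sketch (not part of this patch) of how
a later patch could make bpf_setsockopt() reuse sk_setsockopt().  The
_bpf_setsockopt() name and the direct sk_setsockopt() call are
assumptions here; only KERNEL_SOCKPTR() is existing kernel API:

	/* Hypothetical caller: the bpf prog already holds the sk lock
	 * and has current->bpf_ctx set, so the sockopt_lock_sock()
	 * inside sk_setsockopt() will skip lock_sock(sk).
	 */
	static int _bpf_setsockopt(struct sock *sk, int level, int optname,
				   char *optval, int optlen)
	{
		if (level != SOL_SOCKET)
			return -EINVAL;

		return sk_setsockopt(sk, SOL_SOCKET, optname,
				     KERNEL_SOCKPTR(optval), optlen);
	}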

This patch also adds sockopt_{lock,release}_sock() helpers
for sk_setsockopt() to use.  These helpers will test
has_current_bpf_ctx() before acquiring/releasing the lock.  They
are exported with EXPORT_SYMBOL for the ipv6 module to use in a
later patch.

Note on the change in sock_setbindtodevice(): sockopt_lock_sock()
is now done in sock_setbindtodevice() itself instead of having
sock_bindtoindex(..., lock_sk = true) take the lock.

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
---
 include/linux/bpf.h | 14 ++++++++++++++
 include/net/sock.h  |  3 +++
 net/core/sock.c     | 30 +++++++++++++++++++++++++++---
 3 files changed, 44 insertions(+), 3 deletions(-)

Comments

Andrii Nakryiko Aug. 16, 2022, 3:32 a.m. UTC | #1
On Wed, Aug 10, 2022 at 12:10 PM Martin KaFai Lau <kafai@fb.com> wrote:
>
> Most of the code in bpf_setsockopt(SOL_SOCKET) is duplicated from
> sk_setsockopt().  The number of supported optnames keeps
> increasing and so does the duplicated code.
>
> One issue in reusing sk_setsockopt() is that the bpf prog
> has already acquired the sk lock.  This patch adds a
> has_current_bpf_ctx() helper to tell if sk_setsockopt() is called
> from a bpf prog.  The bpf prog calling bpf_setsockopt() is either
> running in_task() or in_serving_softirq().  Both cases have
> current->bpf_ctx initialized.  Thus, has_current_bpf_ctx() only
> needs to test !!current->bpf_ctx.
>
> This patch also adds sockopt_{lock,release}_sock() helpers
> for sk_setsockopt() to use.  These helpers will test
> has_current_bpf_ctx() before acquiring/releasing the lock.  They
> are exported with EXPORT_SYMBOL for the ipv6 module to use in a
> later patch.
>
> Note on the change in sock_setbindtodevice(): sockopt_lock_sock()
> is now done in sock_setbindtodevice() itself instead of having
> sock_bindtoindex(..., lock_sk = true) take the lock.
>
> Signed-off-by: Martin KaFai Lau <kafai@fb.com>
> ---
>  include/linux/bpf.h | 14 ++++++++++++++
>  include/net/sock.h  |  3 +++
>  net/core/sock.c     | 30 +++++++++++++++++++++++++++---
>  3 files changed, 44 insertions(+), 3 deletions(-)
>
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index a627a02cf8ab..0a600b2013cc 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -1966,6 +1966,16 @@ static inline bool unprivileged_ebpf_enabled(void)
>         return !sysctl_unprivileged_bpf_disabled;
>  }
>
> +/* Not all bpf prog types have the bpf_ctx.
> + * Only trampoline and cgroup-bpf have it.

this is already not true (perf_event and kprobe/uprobe/tp progs have
bpf_ctx as well) and can easily get out of sync in the future, so I'd
drop the list of types that support bpf_ctx.
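
For reference, a simplified sketch of the runner pattern that makes
current->bpf_ctx non-NULL while a prog runs (paraphrased from the
in-tree bpf_set_run_ctx()/bpf_reset_run_ctx() usage; the wrapper name
run_prog_with_ctx() is made up):

	/* current->bpf_ctx points at run_ctx.run_ctx for the duration
	 * of the prog run; has_current_bpf_ctx() keys off exactly this.
	 */
	static u32 run_prog_with_ctx(const struct bpf_prog *prog, void *ctx)
	{
		struct bpf_cg_run_ctx run_ctx = {};
		struct bpf_run_ctx *old_run_ctx;
		u32 ret;

		old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);
		ret = bpf_prog_run(prog, ctx);
		bpf_reset_run_ctx(old_run_ctx);
		return ret;
	}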

> + * For the bpf prog types that have initialized the bpf_ctx,
> + * this function can be used to decide if a kernel function
> + * is called by a bpf program.
> + */
> +static inline bool has_current_bpf_ctx(void)
> +{
> +       return !!current->bpf_ctx;
> +}
>  #else /* !CONFIG_BPF_SYSCALL */

[...]

Patch

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index a627a02cf8ab..0a600b2013cc 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1966,6 +1966,16 @@ static inline bool unprivileged_ebpf_enabled(void)
 	return !sysctl_unprivileged_bpf_disabled;
 }
 
+/* Not all bpf prog types have the bpf_ctx.
+ * Only trampoline and cgroup-bpf have it.
+ * For the bpf prog types that have initialized the bpf_ctx,
+ * this function can be used to decide if a kernel function
+ * is called by a bpf program.
+ */
+static inline bool has_current_bpf_ctx(void)
+{
+	return !!current->bpf_ctx;
+}
 #else /* !CONFIG_BPF_SYSCALL */
 static inline struct bpf_prog *bpf_prog_get(u32 ufd)
 {
@@ -2175,6 +2185,10 @@ static inline bool unprivileged_ebpf_enabled(void)
 	return false;
 }
 
+static inline bool has_current_bpf_ctx(void)
+{
+	return false;
+}
 #endif /* CONFIG_BPF_SYSCALL */
 
 void __bpf_free_used_btfs(struct bpf_prog_aux *aux,
diff --git a/include/net/sock.h b/include/net/sock.h
index a7273b289188..b2ff230860c6 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1721,6 +1721,9 @@ static inline void unlock_sock_fast(struct sock *sk, bool slow)
 	}
 }
 
+void sockopt_lock_sock(struct sock *sk);
+void sockopt_release_sock(struct sock *sk);
+
 /* Used by processes to "lock" a socket state, so that
  * interrupts and bottom half handlers won't change it
  * from under us. It essentially blocks any incoming
diff --git a/net/core/sock.c b/net/core/sock.c
index 20269c37ab3b..d3683228376f 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -703,7 +703,9 @@ static int sock_setbindtodevice(struct sock *sk, sockptr_t optval, int optlen)
 			goto out;
 	}
 
-	return sock_bindtoindex(sk, index, true);
+	sockopt_lock_sock(sk);
+	ret = sock_bindtoindex_locked(sk, index);
+	sockopt_release_sock(sk);
 out:
 #endif
 
@@ -1036,6 +1038,28 @@ static int sock_reserve_memory(struct sock *sk, int bytes)
 	return 0;
 }
 
+void sockopt_lock_sock(struct sock *sk)
+{
+	/* When current->bpf_ctx is set, the setsockopt is called from
+	 * a bpf prog.  bpf has ensured the sk lock has been
+	 * acquired before calling setsockopt().
+	 */
+	if (has_current_bpf_ctx())
+		return;
+
+	lock_sock(sk);
+}
+EXPORT_SYMBOL(sockopt_lock_sock);
+
+void sockopt_release_sock(struct sock *sk)
+{
+	if (has_current_bpf_ctx())
+		return;
+
+	release_sock(sk);
+}
+EXPORT_SYMBOL(sockopt_release_sock);
+
 /*
  *	This is meant for all protocols to use and covers goings on
  *	at the socket level. Everything here is generic.
@@ -1067,7 +1091,7 @@ static int sk_setsockopt(struct sock *sk, int level, int optname,
 
 	valbool = val ? 1 : 0;
 
-	lock_sock(sk);
+	sockopt_lock_sock(sk);
 
 	switch (optname) {
 	case SO_DEBUG:
@@ -1496,7 +1520,7 @@ static int sk_setsockopt(struct sock *sk, int level, int optname,
 		ret = -ENOPROTOOPT;
 		break;
 	}
-	release_sock(sk);
+	sockopt_release_sock(sk);
 	return ret;
 }