diff mbox series

[trace/for-next,2/3] bpf: decouple BPF link/attach hook and BPF program sleepable semantics

Message ID 20241031210938.1696639-2-andrii@kernel.org (mailing list archive)
State Superseded
Headers show
Series [trace/for-next,1/3] bpf: put bpf_link's program when link is safe to be deallocated | expand

Commit Message

Andrii Nakryiko Oct. 31, 2024, 9:09 p.m. UTC
BPF link's lifecycle protection scheme depends on both BPF hook and BPF
program. If *either* of those require RCU Tasks Trace GP, then we need
to go through a chain of GPs before putting BPF program refcount and
deallocating BPF link memory.

This patch adds bpf_link-specific sleepable flag, which can be set to
true even if underlying BPF program is not sleepable itself. If either
link->sleepable or link->prog->sleepable is true, we'll go through
a chain of RCU Tasks Trace GP and RCU GP before putting BPF program and
freeing memory.

This will be used to protect BPF link for sleepable (faultable) raw
tracepoints in the next patch.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
---
 include/linux/bpf.h  | 22 +++++++++++++++++++---
 kernel/bpf/syscall.c | 39 ++++++++++++++++++++++++++++-----------
 2 files changed, 47 insertions(+), 14 deletions(-)

Comments

kernel test robot Nov. 1, 2024, 5:07 a.m. UTC | #1
Hi Andrii,

kernel test robot noticed the following build errors:

[auto build test ERROR on trace/for-next]

url:    https://github.com/intel-lab-lkp/linux/commits/Andrii-Nakryiko/bpf-decouple-BPF-link-attach-hook-and-BPF-program-sleepable-semantics/20241101-051131
base:   https://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace for-next
patch link:    https://lore.kernel.org/r/20241031210938.1696639-2-andrii%40kernel.org
patch subject: [PATCH trace/for-next 2/3] bpf: decouple BPF link/attach hook and BPF program sleepable semantics
config: x86_64-defconfig (https://download.01.org/0day-ci/archive/20241101/202411011244.LrXOUj8p-lkp@intel.com/config)
compiler: gcc-11 (Debian 11.3.0-12) 11.3.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20241101/202411011244.LrXOUj8p-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202411011244.LrXOUj8p-lkp@intel.com/

All errors (new ones prefixed by >>):

   net/core/dev.c: In function 'bpf_xdp_link_attach':
>> net/core/dev.c:9767:9: error: too few arguments to function 'bpf_link_init'
    9767 |         bpf_link_init(&link->link, BPF_LINK_TYPE_XDP, &bpf_xdp_link_lops, prog);
         |         ^~~~~~~~~~~~~
   In file included from include/linux/security.h:35,
                    from include/net/scm.h:9,
                    from include/linux/netlink.h:9,
                    from include/uapi/linux/neighbour.h:6,
                    from include/linux/netdevice.h:44,
                    from net/core/dev.c:92:
   include/linux/bpf.h:2724:20: note: declared here
    2724 | static inline void bpf_link_init(struct bpf_link *link, enum bpf_link_type type,
         |                    ^~~~~~~~~~~~~


vim +/bpf_link_init +9767 net/core/dev.c

aa8d3a716b59db Andrii Nakryiko 2020-07-21  9744  
aa8d3a716b59db Andrii Nakryiko 2020-07-21  9745  int bpf_xdp_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
aa8d3a716b59db Andrii Nakryiko 2020-07-21  9746  {
aa8d3a716b59db Andrii Nakryiko 2020-07-21  9747  	struct net *net = current->nsproxy->net_ns;
aa8d3a716b59db Andrii Nakryiko 2020-07-21  9748  	struct bpf_link_primer link_primer;
bf4ea1d0b2cb22 Leon Hwang      2023-08-01  9749  	struct netlink_ext_ack extack = {};
aa8d3a716b59db Andrii Nakryiko 2020-07-21  9750  	struct bpf_xdp_link *link;
aa8d3a716b59db Andrii Nakryiko 2020-07-21  9751  	struct net_device *dev;
aa8d3a716b59db Andrii Nakryiko 2020-07-21  9752  	int err, fd;
aa8d3a716b59db Andrii Nakryiko 2020-07-21  9753  
5acc7d3e8d3428 Xuan Zhuo       2021-07-10  9754  	rtnl_lock();
aa8d3a716b59db Andrii Nakryiko 2020-07-21  9755  	dev = dev_get_by_index(net, attr->link_create.target_ifindex);
5acc7d3e8d3428 Xuan Zhuo       2021-07-10  9756  	if (!dev) {
5acc7d3e8d3428 Xuan Zhuo       2021-07-10  9757  		rtnl_unlock();
aa8d3a716b59db Andrii Nakryiko 2020-07-21  9758  		return -EINVAL;
5acc7d3e8d3428 Xuan Zhuo       2021-07-10  9759  	}
aa8d3a716b59db Andrii Nakryiko 2020-07-21  9760  
aa8d3a716b59db Andrii Nakryiko 2020-07-21  9761  	link = kzalloc(sizeof(*link), GFP_USER);
aa8d3a716b59db Andrii Nakryiko 2020-07-21  9762  	if (!link) {
aa8d3a716b59db Andrii Nakryiko 2020-07-21  9763  		err = -ENOMEM;
5acc7d3e8d3428 Xuan Zhuo       2021-07-10  9764  		goto unlock;
aa8d3a716b59db Andrii Nakryiko 2020-07-21  9765  	}
aa8d3a716b59db Andrii Nakryiko 2020-07-21  9766  
aa8d3a716b59db Andrii Nakryiko 2020-07-21 @9767  	bpf_link_init(&link->link, BPF_LINK_TYPE_XDP, &bpf_xdp_link_lops, prog);
aa8d3a716b59db Andrii Nakryiko 2020-07-21  9768  	link->dev = dev;
aa8d3a716b59db Andrii Nakryiko 2020-07-21  9769  	link->flags = attr->link_create.flags;
aa8d3a716b59db Andrii Nakryiko 2020-07-21  9770  
aa8d3a716b59db Andrii Nakryiko 2020-07-21  9771  	err = bpf_link_prime(&link->link, &link_primer);
aa8d3a716b59db Andrii Nakryiko 2020-07-21  9772  	if (err) {
aa8d3a716b59db Andrii Nakryiko 2020-07-21  9773  		kfree(link);
5acc7d3e8d3428 Xuan Zhuo       2021-07-10  9774  		goto unlock;
aa8d3a716b59db Andrii Nakryiko 2020-07-21  9775  	}
aa8d3a716b59db Andrii Nakryiko 2020-07-21  9776  
bf4ea1d0b2cb22 Leon Hwang      2023-08-01  9777  	err = dev_xdp_attach_link(dev, &extack, link);
aa8d3a716b59db Andrii Nakryiko 2020-07-21  9778  	rtnl_unlock();
aa8d3a716b59db Andrii Nakryiko 2020-07-21  9779  
aa8d3a716b59db Andrii Nakryiko 2020-07-21  9780  	if (err) {
5acc7d3e8d3428 Xuan Zhuo       2021-07-10  9781  		link->dev = NULL;
aa8d3a716b59db Andrii Nakryiko 2020-07-21  9782  		bpf_link_cleanup(&link_primer);
bf4ea1d0b2cb22 Leon Hwang      2023-08-01  9783  		trace_bpf_xdp_link_attach_failed(extack._msg);
aa8d3a716b59db Andrii Nakryiko 2020-07-21  9784  		goto out_put_dev;
aa8d3a716b59db Andrii Nakryiko 2020-07-21  9785  	}
aa8d3a716b59db Andrii Nakryiko 2020-07-21  9786  
aa8d3a716b59db Andrii Nakryiko 2020-07-21  9787  	fd = bpf_link_settle(&link_primer);
aa8d3a716b59db Andrii Nakryiko 2020-07-21  9788  	/* link itself doesn't hold dev's refcnt to not complicate shutdown */
aa8d3a716b59db Andrii Nakryiko 2020-07-21  9789  	dev_put(dev);
aa8d3a716b59db Andrii Nakryiko 2020-07-21  9790  	return fd;
aa8d3a716b59db Andrii Nakryiko 2020-07-21  9791  
5acc7d3e8d3428 Xuan Zhuo       2021-07-10  9792  unlock:
5acc7d3e8d3428 Xuan Zhuo       2021-07-10  9793  	rtnl_unlock();
5acc7d3e8d3428 Xuan Zhuo       2021-07-10  9794  
aa8d3a716b59db Andrii Nakryiko 2020-07-21  9795  out_put_dev:
aa8d3a716b59db Andrii Nakryiko 2020-07-21  9796  	dev_put(dev);
aa8d3a716b59db Andrii Nakryiko 2020-07-21  9797  	return err;
aa8d3a716b59db Andrii Nakryiko 2020-07-21  9798  }
aa8d3a716b59db Andrii Nakryiko 2020-07-21  9799
Alexei Starovoitov Nov. 1, 2024, 4:26 p.m. UTC | #2
On Thu, Oct 31, 2024 at 2:23 PM Andrii Nakryiko <andrii@kernel.org> wrote:
>
>  static inline void bpf_link_init(struct bpf_link *link, enum bpf_link_type type,
>                                  const struct bpf_link_ops *ops,
> -                                struct bpf_prog *prog)
> +                                struct bpf_prog *prog, bool sleepable)
> +{
> +}

Obvious typo caught by build bot...
Other than that the set looks good.
Andrii Nakryiko Nov. 1, 2024, 5:51 p.m. UTC | #3
On Fri, Nov 1, 2024 at 9:27 AM Alexei Starovoitov
<alexei.starovoitov@gmail.com> wrote:
>
> On Thu, Oct 31, 2024 at 2:23 PM Andrii Nakryiko <andrii@kernel.org> wrote:
> >
> >  static inline void bpf_link_init(struct bpf_link *link, enum bpf_link_type type,
> >                                  const struct bpf_link_ops *ops,
> > -                                struct bpf_prog *prog)
> > +                                struct bpf_prog *prog, bool sleepable)
> > +{
> > +}
>
> Obvious typo caught by build bot...
> Other than that the set looks good.


Yeah, leftover from the initial attempt (I decided to not touch
bpf_link_init() in the end to avoid updating like 20 places where we
do this for various link types). I'll send a new version.
diff mbox series

Patch

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 19d8ca8ac960..83f44caf47e1 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1598,6 +1598,11 @@  struct bpf_link {
 	enum bpf_link_type type;
 	const struct bpf_link_ops *ops;
 	struct bpf_prog *prog;
+	/* whether BPF link itself has "sleepable" semantics, which can differ
+	 * from underlying BPF program having a "sleepable" semantics, as BPF
+	 * link's semantics is determined by target attach hook
+	 */
+	bool sleepable;
 	/* rcu is used before freeing, work can be used to schedule that
 	 * RCU-based freeing before that, so they never overlap
 	 */
@@ -1614,8 +1619,10 @@  struct bpf_link_ops {
 	 */
 	void (*dealloc)(struct bpf_link *link);
 	/* deallocate link resources callback, called after RCU grace period;
-	 * if underlying BPF program is sleepable we go through tasks trace
-	 * RCU GP and then "classic" RCU GP
+	 * if either the underlying BPF program is sleepable or BPF link's
+	 * target hook is sleepable, we'll go through tasks trace RCU GP and
+	 * then "classic" RCU GP; this need for chaining tasks trace and
+	 * classic RCU GPs is designated by setting bpf_link->sleepable flag
 	 */
 	void (*dealloc_deferred)(struct bpf_link *link);
 	int (*detach)(struct bpf_link *link);
@@ -2362,6 +2369,9 @@  int bpf_prog_new_fd(struct bpf_prog *prog);
 
 void bpf_link_init(struct bpf_link *link, enum bpf_link_type type,
 		   const struct bpf_link_ops *ops, struct bpf_prog *prog);
+void bpf_link_init_sleepable(struct bpf_link *link, enum bpf_link_type type,
+			     const struct bpf_link_ops *ops, struct bpf_prog *prog,
+			     bool sleepable);
 int bpf_link_prime(struct bpf_link *link, struct bpf_link_primer *primer);
 int bpf_link_settle(struct bpf_link_primer *primer);
 void bpf_link_cleanup(struct bpf_link_primer *primer);
@@ -2713,7 +2723,13 @@  bpf_prog_inc_not_zero(struct bpf_prog *prog)
 
 static inline void bpf_link_init(struct bpf_link *link, enum bpf_link_type type,
 				 const struct bpf_link_ops *ops,
-				 struct bpf_prog *prog)
+				 struct bpf_prog *prog, bool sleepable)
+{
+}
+
+static inline void bpf_link_init_sleepable(struct bpf_link *link, enum bpf_link_type type,
+					   const struct bpf_link_ops *ops, struct bpf_prog *prog,
+					   bool sleepable)
 {
 }
 
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index aa7246a399f3..0f5540627911 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -2933,17 +2933,33 @@  static int bpf_obj_get(const union bpf_attr *attr)
 				attr->file_flags);
 }
 
-void bpf_link_init(struct bpf_link *link, enum bpf_link_type type,
-		   const struct bpf_link_ops *ops, struct bpf_prog *prog)
+/* bpf_link_init_sleepable() allows to specify whether BPF link itself has
+ * "sleepable" semantics, which normally would mean that BPF link's attach
+ * hook can dereference link or link's underlying program for some time after
+ * detachment due to RCU Tasks Trace-based lifetime protection scheme.
+ * BPF program itself can be non-sleepable, yet, because it's transitively
+ * reachable through BPF link, its freeing has to be delayed until after RCU
+ * Tasks Trace GP.
+ */
+void bpf_link_init_sleepable(struct bpf_link *link, enum bpf_link_type type,
+			     const struct bpf_link_ops *ops, struct bpf_prog *prog,
+			     bool sleepable)
 {
 	WARN_ON(ops->dealloc && ops->dealloc_deferred);
 	atomic64_set(&link->refcnt, 1);
 	link->type = type;
+	link->sleepable = sleepable;
 	link->id = 0;
 	link->ops = ops;
 	link->prog = prog;
 }
 
+void bpf_link_init(struct bpf_link *link, enum bpf_link_type type,
+		   const struct bpf_link_ops *ops, struct bpf_prog *prog)
+{
+	bpf_link_init_sleepable(link, type, ops, prog, false);
+}
+
 static void bpf_link_free_id(int id)
 {
 	if (!id)
@@ -3008,20 +3024,21 @@  static void bpf_link_defer_dealloc_mult_rcu_gp(struct rcu_head *rcu)
 static void bpf_link_free(struct bpf_link *link)
 {
 	const struct bpf_link_ops *ops = link->ops;
-	bool sleepable = false;
 
 	bpf_link_free_id(link->id);
-	if (link->prog) {
-		sleepable = link->prog->sleepable;
-		/* detach BPF program, clean up used resources */
+	/* detach BPF program, clean up used resources */
+	if (link->prog)
 		ops->release(link);
-	}
 	if (ops->dealloc_deferred) {
-		/* schedule BPF link deallocation; if underlying BPF program
-		 * is sleepable, we need to first wait for RCU tasks trace
-		 * sync, then go through "classic" RCU grace period
+		/* Schedule BPF link deallocation, which will only then
+		 * trigger putting BPF program refcount.
+		 * If underlying BPF program is sleepable or BPF link's target
+		 * attach hookpoint is sleepable or otherwise requires RCU GPs
+		 * to ensure link and its underlying BPF program is not
+		 * reachable anymore, we need to first wait for RCU tasks
+		 * trace sync, and then go through "classic" RCU grace period
 		 */
-		if (sleepable)
+		if (link->sleepable || (link->prog && link->prog->sleepable))
 			call_rcu_tasks_trace(&link->rcu, bpf_link_defer_dealloc_mult_rcu_gp);
 		else
 			call_rcu(&link->rcu, bpf_link_defer_dealloc_rcu_gp);