diff mbox series

[22/24] rcu/tiny: support reclaim for head-less object

Message ID 20200428205903.61704-23-urezki@gmail.com (mailing list archive)
State New, archived
Headers show
Series Introduce kvfree_rcu(1 or 2 arguments) | expand

Commit Message

Uladzislau Rezki April 28, 2020, 8:59 p.m. UTC
Make a kvfree_call_rcu() function to support head-less
freeing. Same as for tree-RCU, for such purpose we store
pointers in array. SLAB and vmalloc ptrs. are mixed and
coexist together.

Under high memory pressure it can be that maintaining of
arrays becomes impossible. Objects with an rcu_head are
released via call_rcu(). When it comes to the head-less
variant, the kvfree() call is directly inlined, i.e. we
do the same as for tree-RCU:
    a) wait until a grace period has elapsed;
    b) direct inlining of the kvfree() call.

Thus the current context has to follow might_sleep()
annotation. Also please note that for tiny-RCU any
call of synchronize_rcu() is actually a quiescent
state, therefore (a) does nothing.

Reviewed-by: Joel Fernandes (Google) <joel@joelfernandes.org>
Signed-off-by: Uladzislau Rezki (Sony) <urezki@gmail.com>
---
 kernel/rcu/tiny.c | 157 +++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 156 insertions(+), 1 deletion(-)

Comments

Paul E. McKenney May 1, 2020, 11:06 p.m. UTC | #1
On Tue, Apr 28, 2020 at 10:59:01PM +0200, Uladzislau Rezki (Sony) wrote:
> Make a kvfree_call_rcu() function to support head-less
> freeing. Same as for tree-RCU, for such purpose we store
> pointers in array. SLAB and vmalloc ptrs. are mixed and
> coexist together.
> 
> Under high memory pressure it can be that maintaining of
> arrays becomes impossible. Objects with an rcu_head are
> released via call_rcu(). When it comes to the head-less
> variant, the kvfree() call is directly inlined, i.e. we
> do the same as for tree-RCU:
>     a) wait until a grace period has elapsed;
>     b) direct inlining of the kvfree() call.
> 
> Thus the current context has to follow might_sleep()
> annotation. Also please note that for tiny-RCU any
> call of synchronize_rcu() is actually a quiescent
> state, therefore (a) does nothing.

Please, please, please just do synchronize_rcu() followed by kvfree()
for single-argument kfree_rcu() and friends in Tiny RCU.

Way simpler and probably way faster as well.  And given that Tiny RCU
runs only on uniprocessor systems, the complexity probably is buying
you very little, if anything.

							Thanx, Paul

> Reviewed-by: Joel Fernandes (Google) <joel@joelfernandes.org>
> Signed-off-by: Uladzislau Rezki (Sony) <urezki@gmail.com>
> ---
>  kernel/rcu/tiny.c | 157 +++++++++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 156 insertions(+), 1 deletion(-)
> 
> diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c
> index 508c82faa45c..b1c31a935db9 100644
> --- a/kernel/rcu/tiny.c
> +++ b/kernel/rcu/tiny.c
> @@ -40,6 +40,29 @@ static struct rcu_ctrlblk rcu_ctrlblk = {
>  	.curtail	= &rcu_ctrlblk.rcucblist,
>  };
>  
> +/* Can be common with tree-RCU. */
> +#define KVFREE_DRAIN_JIFFIES (HZ / 50)
> +
> +/* Can be common with tree-RCU. */
> +struct kvfree_rcu_bulk_data {
> +	unsigned long nr_records;
> +	struct kvfree_rcu_bulk_data *next;
> +	void *records[];
> +};
> +
> +/* Can be common with tree-RCU. */
> +#define KVFREE_BULK_MAX_ENTR \
> +	((PAGE_SIZE - sizeof(struct kvfree_rcu_bulk_data)) / sizeof(void *))
> +
> +static struct kvfree_rcu_bulk_data *kvhead;
> +static struct kvfree_rcu_bulk_data *kvhead_free;
> +static struct kvfree_rcu_bulk_data *kvcache;
> +
> +static DEFINE_STATIC_KEY_FALSE(rcu_init_done);
> +static struct delayed_work monitor_work;
> +static struct rcu_work rcu_work;
> +static bool monitor_todo;
> +
>  void rcu_barrier(void)
>  {
>  	wait_rcu_gp(call_rcu);
> @@ -177,9 +200,137 @@ void call_rcu(struct rcu_head *head, rcu_callback_t func)
>  }
>  EXPORT_SYMBOL_GPL(call_rcu);
>  
> +static inline bool
> +kvfree_call_rcu_add_ptr_to_bulk(void *ptr)
> +{
> +	struct kvfree_rcu_bulk_data *bnode;
> +
> +	if (!kvhead || kvhead->nr_records == KVFREE_BULK_MAX_ENTR) {
> +		bnode = xchg(&kvcache, NULL);
> +		if (!bnode)
> +			bnode = (struct kvfree_rcu_bulk_data *)
> +				__get_free_page(GFP_NOWAIT | __GFP_NOWARN);
> +
> +		if (unlikely(!bnode))
> +			return false;
> +
> +		/* Initialize the new block. */
> +		bnode->nr_records = 0;
> +		bnode->next = kvhead;
> +
> +		/* Attach it to the bvhead. */
> +		kvhead = bnode;
> +	}
> +
> +	/* Done. */
> +	kvhead->records[kvhead->nr_records++] = ptr;
> +	return true;
> +}
> +
> +static void
> +kvfree_rcu_work(struct work_struct *work)
> +{
> +	struct kvfree_rcu_bulk_data *kvhead_tofree, *next;
> +	unsigned long flags;
> +	int i;
> +
> +	local_irq_save(flags);
> +	kvhead_tofree = kvhead_free;
> +	kvhead_free = NULL;
> +	local_irq_restore(flags);
> +
> +	/* Reclaim process. */
> +	for (; kvhead_tofree; kvhead_tofree = next) {
> +		next = kvhead_tofree->next;
> +
> +		for (i = 0; i < kvhead_tofree->nr_records; i++) {
> +			debug_rcu_head_unqueue((struct rcu_head *)
> +				kvhead_tofree->records[i]);
> +			kvfree(kvhead_tofree->records[i]);
> +		}
> +
> +		if (cmpxchg(&kvcache, NULL, kvhead_tofree))
> +			free_page((unsigned long) kvhead_tofree);
> +	}
> +}
> +
> +static inline bool
> +queue_kvfree_rcu_work(void)
> +{
> +	/* Check if the free channel is available. */
> +	if (kvhead_free)
> +		return false;
> +
> +	kvhead_free = kvhead;
> +	kvhead = NULL;
> +
> +	/*
> +	 * Queue the job for memory reclaim after GP.
> +	 */
> +	queue_rcu_work(system_wq, &rcu_work);
> +	return true;
> +}
> +
> +static void kvfree_rcu_monitor(struct work_struct *work)
> +{
> +	unsigned long flags;
> +	bool queued;
> +
> +	local_irq_save(flags);
> +	queued = queue_kvfree_rcu_work();
> +	if (queued)
> +		/* Success. */
> +		monitor_todo = false;
> +	local_irq_restore(flags);
> +
> +	/*
> +	 * If previous RCU reclaim process is still in progress,
> +	 * schedule the work one more time to try again later.
> +	 */
> +	if (monitor_todo)
> +		schedule_delayed_work(&monitor_work,
> +			KVFREE_DRAIN_JIFFIES);
> +}
> +
>  void kvfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
>  {
> -	call_rcu(head, func);
> +	unsigned long flags;
> +	bool success;
> +	void *ptr;
> +
> +	if (head) {
> +		ptr = (void *) head - (unsigned long) func;
> +	} else {
> +		might_sleep();
> +		ptr = (void *) func;
> +	}
> +
> +	if (debug_rcu_head_queue(ptr)) {
> +		/* Probable double free, just leak. */
> +		WARN_ONCE(1, "%s(): Double-freed call. rcu_head %p\n",
> +			  __func__, head);
> +		return;
> +	}
> +
> +	local_irq_save(flags);
> +	success = kvfree_call_rcu_add_ptr_to_bulk(ptr);
> +	if (static_branch_likely(&rcu_init_done)) {
> +		if (success && !monitor_todo) {
> +			monitor_todo = true;
> +			schedule_delayed_work(&monitor_work,
> +				KVFREE_DRAIN_JIFFIES);
> +		}
> +	}
> +	local_irq_restore(flags);
> +
> +	if (!success) {
> +		if (!head) {
> +			synchronize_rcu();
> +			kvfree(ptr);
> +		} else {
> +			call_rcu(head, func);
> +		}
> +	}
>  }
>  EXPORT_SYMBOL_GPL(kvfree_call_rcu);
>  
> @@ -188,4 +339,8 @@ void __init rcu_init(void)
>  	open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
>  	rcu_early_boot_tests();
>  	srcu_init();
> +
> +	INIT_DELAYED_WORK(&monitor_work, kvfree_rcu_monitor);
> +	INIT_RCU_WORK(&rcu_work, kvfree_rcu_work);
> +	static_branch_enable(&rcu_init_done);
>  }
> -- 
> 2.20.1
>
Joel Fernandes May 4, 2020, 12:27 a.m. UTC | #2
On Fri, May 01, 2020 at 04:06:38PM -0700, Paul E. McKenney wrote:
> On Tue, Apr 28, 2020 at 10:59:01PM +0200, Uladzislau Rezki (Sony) wrote:
> > Make a kvfree_call_rcu() function to support head-less
> > freeing. Same as for tree-RCU, for such purpose we store
> > pointers in array. SLAB and vmalloc ptrs. are mixed and
> > coexist together.
> > 
> > Under high memory pressure it can be that maintaining of
> > arrays becomes impossible. Objects with an rcu_head are
> > released via call_rcu(). When it comes to the head-less
> > variant, the kvfree() call is directly inlined, i.e. we
> > do the same as for tree-RCU:
> >     a) wait until a grace period has elapsed;
> >     b) direct inlining of the kvfree() call.
> > 
> > Thus the current context has to follow might_sleep()
> > annotation. Also please note that for tiny-RCU any
> > call of synchronize_rcu() is actually a quiescent
> > state, therefore (a) does nothing.
> 
> Please, please, please just do synchronize_rcu() followed by kvfree()
> for single-argument kfree_rcu() and friends in Tiny RCU.
> 
> Way simpler and probably way faster as well.  And given that Tiny RCU
> runs only on uniprocessor systems, the complexity probably is buying
> you very little, if anything.

Agreed.

thanks,

 - Joel

> 
> 							Thanx, Paul
> 
> > Reviewed-by: Joel Fernandes (Google) <joel@joelfernandes.org>
> > Signed-off-by: Uladzislau Rezki (Sony) <urezki@gmail.com>
> > ---
> >  kernel/rcu/tiny.c | 157 +++++++++++++++++++++++++++++++++++++++++++++-
> >  1 file changed, 156 insertions(+), 1 deletion(-)
> > 
> > diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c
> > index 508c82faa45c..b1c31a935db9 100644
> > --- a/kernel/rcu/tiny.c
> > +++ b/kernel/rcu/tiny.c
> > @@ -40,6 +40,29 @@ static struct rcu_ctrlblk rcu_ctrlblk = {
> >  	.curtail	= &rcu_ctrlblk.rcucblist,
> >  };
> >  
> > +/* Can be common with tree-RCU. */
> > +#define KVFREE_DRAIN_JIFFIES (HZ / 50)
> > +
> > +/* Can be common with tree-RCU. */
> > +struct kvfree_rcu_bulk_data {
> > +	unsigned long nr_records;
> > +	struct kvfree_rcu_bulk_data *next;
> > +	void *records[];
> > +};
> > +
> > +/* Can be common with tree-RCU. */
> > +#define KVFREE_BULK_MAX_ENTR \
> > +	((PAGE_SIZE - sizeof(struct kvfree_rcu_bulk_data)) / sizeof(void *))
> > +
> > +static struct kvfree_rcu_bulk_data *kvhead;
> > +static struct kvfree_rcu_bulk_data *kvhead_free;
> > +static struct kvfree_rcu_bulk_data *kvcache;
> > +
> > +static DEFINE_STATIC_KEY_FALSE(rcu_init_done);
> > +static struct delayed_work monitor_work;
> > +static struct rcu_work rcu_work;
> > +static bool monitor_todo;
> > +
> >  void rcu_barrier(void)
> >  {
> >  	wait_rcu_gp(call_rcu);
> > @@ -177,9 +200,137 @@ void call_rcu(struct rcu_head *head, rcu_callback_t func)
> >  }
> >  EXPORT_SYMBOL_GPL(call_rcu);
> >  
> > +static inline bool
> > +kvfree_call_rcu_add_ptr_to_bulk(void *ptr)
> > +{
> > +	struct kvfree_rcu_bulk_data *bnode;
> > +
> > +	if (!kvhead || kvhead->nr_records == KVFREE_BULK_MAX_ENTR) {
> > +		bnode = xchg(&kvcache, NULL);
> > +		if (!bnode)
> > +			bnode = (struct kvfree_rcu_bulk_data *)
> > +				__get_free_page(GFP_NOWAIT | __GFP_NOWARN);
> > +
> > +		if (unlikely(!bnode))
> > +			return false;
> > +
> > +		/* Initialize the new block. */
> > +		bnode->nr_records = 0;
> > +		bnode->next = kvhead;
> > +
> > +		/* Attach it to the bvhead. */
> > +		kvhead = bnode;
> > +	}
> > +
> > +	/* Done. */
> > +	kvhead->records[kvhead->nr_records++] = ptr;
> > +	return true;
> > +}
> > +
> > +static void
> > +kvfree_rcu_work(struct work_struct *work)
> > +{
> > +	struct kvfree_rcu_bulk_data *kvhead_tofree, *next;
> > +	unsigned long flags;
> > +	int i;
> > +
> > +	local_irq_save(flags);
> > +	kvhead_tofree = kvhead_free;
> > +	kvhead_free = NULL;
> > +	local_irq_restore(flags);
> > +
> > +	/* Reclaim process. */
> > +	for (; kvhead_tofree; kvhead_tofree = next) {
> > +		next = kvhead_tofree->next;
> > +
> > +		for (i = 0; i < kvhead_tofree->nr_records; i++) {
> > +			debug_rcu_head_unqueue((struct rcu_head *)
> > +				kvhead_tofree->records[i]);
> > +			kvfree(kvhead_tofree->records[i]);
> > +		}
> > +
> > +		if (cmpxchg(&kvcache, NULL, kvhead_tofree))
> > +			free_page((unsigned long) kvhead_tofree);
> > +	}
> > +}
> > +
> > +static inline bool
> > +queue_kvfree_rcu_work(void)
> > +{
> > +	/* Check if the free channel is available. */
> > +	if (kvhead_free)
> > +		return false;
> > +
> > +	kvhead_free = kvhead;
> > +	kvhead = NULL;
> > +
> > +	/*
> > +	 * Queue the job for memory reclaim after GP.
> > +	 */
> > +	queue_rcu_work(system_wq, &rcu_work);
> > +	return true;
> > +}
> > +
> > +static void kvfree_rcu_monitor(struct work_struct *work)
> > +{
> > +	unsigned long flags;
> > +	bool queued;
> > +
> > +	local_irq_save(flags);
> > +	queued = queue_kvfree_rcu_work();
> > +	if (queued)
> > +		/* Success. */
> > +		monitor_todo = false;
> > +	local_irq_restore(flags);
> > +
> > +	/*
> > +	 * If previous RCU reclaim process is still in progress,
> > +	 * schedule the work one more time to try again later.
> > +	 */
> > +	if (monitor_todo)
> > +		schedule_delayed_work(&monitor_work,
> > +			KVFREE_DRAIN_JIFFIES);
> > +}
> > +
> >  void kvfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
> >  {
> > -	call_rcu(head, func);
> > +	unsigned long flags;
> > +	bool success;
> > +	void *ptr;
> > +
> > +	if (head) {
> > +		ptr = (void *) head - (unsigned long) func;
> > +	} else {
> > +		might_sleep();
> > +		ptr = (void *) func;
> > +	}
> > +
> > +	if (debug_rcu_head_queue(ptr)) {
> > +		/* Probable double free, just leak. */
> > +		WARN_ONCE(1, "%s(): Double-freed call. rcu_head %p\n",
> > +			  __func__, head);
> > +		return;
> > +	}
> > +
> > +	local_irq_save(flags);
> > +	success = kvfree_call_rcu_add_ptr_to_bulk(ptr);
> > +	if (static_branch_likely(&rcu_init_done)) {
> > +		if (success && !monitor_todo) {
> > +			monitor_todo = true;
> > +			schedule_delayed_work(&monitor_work,
> > +				KVFREE_DRAIN_JIFFIES);
> > +		}
> > +	}
> > +	local_irq_restore(flags);
> > +
> > +	if (!success) {
> > +		if (!head) {
> > +			synchronize_rcu();
> > +			kvfree(ptr);
> > +		} else {
> > +			call_rcu(head, func);
> > +		}
> > +	}
> >  }
> >  EXPORT_SYMBOL_GPL(kvfree_call_rcu);
> >  
> > @@ -188,4 +339,8 @@ void __init rcu_init(void)
> >  	open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
> >  	rcu_early_boot_tests();
> >  	srcu_init();
> > +
> > +	INIT_DELAYED_WORK(&monitor_work, kvfree_rcu_monitor);
> > +	INIT_RCU_WORK(&rcu_work, kvfree_rcu_work);
> > +	static_branch_enable(&rcu_init_done);
> >  }
> > -- 
> > 2.20.1
> >
Uladzislau Rezki May 4, 2020, 12:45 p.m. UTC | #3
On Sun, May 03, 2020 at 08:27:00PM -0400, Joel Fernandes wrote:
> On Fri, May 01, 2020 at 04:06:38PM -0700, Paul E. McKenney wrote:
> > On Tue, Apr 28, 2020 at 10:59:01PM +0200, Uladzislau Rezki (Sony) wrote:
> > > Make a kvfree_call_rcu() function to support head-less
> > > freeing. Same as for tree-RCU, for such purpose we store
> > > pointers in array. SLAB and vmalloc ptrs. are mixed and
> > > coexist together.
> > > 
> > > Under high memory pressure it can be that maintaining of
> > > arrays becomes impossible. Objects with an rcu_head are
> > > released via call_rcu(). When it comes to the head-less
> > > variant, the kvfree() call is directly inlined, i.e. we
> > > do the same as for tree-RCU:
> > >     a) wait until a grace period has elapsed;
> > >     b) direct inlining of the kvfree() call.
> > > 
> > > Thus the current context has to follow might_sleep()
> > > annotation. Also please note that for tiny-RCU any
> > > call of synchronize_rcu() is actually a quiescent
> > > state, therefore (a) does nothing.
> > 
> > Please, please, please just do synchronize_rcu() followed by kvfree()
> > for single-argument kfree_rcu() and friends in Tiny RCU.
> > 
> > Way simpler and probably way faster as well.  And given that Tiny RCU
> > runs only on uniprocessor systems, the complexity probably is buying
> > you very little, if anything.
> 
> Agreed.
> 
Cool. Agree also :)

--
Vlad Rezki
diff mbox series

Patch

diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c
index 508c82faa45c..b1c31a935db9 100644
--- a/kernel/rcu/tiny.c
+++ b/kernel/rcu/tiny.c
@@ -40,6 +40,29 @@  static struct rcu_ctrlblk rcu_ctrlblk = {
 	.curtail	= &rcu_ctrlblk.rcucblist,
 };
 
+/* Can be common with tree-RCU. */
+#define KVFREE_DRAIN_JIFFIES (HZ / 50)
+
+/* Can be common with tree-RCU. */
+struct kvfree_rcu_bulk_data {
+	unsigned long nr_records;
+	struct kvfree_rcu_bulk_data *next;
+	void *records[];
+};
+
+/* Can be common with tree-RCU. */
+#define KVFREE_BULK_MAX_ENTR \
+	((PAGE_SIZE - sizeof(struct kvfree_rcu_bulk_data)) / sizeof(void *))
+
+static struct kvfree_rcu_bulk_data *kvhead;
+static struct kvfree_rcu_bulk_data *kvhead_free;
+static struct kvfree_rcu_bulk_data *kvcache;
+
+static DEFINE_STATIC_KEY_FALSE(rcu_init_done);
+static struct delayed_work monitor_work;
+static struct rcu_work rcu_work;
+static bool monitor_todo;
+
 void rcu_barrier(void)
 {
 	wait_rcu_gp(call_rcu);
@@ -177,9 +200,137 @@  void call_rcu(struct rcu_head *head, rcu_callback_t func)
 }
 EXPORT_SYMBOL_GPL(call_rcu);
 
+static inline bool
+kvfree_call_rcu_add_ptr_to_bulk(void *ptr)
+{
+	struct kvfree_rcu_bulk_data *bnode;
+
+	if (!kvhead || kvhead->nr_records == KVFREE_BULK_MAX_ENTR) {
+		bnode = xchg(&kvcache, NULL);
+		if (!bnode)
+			bnode = (struct kvfree_rcu_bulk_data *)
+				__get_free_page(GFP_NOWAIT | __GFP_NOWARN);
+
+		if (unlikely(!bnode))
+			return false;
+
+		/* Initialize the new block. */
+		bnode->nr_records = 0;
+		bnode->next = kvhead;
+
+		/* Attach it to the bvhead. */
+		kvhead = bnode;
+	}
+
+	/* Done. */
+	kvhead->records[kvhead->nr_records++] = ptr;
+	return true;
+}
+
+static void
+kvfree_rcu_work(struct work_struct *work)
+{
+	struct kvfree_rcu_bulk_data *kvhead_tofree, *next;
+	unsigned long flags;
+	int i;
+
+	local_irq_save(flags);
+	kvhead_tofree = kvhead_free;
+	kvhead_free = NULL;
+	local_irq_restore(flags);
+
+	/* Reclaim process. */
+	for (; kvhead_tofree; kvhead_tofree = next) {
+		next = kvhead_tofree->next;
+
+		for (i = 0; i < kvhead_tofree->nr_records; i++) {
+			debug_rcu_head_unqueue((struct rcu_head *)
+				kvhead_tofree->records[i]);
+			kvfree(kvhead_tofree->records[i]);
+		}
+
+		if (cmpxchg(&kvcache, NULL, kvhead_tofree))
+			free_page((unsigned long) kvhead_tofree);
+	}
+}
+
+static inline bool
+queue_kvfree_rcu_work(void)
+{
+	/* Check if the free channel is available. */
+	if (kvhead_free)
+		return false;
+
+	kvhead_free = kvhead;
+	kvhead = NULL;
+
+	/*
+	 * Queue the job for memory reclaim after GP.
+	 */
+	queue_rcu_work(system_wq, &rcu_work);
+	return true;
+}
+
+static void kvfree_rcu_monitor(struct work_struct *work)
+{
+	unsigned long flags;
+	bool queued;
+
+	local_irq_save(flags);
+	queued = queue_kvfree_rcu_work();
+	if (queued)
+		/* Success. */
+		monitor_todo = false;
+	local_irq_restore(flags);
+
+	/*
+	 * If previous RCU reclaim process is still in progress,
+	 * schedule the work one more time to try again later.
+	 */
+	if (monitor_todo)
+		schedule_delayed_work(&monitor_work,
+			KVFREE_DRAIN_JIFFIES);
+}
+
 void kvfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
 {
-	call_rcu(head, func);
+	unsigned long flags;
+	bool success;
+	void *ptr;
+
+	if (head) {
+		ptr = (void *) head - (unsigned long) func;
+	} else {
+		might_sleep();
+		ptr = (void *) func;
+	}
+
+	if (debug_rcu_head_queue(ptr)) {
+		/* Probable double free, just leak. */
+		WARN_ONCE(1, "%s(): Double-freed call. rcu_head %p\n",
+			  __func__, head);
+		return;
+	}
+
+	local_irq_save(flags);
+	success = kvfree_call_rcu_add_ptr_to_bulk(ptr);
+	if (static_branch_likely(&rcu_init_done)) {
+		if (success && !monitor_todo) {
+			monitor_todo = true;
+			schedule_delayed_work(&monitor_work,
+				KVFREE_DRAIN_JIFFIES);
+		}
+	}
+	local_irq_restore(flags);
+
+	if (!success) {
+		if (!head) {
+			synchronize_rcu();
+			kvfree(ptr);
+		} else {
+			call_rcu(head, func);
+		}
+	}
 }
 EXPORT_SYMBOL_GPL(kvfree_call_rcu);
 
@@ -188,4 +339,8 @@  void __init rcu_init(void)
 	open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
 	rcu_early_boot_tests();
 	srcu_init();
+
+	INIT_DELAYED_WORK(&monitor_work, kvfree_rcu_monitor);
+	INIT_RCU_WORK(&rcu_work, kvfree_rcu_work);
+	static_branch_enable(&rcu_init_done);
 }