diff mbox series

[1/1] rcu/kvfree: Add debug check of GP ready for ptrs in a list

Message ID 20230404141300.908202-1-urezki@gmail.com (mailing list archive)
State Accepted
Commit 900093e6ea97d9ff5be2dee062f93a72437ca3a5
Headers show
Series [1/1] rcu/kvfree: Add debug check of GP ready for ptrs in a list | expand

Commit Message

Uladzislau Rezki April 4, 2023, 2:13 p.m. UTC
Trigger a warning if a grace period has not passed yet for
objects queued on a linked list via rcu_head structures.

Once detached, take a full snapshot of GP sequences to check
later that a grace period is passed and it is safe to free
all pointers.

Based on latest 'dev' branch.

Signed-off-by: Uladzislau Rezki (Sony) <urezki@gmail.com>
---
 kernel/rcu/tree.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

Comments

Paul E. McKenney April 5, 2023, midnight UTC | #1
On Tue, Apr 04, 2023 at 04:13:00PM +0200, Uladzislau Rezki (Sony) wrote:
> Trigger a warning if a grace period has not passed yet for
> objects queued on a linked list via rcu_head structures.
> 
> Once detached, take a full snapshot of GP sequences to check
> later that a grace period is passed and it is safe to free
> all pointers.
> 
> Based on latest 'dev' branch.
> 
> Signed-off-by: Uladzislau Rezki (Sony) <urezki@gmail.com>

Very good, thank you!

I queued the patch wordsmithed as shown below for review and further
testing.  Please check for any errors.

How should we go about testing this code?  The way that it would get
exercised in production would be during an out-of-memory event, correct?

							Thanx, Paul

------------------------------------------------------------------------

commit 900093e6ea97d9ff5be2dee062f93a72437ca3a5
Author: Uladzislau Rezki (Sony) <urezki@gmail.com>
Date:   Tue Apr 4 16:13:00 2023 +0200

    rcu/kvfree: Add debug check for GP complete for kfree_rcu_cpu list
    
    Under low-memory conditions, kvfree_rcu() will use each object's
    rcu_head structure to queue objects in a singly linked list headed by
    the kfree_rcu_cpu structure's ->head field.  This list is passed to
    call_rcu() as a unit, but there is no indication of which grace period
    this list needs to wait for.  This in turn prevents adding debug checks
    in the kfree_rcu_work() as was done for the two page-of-pointers channels
    in the kfree_rcu_cpu structure.
    
    This commit therefore adds a ->head_free_gp_snap field to the
    kfree_rcu_cpu_work structure to record this grace-period number.  It also
    adds a WARN_ON_ONCE() to kfree_rcu_work() that checks to make sure
    that the required grace period has in fact elapsed.
    
    Signed-off-by: Uladzislau Rezki (Sony) <urezki@gmail.com>
    Signed-off-by: Paul E. McKenney <paulmck@kernel.org>

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 4838a55da34f..35be35f8236b 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -2802,6 +2802,7 @@ struct kvfree_rcu_bulk_data {
 struct kfree_rcu_cpu_work {
 	struct rcu_work rcu_work;
 	struct rcu_head *head_free;
+	struct rcu_gp_oldstate head_free_gp_snap;
 	struct list_head bulk_head_free[FREE_N_CHANNELS];
 	struct kfree_rcu_cpu *krcp;
 };
@@ -3007,6 +3008,7 @@ static void kfree_rcu_work(struct work_struct *work)
 	struct rcu_head *head;
 	struct kfree_rcu_cpu *krcp;
 	struct kfree_rcu_cpu_work *krwp;
+	struct rcu_gp_oldstate head_gp_snap;
 	int i;
 
 	krwp = container_of(to_rcu_work(work),
@@ -3021,6 +3023,7 @@ static void kfree_rcu_work(struct work_struct *work)
 	// Channel 3.
 	head = krwp->head_free;
 	krwp->head_free = NULL;
+	head_gp_snap = krwp->head_free_gp_snap;
 	raw_spin_unlock_irqrestore(&krcp->lock, flags);
 
 	// Handle the first two channels.
@@ -3037,7 +3040,8 @@ static void kfree_rcu_work(struct work_struct *work)
 	 * queued on a linked list through their rcu_head structures.
 	 * This list is named "Channel 3".
 	 */
-	kvfree_rcu_list(head);
+	if (head && !WARN_ON_ONCE(!poll_state_synchronize_rcu_full(&head_gp_snap)))
+		kvfree_rcu_list(head);
 }
 
 static bool
@@ -3169,6 +3173,7 @@ static void kfree_rcu_monitor(struct work_struct *work)
 			// objects queued on the linked list.
 			if (!krwp->head_free) {
 				krwp->head_free = krcp->head;
+				get_state_synchronize_rcu_full(&krwp->head_free_gp_snap);
 				atomic_set(&krcp->head_count, 0);
 				WRITE_ONCE(krcp->head, NULL);
 			}
Uladzislau Rezki April 5, 2023, 9:10 a.m. UTC | #2
On Tue, Apr 04, 2023 at 05:00:13PM -0700, Paul E. McKenney wrote:
> On Tue, Apr 04, 2023 at 04:13:00PM +0200, Uladzislau Rezki (Sony) wrote:
> > Trigger a warning if a grace period has not passed yet for
> > objects queued on a linked list via rcu_head structures.
> > 
> > Once detached, take a full snapshot of GP sequences to check
> > later that a grace period is passed and it is safe to free
> > all pointers.
> > 
> > Based on latest 'dev' branch.
> > 
> > Signed-off-by: Uladzislau Rezki (Sony) <urezki@gmail.com>
> 
> Very good, thank you!
> 
> I queued the patch wordsmithed as shown below for review and further
> testing.  Please check for any errors.
> 
> How should we go about testing this code?  The way that it would get
> exercised in production would be during an out-of-memory event, correct?
> 
Either by flooding kfree_rcu() calls from many threads or by simulating a
low-memory condition. I applied high pressure using 1 000 000 frees
by 64 kthreads on a 64-CPU system.

IMHO, we have two GP checks, one for bulk and another one for list
and it looks like enough at least from my point of view.

--
Uladzislau Rezki
diff mbox series

Patch

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 4838a55da34f..35be35f8236b 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -2802,6 +2802,7 @@  struct kvfree_rcu_bulk_data {
 struct kfree_rcu_cpu_work {
 	struct rcu_work rcu_work;
 	struct rcu_head *head_free;
+	struct rcu_gp_oldstate head_free_gp_snap;
 	struct list_head bulk_head_free[FREE_N_CHANNELS];
 	struct kfree_rcu_cpu *krcp;
 };
@@ -3007,6 +3008,7 @@  static void kfree_rcu_work(struct work_struct *work)
 	struct rcu_head *head;
 	struct kfree_rcu_cpu *krcp;
 	struct kfree_rcu_cpu_work *krwp;
+	struct rcu_gp_oldstate head_gp_snap;
 	int i;
 
 	krwp = container_of(to_rcu_work(work),
@@ -3021,6 +3023,7 @@  static void kfree_rcu_work(struct work_struct *work)
 	// Channel 3.
 	head = krwp->head_free;
 	krwp->head_free = NULL;
+	head_gp_snap = krwp->head_free_gp_snap;
 	raw_spin_unlock_irqrestore(&krcp->lock, flags);
 
 	// Handle the first two channels.
@@ -3037,7 +3040,8 @@  static void kfree_rcu_work(struct work_struct *work)
 	 * queued on a linked list through their rcu_head structures.
 	 * This list is named "Channel 3".
 	 */
-	kvfree_rcu_list(head);
+	if (head && !WARN_ON_ONCE(!poll_state_synchronize_rcu_full(&head_gp_snap)))
+		kvfree_rcu_list(head);
 }
 
 static bool
@@ -3169,6 +3173,7 @@  static void kfree_rcu_monitor(struct work_struct *work)
 			// objects queued on the linked list.
 			if (!krwp->head_free) {
 				krwp->head_free = krcp->head;
+				get_state_synchronize_rcu_full(&krwp->head_free_gp_snap);
 				atomic_set(&krcp->head_count, 0);
 				WRITE_ONCE(krcp->head, NULL);
 			}