diff mbox series

srcu: Remove memory barrier "E" as it does not do anything

Message ID 20221222002011.1858494-1-joel@joelfernandes.org (mailing list archive)
State Superseded
Headers show
Series srcu: Remove memory barrier "E" as it does not do anything | expand

Commit Message

Joel Fernandes Dec. 22, 2022, 12:20 a.m. UTC
During a flip, we have a full memory barrier before srcu_idx is incremented.

The idea is we intend to order the first phase scan's read of lock
counters with the flipping of the index.

However, such ordering is already enforced because of the
control-dependency between the 2 scans. We would be flipping the index
only if lock and unlock counts matched.

But such match will not happen if there was a pending reader before the flip
in the first place (observation courtesy Mathieu Desnoyers).

The litmus test below shows this:
(test courtesy Frederic Weisbecker, Changes for ctrldep by Boqun/me):

C srcu

{}

// updater
P0(int *IDX, int *LOCK0, int *UNLOCK0, int *LOCK1, int *UNLOCK1)
{
        int lock1;
        int unlock1;
        int lock0;
        int unlock0;

        // SCAN1
        unlock1 = READ_ONCE(*UNLOCK1);
        smp_mb(); // A
        lock1 = READ_ONCE(*LOCK1);

        // FLIP
        if (lock1 == unlock1) {         // Control dep
                smp_mb(); // E
                WRITE_ONCE(*IDX, 1);
                smp_mb(); // D

                // SCAN2
                unlock0 = READ_ONCE(*UNLOCK0);
                smp_mb(); // A
                lock0 = READ_ONCE(*LOCK0);
        }
}

// reader
P1(int *IDX, int *LOCK0, int *UNLOCK0, int *LOCK1, int *UNLOCK1)
{
        int tmp;
        int idx1;
        int idx2;

        // 1st reader
        idx1 = READ_ONCE(*IDX);
        if (idx1 == 0) {
                tmp = READ_ONCE(*LOCK0);
                WRITE_ONCE(*LOCK0, tmp + 1);
                smp_mb(); /* B and C */
                tmp = READ_ONCE(*UNLOCK0);
                WRITE_ONCE(*UNLOCK0, tmp + 1);
        } else {
                tmp = READ_ONCE(*LOCK1);
                WRITE_ONCE(*LOCK1, tmp + 1);
                smp_mb(); /* B and C */
                tmp = READ_ONCE(*UNLOCK1);
                WRITE_ONCE(*UNLOCK1, tmp + 1);
        }

        // second reader
        idx2 = READ_ONCE(*IDX);
        if (idx2 == 0) {
                tmp = READ_ONCE(*LOCK0);
                WRITE_ONCE(*LOCK0, tmp + 1);
                smp_mb(); /* B and C */
                tmp = READ_ONCE(*UNLOCK0);
                WRITE_ONCE(*UNLOCK0, tmp + 1);
        } else {
                tmp = READ_ONCE(*LOCK1);
                WRITE_ONCE(*LOCK1, tmp + 1);
                smp_mb(); /* B and C */
                tmp = READ_ONCE(*UNLOCK1);
                WRITE_ONCE(*UNLOCK1, tmp + 1);
        }
}

The following bad condition will not occur even if memory barrier E
is dropped:

(* bad condition: SCAN1 saw lock count changes though 1st reader saw flip *)
 exists (0:lock1=1 /\ 1:idx1=1 /\ 1:idx2=1)

Co-developed-by: Frederic Weisbecker <frederic@kernel.org>
Co-developed-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Co-developed-by: Boqun Feng <boqun.feng@gmail.com>
Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
---
 kernel/rcu/srcutree.c | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

Comments

Frederic Weisbecker Dec. 22, 2022, 12:03 p.m. UTC | #1
On Thu, Dec 22, 2022 at 12:20:11AM +0000, Joel Fernandes (Google) wrote:
> During a flip, we have a full memory barrier before srcu_idx is incremented.
> 
> The idea is we intend to order the first phase scan's read of lock
> counters with the flipping of the index.
> 
> However, such ordering is already enforced because of the
> control-dependency between the 2 scans. We would be flipping the index
> only if lock and unlock counts matched.
> 
> But such match will not happen if there was a pending reader before the flip
> in the first place (observation courtesy Mathieu Desnoyers).
> 
> The litmus test below shows this:
> (test courtesy Frederic Weisbecker, Changes for ctrldep by Boqun/me):
> 
> C srcu
> 
> {}
> 
> // updater
> P0(int *IDX, int *LOCK0, int *UNLOCK0, int *LOCK1, int *UNLOCK1)
> {
>         int lock1;
>         int unlock1;
>         int lock0;
>         int unlock0;
> 
>         // SCAN1
>         unlock1 = READ_ONCE(*UNLOCK1);
>         smp_mb(); // A
>         lock1 = READ_ONCE(*LOCK1);
> 
>         // FLIP
>         if (lock1 == unlock1) {         // Control dep
>                 smp_mb(); // E
>                 WRITE_ONCE(*IDX, 1);
>                 smp_mb(); // D
> 
>                 // SCAN2
>                 unlock0 = READ_ONCE(*UNLOCK0);
>                 smp_mb(); // A
>                 lock0 = READ_ONCE(*LOCK0);
>         }
> }
> 
> // reader
> P1(int *IDX, int *LOCK0, int *UNLOCK0, int *LOCK1, int *UNLOCK1)
> {
>         int tmp;
>         int idx1;
>         int idx2;
> 
>         // 1st reader
>         idx1 = READ_ONCE(*IDX);
>         if (idx1 == 0) {
>                 tmp = READ_ONCE(*LOCK0);
>                 WRITE_ONCE(*LOCK0, tmp + 1);
>                 smp_mb(); /* B and C */
>                 tmp = READ_ONCE(*UNLOCK0);
>                 WRITE_ONCE(*UNLOCK0, tmp + 1);
>         } else {
>                 tmp = READ_ONCE(*LOCK1);
>                 WRITE_ONCE(*LOCK1, tmp + 1);
>                 smp_mb(); /* B and C */
>                 tmp = READ_ONCE(*UNLOCK1);
>                 WRITE_ONCE(*UNLOCK1, tmp + 1);
>         }
> 
>         // second reader
>         idx2 = READ_ONCE(*IDX);
>         if (idx2 == 0) {
>                 tmp = READ_ONCE(*LOCK0);
>                 WRITE_ONCE(*LOCK0, tmp + 1);
>                 smp_mb(); /* B and C */
>                 tmp = READ_ONCE(*UNLOCK0);
>                 WRITE_ONCE(*UNLOCK0, tmp + 1);
>         } else {
>                 tmp = READ_ONCE(*LOCK1);
>                 WRITE_ONCE(*LOCK1, tmp + 1);
>                 smp_mb(); /* B and C */
>                 tmp = READ_ONCE(*UNLOCK1);
>                 WRITE_ONCE(*UNLOCK1, tmp + 1);
>         }
> }
> 
> The following bad condition will not occur even if memory barrier E
> is dropped:
> 
> (* bad condition: SCAN1 saw lock count changes though 1st reader saw flip *)
>  exists (0:lock1=1 /\ 1:idx1=1 /\ 1:idx2=1)

Good catch!

The litmus test can be shorten with removing the second reader and limit the
exists clause to 0:lock1=1 :

---
C srcu-E

{}

// updater
P0(int *IDX, int *LOCK0, int *UNLOCK0, int *LOCK1, int *UNLOCK1)
{
	int lock1;
	int unlock1;
	int lock0;
	int unlock0;

	// SCAN1
	unlock1 = READ_ONCE(*UNLOCK1);
	smp_mb(); // A
	lock1 = READ_ONCE(*LOCK1);

	// FLIP
	if (lock1 == unlock1) {         // Control dep
		WRITE_ONCE(*IDX, 1);
		smp_mb(); // D

		// SCAN2
		unlock0 = READ_ONCE(*UNLOCK0);
		smp_mb(); // A
		lock0 = READ_ONCE(*LOCK0);
	}
}

// reader
P1(int *IDX, int *LOCK0, int *UNLOCK0, int *LOCK1, int *UNLOCK1)
{
	int tmp;
	int idx1;
	int idx2;

	// 1st reader
	idx1 = READ_ONCE(*IDX);
	if (idx1 == 0) {
		tmp = READ_ONCE(*LOCK0);
		WRITE_ONCE(*LOCK0, tmp + 1);
		smp_mb(); /* B and C */
		tmp = READ_ONCE(*UNLOCK0);
		WRITE_ONCE(*UNLOCK0, tmp + 1);
	} else {
		tmp = READ_ONCE(*LOCK1);
		WRITE_ONCE(*LOCK1, tmp + 1);
		smp_mb(); /* B and C */
		tmp = READ_ONCE(*UNLOCK1);
		WRITE_ONCE(*UNLOCK1, tmp + 1);
	}
}
exists (0:lock1=1) (* only happens if the control dep is removed *)
--

> diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
> index 1c304fec89c0..d1368d64fdba 100644
> --- a/kernel/rcu/srcutree.c
> +++ b/kernel/rcu/srcutree.c
> @@ -983,15 +983,9 @@ static bool try_check_zero(struct srcu_struct *ssp, int idx, int trycount)
>  static void srcu_flip(struct srcu_struct *ssp)
>  {
>  	/*
> -	 * Ensure that if this updater saw a given reader's increment
> -	 * from __srcu_read_lock(), that reader was using an old value
> -	 * of ->srcu_idx.  Also ensure that if a given reader sees the
> -	 * new value of ->srcu_idx, this updater's earlier scans cannot
> -	 * have seen that reader's increments (which is OK, because this
> -	 * grace period need not wait on that reader).
> +	 * Control dependency locks==unlocks ensures that if a reader sees the
> +	 * flip, previous scan would only see before-flip reader lock counts.

I would personally prefer that we keep the detailed comment explaining the
ordering requirements here, just mentioning it's performed via control
dependency now. This way we don't need to debate that again in 10 years.

Thanks.


>  	 */
> -	smp_mb(); /* E */  /* Pairs with B and C. */
> -
>  	WRITE_ONCE(ssp->srcu_idx, ssp->srcu_idx + 1);
>  
>  	/*
> -- 
> 2.39.0.314.g84b9a713c41-goog
>
Joel Fernandes Dec. 22, 2022, 1:03 p.m. UTC | #2
> On Dec 22, 2022, at 7:03 AM, Frederic Weisbecker <frederic@kernel.org> wrote:
> 
> On Thu, Dec 22, 2022 at 12:20:11AM +0000, Joel Fernandes (Google) wrote:
>> During a flip, we have a full memory barrier before srcu_idx is incremented.
>> 
>> The idea is we intend to order the first phase scan's read of lock
>> counters with the flipping of the index.
>> 
>> However, such ordering is already enforced because of the
>> control-dependency between the 2 scans. We would be flipping the index
>> only if lock and unlock counts matched.
>> 
>> But such match will not happen if there was a pending reader before the flip
>> in the first place (observation courtesy Mathieu Desnoyers).
>> 
>> The litmus test below shows this:
>> (test courtesy Frederic Weisbecker, Changes for ctrldep by Boqun/me):
>> 
>> C srcu
>> 
>> {}
>> 
>> // updater
>> P0(int *IDX, int *LOCK0, int *UNLOCK0, int *LOCK1, int *UNLOCK1)
>> {
>>        int lock1;
>>        int unlock1;
>>        int lock0;
>>        int unlock0;
>> 
>>        // SCAN1
>>        unlock1 = READ_ONCE(*UNLOCK1);
>>        smp_mb(); // A
>>        lock1 = READ_ONCE(*LOCK1);
>> 
>>        // FLIP
>>        if (lock1 == unlock1) {         // Control dep
>>                smp_mb(); // E
>>                WRITE_ONCE(*IDX, 1);
>>                smp_mb(); // D
>> 
>>                // SCAN2
>>                unlock0 = READ_ONCE(*UNLOCK0);
>>                smp_mb(); // A
>>                lock0 = READ_ONCE(*LOCK0);
>>        }
>> }
>> 
>> // reader
>> P1(int *IDX, int *LOCK0, int *UNLOCK0, int *LOCK1, int *UNLOCK1)
>> {
>>        int tmp;
>>        int idx1;
>>        int idx2;
>> 
>>        // 1st reader
>>        idx1 = READ_ONCE(*IDX);
>>        if (idx1 == 0) {
>>                tmp = READ_ONCE(*LOCK0);
>>                WRITE_ONCE(*LOCK0, tmp + 1);
>>                smp_mb(); /* B and C */
>>                tmp = READ_ONCE(*UNLOCK0);
>>                WRITE_ONCE(*UNLOCK0, tmp + 1);
>>        } else {
>>                tmp = READ_ONCE(*LOCK1);
>>                WRITE_ONCE(*LOCK1, tmp + 1);
>>                smp_mb(); /* B and C */
>>                tmp = READ_ONCE(*UNLOCK1);
>>                WRITE_ONCE(*UNLOCK1, tmp + 1);
>>        }
>> 
>>        // second reader
>>        idx2 = READ_ONCE(*IDX);
>>        if (idx2 == 0) {
>>                tmp = READ_ONCE(*LOCK0);
>>                WRITE_ONCE(*LOCK0, tmp + 1);
>>                smp_mb(); /* B and C */
>>                tmp = READ_ONCE(*UNLOCK0);
>>                WRITE_ONCE(*UNLOCK0, tmp + 1);
>>        } else {
>>                tmp = READ_ONCE(*LOCK1);
>>                WRITE_ONCE(*LOCK1, tmp + 1);
>>                smp_mb(); /* B and C */
>>                tmp = READ_ONCE(*UNLOCK1);
>>                WRITE_ONCE(*UNLOCK1, tmp + 1);
>>        }
>> }
>> 
>> The following bad condition will not occur even if memory barrier E
>> is dropped:
>> 
>> (* bad condition: SCAN1 saw lock count changes though 1st reader saw flip *)
>> exists (0:lock1=1 /\ 1:idx1=1 /\ 1:idx2=1)
> 
> Good catch!
> 
> The litmus test can be shorten with removing the second reader and limit the
> exists clause to 0:lock1=1 :

Yes. Made the change and that works too.

> 
> ---
> C srcu-E
> 
> {}
> 
> // updater
> P0(int *IDX, int *LOCK0, int *UNLOCK0, int *LOCK1, int *UNLOCK1)
> {
>    int lock1;
>    int unlock1;
>    int lock0;
>    int unlock0;
> 
>    // SCAN1
>    unlock1 = READ_ONCE(*UNLOCK1);
>    smp_mb(); // A
>    lock1 = READ_ONCE(*LOCK1);
> 
>    // FLIP
>    if (lock1 == unlock1) {         // Control dep
>        WRITE_ONCE(*IDX, 1);
>        smp_mb(); // D
> 
>        // SCAN2
>        unlock0 = READ_ONCE(*UNLOCK0);
>        smp_mb(); // A
>        lock0 = READ_ONCE(*LOCK0);
>    }
> }
> 
> // reader
> P1(int *IDX, int *LOCK0, int *UNLOCK0, int *LOCK1, int *UNLOCK1)
> {
>    int tmp;
>    int idx1;
>    int idx2;
> 
>    // 1st reader
>    idx1 = READ_ONCE(*IDX);
>    if (idx1 == 0) {
>        tmp = READ_ONCE(*LOCK0);
>        WRITE_ONCE(*LOCK0, tmp + 1);
>        smp_mb(); /* B and C */
>        tmp = READ_ONCE(*UNLOCK0);
>        WRITE_ONCE(*UNLOCK0, tmp + 1);
>    } else {
>        tmp = READ_ONCE(*LOCK1);
>        WRITE_ONCE(*LOCK1, tmp + 1);
>        smp_mb(); /* B and C */
>        tmp = READ_ONCE(*UNLOCK1);
>        WRITE_ONCE(*UNLOCK1, tmp + 1);
>    }
> }
> exists (0:lock1=1) (* only happens if the control dep is removed *)
> --
> 
>> diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
>> index 1c304fec89c0..d1368d64fdba 100644
>> --- a/kernel/rcu/srcutree.c
>> +++ b/kernel/rcu/srcutree.c
>> @@ -983,15 +983,9 @@ static bool try_check_zero(struct srcu_struct *ssp, int idx, int trycount)
>> static void srcu_flip(struct srcu_struct *ssp)
>> {
>>    /*
>> -     * Ensure that if this updater saw a given reader's increment
>> -     * from __srcu_read_lock(), that reader was using an old value
>> -     * of ->srcu_idx.  Also ensure that if a given reader sees the
>> -     * new value of ->srcu_idx, this updater's earlier scans cannot
>> -     * have seen that reader's increments (which is OK, because this
>> -     * grace period need not wait on that reader).
>> +     * Control dependency locks==unlocks ensures that if a reader sees the
>> +     * flip, previous scan would only see before-flip reader lock counts.
> 
> I would personally prefer that we keep the detailed comment explaining the
> ordering requirements here, just mentioning it's performed via control
> dependency now. This way we don't need to debate that again in 10 years.

Cool, will update the patch.

Thanks.



> 
> 
>>     */
>> -    smp_mb(); /* E */  /* Pairs with B and C. */
>> -
>>    WRITE_ONCE(ssp->srcu_idx, ssp->srcu_idx + 1);
>> 
>>    /*
>> -- 
>> 2.39.0.314.g84b9a713c41-goog
>>
diff mbox series

Patch

diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
index 1c304fec89c0..d1368d64fdba 100644
--- a/kernel/rcu/srcutree.c
+++ b/kernel/rcu/srcutree.c
@@ -983,15 +983,9 @@  static bool try_check_zero(struct srcu_struct *ssp, int idx, int trycount)
 static void srcu_flip(struct srcu_struct *ssp)
 {
 	/*
-	 * Ensure that if this updater saw a given reader's increment
-	 * from __srcu_read_lock(), that reader was using an old value
-	 * of ->srcu_idx.  Also ensure that if a given reader sees the
-	 * new value of ->srcu_idx, this updater's earlier scans cannot
-	 * have seen that reader's increments (which is OK, because this
-	 * grace period need not wait on that reader).
+	 * Control dependency locks==unlocks ensures that if a reader sees the
+	 * flip, previous scan would only see before-flip reader lock counts.
 	 */
-	smp_mb(); /* E */  /* Pairs with B and C. */
-
 	WRITE_ONCE(ssp->srcu_idx, ssp->srcu_idx + 1);
 
 	/*