Message ID | 20241009180719.778285-6-paulmck@kernel.org (mailing list archive) |
---|---|
State | Superseded |
Commit | e23ffe1f13434f2422afcb8d527c763769d49a11 |
Series | SRCU-lite changes for v6.13 |
> /*
> - * Returns approximate total of the readers' ->srcu_lock_count[] values
> - * for the rank of per-CPU counters specified by idx.
> + * Computes approximate total of the readers' ->srcu_lock_count[] values
> + * for the rank of per-CPU counters specified by idx, and returns true if
> + * the caller did the proper barrier (gp), and if the count of the locks
> + * matches that of the unlocks passed in.
> */
> -static unsigned long srcu_readers_lock_idx(struct srcu_struct *ssp, int idx)
> +static bool srcu_readers_lock_idx(struct srcu_struct *ssp, int idx, bool gp, unsigned long unlocks)
> {
>     int cpu;
> +   unsigned long mask = 0;
>     unsigned long sum = 0;
>
>     for_each_possible_cpu(cpu) {
>             struct srcu_data *sdp = per_cpu_ptr(ssp->sda, cpu);
>
>             sum += atomic_long_read(&sdp->srcu_lock_count[idx]);
> +           if (IS_ENABLED(CONFIG_PROVE_RCU))
> +                   mask = mask | READ_ONCE(sdp->srcu_reader_flavor);
>     }
> -   return sum;
> +   WARN_ONCE(IS_ENABLED(CONFIG_PROVE_RCU) && (mask & (mask - 1)),
> +             "Mixed reader flavors for srcu_struct at %ps.\n", ssp);

I am trying to understand the (unlikely) case where synchronize_srcu() is
done before any srcu reader lock/unlock lite call is done. Can new SRCU
readers fail to observe the updates?

> +   if (mask & SRCU_READ_FLAVOR_LITE && !gp)
> +           return false;

So, srcu_readers_active_idx_check() can potentially return false for a
very long time, until the CPU executing srcu_readers_active_idx_check()
does at least one read lock/unlock lite call?

> +   return sum == unlocks;
> }
>
> /*
> @@ -473,6 +482,7 @@ static unsigned long srcu_readers_unlock_idx(struct srcu_struct *ssp, int idx)
>  */
> static bool srcu_readers_active_idx_check(struct srcu_struct *ssp, int idx)
> {
> +   bool did_gp = !!(raw_cpu_read(ssp->sda->srcu_reader_flavor) & SRCU_READ_FLAVOR_LITE);

sda->srcu_reader_flavor is only set when CONFIG_PROVE_RCU is enabled. But
we need the reader flavor information for the srcu lite variant to work.
So, the lite variant does not work when CONFIG_PROVE_RCU is disabled. Am I
missing something obvious here?

- Neeraj

>     unsigned long unlocks;
>
>     unlocks = srcu_readers_unlock_idx(ssp, idx);
> @@ -482,13 +492,16 @@ static bool srcu_readers_active_idx_check(struct srcu_struct *ssp, int idx)
>      * unlock is counted. Needs to be a smp_mb() as the read side may
>      * contain a read from a variable that is written to before the
>      * synchronize_srcu() in the write side. In this case smp_mb()s
> -    * A and B act like the store buffering pattern.
> +    * A and B (or X and Y) act like the store buffering pattern.
>      *
> -    * This smp_mb() also pairs with smp_mb() C to prevent accesses
> -    * after the synchronize_srcu() from being executed before the
> -    * grace period ends.
> +    * This smp_mb() also pairs with smp_mb() C (or, in the case of X,
> +    * Z) to prevent accesses after the synchronize_srcu() from being
> +    * executed before the grace period ends.
>      */
> -   smp_mb(); /* A */
> +   if (!did_gp)
> +           smp_mb(); /* A */
> +   else
> +           synchronize_rcu(); /* X */
>
>     /*
>      * If the locks are the same as the unlocks, then there must have
> @@ -546,7 +559,7 @@ static bool srcu_readers_active_idx_check(struct srcu_struct *ssp, int idx)
>      * which are unlikely to be configured with an address space fully
>      * populated with memory, at least not anytime soon.
>      */
> -   return srcu_readers_lock_idx(ssp, idx) == unlocks;
> +   return srcu_readers_lock_idx(ssp, idx, did_gp, unlocks);
> }
On Mon, Nov 11, 2024 at 06:24:58PM +0530, Neeraj Upadhyay wrote:
> 
> > /*
> > - * Returns approximate total of the readers' ->srcu_lock_count[] values
> > - * for the rank of per-CPU counters specified by idx.
> > + * Computes approximate total of the readers' ->srcu_lock_count[] values
> > + * for the rank of per-CPU counters specified by idx, and returns true if
> > + * the caller did the proper barrier (gp), and if the count of the locks
> > + * matches that of the unlocks passed in.
> > */
> > -static unsigned long srcu_readers_lock_idx(struct srcu_struct *ssp, int idx)
> > +static bool srcu_readers_lock_idx(struct srcu_struct *ssp, int idx, bool gp, unsigned long unlocks)
> > {
> >     int cpu;
> > +   unsigned long mask = 0;
> >     unsigned long sum = 0;
> >
> >     for_each_possible_cpu(cpu) {
> >             struct srcu_data *sdp = per_cpu_ptr(ssp->sda, cpu);
> >
> >             sum += atomic_long_read(&sdp->srcu_lock_count[idx]);
> > +           if (IS_ENABLED(CONFIG_PROVE_RCU))
> > +                   mask = mask | READ_ONCE(sdp->srcu_reader_flavor);
> >     }
> > -   return sum;
> > +   WARN_ONCE(IS_ENABLED(CONFIG_PROVE_RCU) && (mask & (mask - 1)),
> > +             "Mixed reader flavors for srcu_struct at %ps.\n", ssp);
> 
> I am trying to understand the (unlikely) case where synchronize_srcu() is
> done before any srcu reader lock/unlock lite call is done. Can new SRCU
> readers fail to observe the updates?

If an SRCU reader fails to observe the index flip, then isn't it the case
that the synchronize_rcu() invoked from srcu_readers_active_idx_check()
must wait on it?

> > +   if (mask & SRCU_READ_FLAVOR_LITE && !gp)
> > +           return false;
> 
> So, srcu_readers_active_idx_check() can potentially return false for a
> very long time, until the CPU executing srcu_readers_active_idx_check()
> does at least one read lock/unlock lite call?

That is correct.  The theory is that until after an srcu_read_lock_lite()
has executed, there is no need to wait on it.  Does the practice match the
theory in this case, or is there some sequence of events that I missed?

> > +   return sum == unlocks;
> > }
> >
> > /*
> > @@ -473,6 +482,7 @@ static unsigned long srcu_readers_unlock_idx(struct srcu_struct *ssp, int idx)
> >  */
> > static bool srcu_readers_active_idx_check(struct srcu_struct *ssp, int idx)
> > {
> > +   bool did_gp = !!(raw_cpu_read(ssp->sda->srcu_reader_flavor) & SRCU_READ_FLAVOR_LITE);
> 
> sda->srcu_reader_flavor is only set when CONFIG_PROVE_RCU is enabled. But
> we need the reader flavor information for the srcu lite variant to work.
> So, the lite variant does not work when CONFIG_PROVE_RCU is disabled. Am I
> missing something obvious here?

At first glance, it appears that I am the one who missed something obvious.
Including in testing, which failed to uncover this issue.

Thank you for the careful reviews!

							Thanx, Paul

> - Neeraj
> 
> >     unsigned long unlocks;
> >
> >     unlocks = srcu_readers_unlock_idx(ssp, idx);
> > @@ -482,13 +492,16 @@ static bool srcu_readers_active_idx_check(struct srcu_struct *ssp, int idx)
> >      * unlock is counted. Needs to be a smp_mb() as the read side may
> >      * contain a read from a variable that is written to before the
> >      * synchronize_srcu() in the write side. In this case smp_mb()s
> > -    * A and B act like the store buffering pattern.
> > +    * A and B (or X and Y) act like the store buffering pattern.
> >      *
> > -    * This smp_mb() also pairs with smp_mb() C to prevent accesses
> > -    * after the synchronize_srcu() from being executed before the
> > -    * grace period ends.
> > +    * This smp_mb() also pairs with smp_mb() C (or, in the case of X,
> > +    * Z) to prevent accesses after the synchronize_srcu() from being
> > +    * executed before the grace period ends.
> >      */
> > -   smp_mb(); /* A */
> > +   if (!did_gp)
> > +           smp_mb(); /* A */
> > +   else
> > +           synchronize_rcu(); /* X */
> >
> >     /*
> >      * If the locks are the same as the unlocks, then there must have
> > @@ -546,7 +559,7 @@ static bool srcu_readers_active_idx_check(struct srcu_struct *ssp, int idx)
> >      * which are unlikely to be configured with an address space fully
> >      * populated with memory, at least not anytime soon.
> >      */
> > -   return srcu_readers_lock_idx(ssp, idx) == unlocks;
> > +   return srcu_readers_lock_idx(ssp, idx, did_gp, unlocks);
> > }
> >
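The store-buffering pattern that smp_mb()s A and B guard against is easier
to see in a self-contained sketch. The user-space C program below is
illustrative only: the cpu1()/cpu2() names and the x/y variables are
hypothetical, and C11 seq_cst fences stand in for the kernel's smp_mb().
With both fences present, the outcome r1 == 0 && r2 == 0 is forbidden,
which is exactly the guarantee the quoted comment relies on.

/* Minimal store-buffering (SB) litmus sketch; compile with -pthread. */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

static atomic_int x, y;		/* Each thread stores to one, loads the other. */
static int r1, r2;

static void *cpu1(void *arg)
{
	(void)arg;
	atomic_store_explicit(&x, 1, memory_order_relaxed);
	atomic_thread_fence(memory_order_seq_cst);	/* Plays the role of smp_mb() B. */
	r1 = atomic_load_explicit(&y, memory_order_relaxed);
	return NULL;
}

static void *cpu2(void *arg)
{
	(void)arg;
	atomic_store_explicit(&y, 1, memory_order_relaxed);
	atomic_thread_fence(memory_order_seq_cst);	/* Plays the role of smp_mb() A. */
	r2 = atomic_load_explicit(&x, memory_order_relaxed);
	return NULL;
}

int main(void)
{
	pthread_t t1, t2;

	pthread_create(&t1, NULL, cpu1, NULL);
	pthread_create(&t2, NULL, cpu2, NULL);
	pthread_join(t1, NULL);
	pthread_join(t2, NULL);
	/* With both fences, r1 == 0 && r2 == 0 cannot happen. */
	printf("r1=%d r2=%d\n", r1, r2);
	return 0;
}

Remove either fence and both loads may see zero, which for SRCU would mean
an updater and a reader each missing the other's store.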
On 11/11/2024 8:56 PM, Paul E. McKenney wrote:
> On Mon, Nov 11, 2024 at 06:24:58PM +0530, Neeraj Upadhyay wrote:
>>
>>> /*
>>> - * Returns approximate total of the readers' ->srcu_lock_count[] values
>>> - * for the rank of per-CPU counters specified by idx.
>>> + * Computes approximate total of the readers' ->srcu_lock_count[] values
>>> + * for the rank of per-CPU counters specified by idx, and returns true if
>>> + * the caller did the proper barrier (gp), and if the count of the locks
>>> + * matches that of the unlocks passed in.
>>> */
>>> -static unsigned long srcu_readers_lock_idx(struct srcu_struct *ssp, int idx)
>>> +static bool srcu_readers_lock_idx(struct srcu_struct *ssp, int idx, bool gp, unsigned long unlocks)
>>> {
>>>     int cpu;
>>> +   unsigned long mask = 0;
>>>     unsigned long sum = 0;
>>>
>>>     for_each_possible_cpu(cpu) {
>>>             struct srcu_data *sdp = per_cpu_ptr(ssp->sda, cpu);
>>>
>>>             sum += atomic_long_read(&sdp->srcu_lock_count[idx]);
>>> +           if (IS_ENABLED(CONFIG_PROVE_RCU))
>>> +                   mask = mask | READ_ONCE(sdp->srcu_reader_flavor);
>>>     }
>>> -   return sum;
>>> +   WARN_ONCE(IS_ENABLED(CONFIG_PROVE_RCU) && (mask & (mask - 1)),
>>> +             "Mixed reader flavors for srcu_struct at %ps.\n", ssp);
>>
>> I am trying to understand the (unlikely) case where synchronize_srcu() is
>> done before any srcu reader lock/unlock lite call is done. Can new SRCU
>> readers fail to observe the updates?
> 
> If an SRCU reader fails to observe the index flip, then isn't it the case
> that the synchronize_rcu() invoked from srcu_readers_active_idx_check()
> must wait on it?
> 

Below is the sequence of operations I was thinking of, where at step 4
CPU2 reads the old pointer:

ptr = old

CPU1                                    CPU2

1. Update ptr = new

2. synchronize_srcu()
   <Does not use synchronize_rcu()
    as SRCU_READ_FLAVOR_LITE is not
    set for any sdp, as
    srcu_read_lock_lite() hasn't
    been called by any CPU>

                                        3. srcu_read_lock_lite()
                                           <No smp_mb() ordering>

                                        4. Can read ptr == old ?

>>> +   if (mask & SRCU_READ_FLAVOR_LITE && !gp)
>>> +           return false;
>>
>> So, srcu_readers_active_idx_check() can potentially return false for a
>> very long time, until the CPU executing srcu_readers_active_idx_check()
>> does at least one read lock/unlock lite call?
> 
> That is correct.  The theory is that until after an srcu_read_lock_lite()
> has executed, there is no need to wait on it.  Does the practice match the
> theory in this case, or is there some sequence of events that I missed?
> 

Below is the sequence:

CPU1                                    CPU2

                                        1. srcu_read_lock_lite()

                                        2. srcu_read_unlock_lite()

3. synchronize_srcu()

3.1 srcu_readers_lock_idx() is
    called with gp = false, as
    srcu_read_lock_lite() was never
    called on this CPU for this
    srcu_struct.  So
    ssp->sda->srcu_reader_flavor is
    not set for CPU1's sda.

3.2 Inside srcu_readers_lock_idx(),
    "mask" contains SRCU_READ_FLAVOR_LITE,
    as CPU2's sdp->srcu_reader_flavor has it.

3.3 CPU1 keeps returning false from the
    below check until CPU1 does at least
    one srcu_read_lock_lite() call or
    the thread migrates:

    if (mask & SRCU_READ_FLAVOR_LITE && !gp)
            return false;

>>> +   return sum == unlocks;
>>> }
>>>
>>> /*
>>> @@ -473,6 +482,7 @@ static unsigned long srcu_readers_unlock_idx(struct srcu_struct *ssp, int idx)
>>>  */
>>> static bool srcu_readers_active_idx_check(struct srcu_struct *ssp, int idx)
>>> {
>>> +   bool did_gp = !!(raw_cpu_read(ssp->sda->srcu_reader_flavor) & SRCU_READ_FLAVOR_LITE);
>>
>> sda->srcu_reader_flavor is only set when CONFIG_PROVE_RCU is enabled. But
>> we need the reader flavor information for the srcu lite variant to work.
>> So, the lite variant does not work when CONFIG_PROVE_RCU is disabled. Am I
>> missing something obvious here?
> 
> At first glance, it appears that I am the one who missed something obvious.
> Including in testing, which failed to uncover this issue.
> 
> Thank you for the careful reviews!

Sure thing, no problem!

- Neeraj

> 							Thanx, Paul
> 
>> - Neeraj
>>
>>>     unsigned long unlocks;
>>>
>>>     unlocks = srcu_readers_unlock_idx(ssp, idx);
>>> @@ -482,13 +492,16 @@ static bool srcu_readers_active_idx_check(struct srcu_struct *ssp, int idx)
>>>      * unlock is counted. Needs to be a smp_mb() as the read side may
>>>      * contain a read from a variable that is written to before the
>>>      * synchronize_srcu() in the write side. In this case smp_mb()s
>>> -    * A and B act like the store buffering pattern.
>>> +    * A and B (or X and Y) act like the store buffering pattern.
>>>      *
>>> -    * This smp_mb() also pairs with smp_mb() C to prevent accesses
>>> -    * after the synchronize_srcu() from being executed before the
>>> -    * grace period ends.
>>> +    * This smp_mb() also pairs with smp_mb() C (or, in the case of X,
>>> +    * Z) to prevent accesses after the synchronize_srcu() from being
>>> +    * executed before the grace period ends.
>>>      */
>>> -   smp_mb(); /* A */
>>> +   if (!did_gp)
>>> +           smp_mb(); /* A */
>>> +   else
>>> +           synchronize_rcu(); /* X */
>>>
>>>     /*
>>>      * If the locks are the same as the unlocks, then there must have
>>> @@ -546,7 +559,7 @@ static bool srcu_readers_active_idx_check(struct srcu_struct *ssp, int idx)
>>>      * which are unlikely to be configured with an address space fully
>>>      * populated with memory, at least not anytime soon.
>>>      */
>>> -   return srcu_readers_lock_idx(ssp, idx) == unlocks;
>>> +   return srcu_readers_lock_idx(ssp, idx, did_gp, unlocks);
>>> }
>>>
>>
On Mon, Nov 11, 2024 at 10:21:58PM +0530, Neeraj Upadhyay wrote:
> On 11/11/2024 8:56 PM, Paul E. McKenney wrote:
> > On Mon, Nov 11, 2024 at 06:24:58PM +0530, Neeraj Upadhyay wrote:
> >>> /*
> >>> - * Returns approximate total of the readers' ->srcu_lock_count[] values
> >>> - * for the rank of per-CPU counters specified by idx.
> >>> + * Computes approximate total of the readers' ->srcu_lock_count[] values
> >>> + * for the rank of per-CPU counters specified by idx, and returns true if
> >>> + * the caller did the proper barrier (gp), and if the count of the locks
> >>> + * matches that of the unlocks passed in.
> >>> */
> >>> -static unsigned long srcu_readers_lock_idx(struct srcu_struct *ssp, int idx)
> >>> +static bool srcu_readers_lock_idx(struct srcu_struct *ssp, int idx, bool gp, unsigned long unlocks)
> >>> {
> >>>     int cpu;
> >>> +   unsigned long mask = 0;
> >>>     unsigned long sum = 0;
> >>>
> >>>     for_each_possible_cpu(cpu) {
> >>>             struct srcu_data *sdp = per_cpu_ptr(ssp->sda, cpu);
> >>>
> >>>             sum += atomic_long_read(&sdp->srcu_lock_count[idx]);
> >>> +           if (IS_ENABLED(CONFIG_PROVE_RCU))
> >>> +                   mask = mask | READ_ONCE(sdp->srcu_reader_flavor);
> >>>     }
> >>> -   return sum;
> >>> +   WARN_ONCE(IS_ENABLED(CONFIG_PROVE_RCU) && (mask & (mask - 1)),
> >>> +             "Mixed reader flavors for srcu_struct at %ps.\n", ssp);
> >>
> >> I am trying to understand the (unlikely) case where synchronize_srcu() is
> >> done before any srcu reader lock/unlock lite call is done. Can new SRCU
> >> readers fail to observe the updates?
> > 
> > If an SRCU reader fails to observe the index flip, then isn't it the case
> > that the synchronize_rcu() invoked from srcu_readers_active_idx_check()
> > must wait on it?
> 
> Below is the sequence of operations I was thinking of, where at step 4
> CPU2 reads the old pointer:
> 
> ptr = old
> 
> CPU1                                    CPU2
> 
> 1. Update ptr = new
> 
> 2. synchronize_srcu()
>    <Does not use synchronize_rcu()
>     as SRCU_READ_FLAVOR_LITE is not
>     set for any sdp, as
>     srcu_read_lock_lite() hasn't
>     been called by any CPU>
> 
>                                         3. srcu_read_lock_lite()
>                                            <No smp_mb() ordering>
> 
>                                         4. Can read ptr == old ?

As long as the kernel was built with CONFIG_PROVE_RCU=y and given a fix
to the wrong-CPU issue you quite rightly point out below, no it cannot.
The CPU's first call to srcu_read_lock_lite() will use cmpxchg() to update
->srcu_reader_flavor, which will place full ordering between that update
and the later read from "ptr".

So if the synchronize_srcu() is too early to see the SRCU_READ_FLAVOR_LITE
bit, then the reader must see the new value of "ptr".  Similarly, if the
reader can see the old value of "ptr", then synchronize_srcu() must see
the reader's setting of the SRCU_READ_FLAVOR_LITE bit.

But both the CONFIG_PROVE_RCU=n and the wrong-CPU issue must be fixed for
this to work.  Please see the upcoming patches to be posted as a reply to
this email.

> >>> +   if (mask & SRCU_READ_FLAVOR_LITE && !gp)
> >>> +           return false;
> >>
> >> So, srcu_readers_active_idx_check() can potentially return false for a
> >> very long time, until the CPU executing srcu_readers_active_idx_check()
> >> does at least one read lock/unlock lite call?
> > 
> > That is correct.  The theory is that until after an srcu_read_lock_lite()
> > has executed, there is no need to wait on it.  Does the practice match the
> > theory in this case, or is there some sequence of events that I missed?
> 
> Below is the sequence:
> 
> CPU1                                    CPU2
> 
>                                         1. srcu_read_lock_lite()
> 
>                                         2. srcu_read_unlock_lite()
> 
> 3. synchronize_srcu()
> 
> 3.1 srcu_readers_lock_idx() is
>     called with gp = false, as
>     srcu_read_lock_lite() was never
>     called on this CPU for this
>     srcu_struct.  So
>     ssp->sda->srcu_reader_flavor is
>     not set for CPU1's sda.

Good eyes!  Yes, the scan that sums the ->srcu_unlock_count[] counters
must also OR together the ->srcu_reader_flavor fields.

> 3.2 Inside srcu_readers_lock_idx(),
>     "mask" contains SRCU_READ_FLAVOR_LITE,
>     as CPU2's sdp->srcu_reader_flavor has it.
> 
> 3.3 CPU1 keeps returning false from the
>     below check until CPU1 does at least
>     one srcu_read_lock_lite() call or
>     the thread migrates:
> 
>     if (mask & SRCU_READ_FLAVOR_LITE && !gp)
>             return false;

This is also fixed by the OR of the ->srcu_reader_flavor fields, correct?

I guess I could claim that this bug prevents the wrong-CPU bug above from
resulting in a too-short SRCU grace period, but it is of course better to
just fix the bugs.  ;-)

> >>> +   return sum == unlocks;
> >>> }
> >>>
> >>> /*
> >>> @@ -473,6 +482,7 @@ static unsigned long srcu_readers_unlock_idx(struct srcu_struct *ssp, int idx)
> >>>  */
> >>> static bool srcu_readers_active_idx_check(struct srcu_struct *ssp, int idx)
> >>> {
> >>> +   bool did_gp = !!(raw_cpu_read(ssp->sda->srcu_reader_flavor) & SRCU_READ_FLAVOR_LITE);
> >>
> >> sda->srcu_reader_flavor is only set when CONFIG_PROVE_RCU is enabled. But
> >> we need the reader flavor information for the srcu lite variant to work.
> >> So, the lite variant does not work when CONFIG_PROVE_RCU is disabled. Am I
> >> missing something obvious here?
> > 
> > At first glance, it appears that I am the one who missed something obvious.
> > Including in testing, which failed to uncover this issue.
> > 
> > Thank you for the careful reviews!
> 
> Sure thing, no problem!

And again, thank you!

							Thanx, Paul

> >>>     unsigned long unlocks;
> >>>
> >>>     unlocks = srcu_readers_unlock_idx(ssp, idx);
> >>> @@ -482,13 +492,16 @@ static bool srcu_readers_active_idx_check(struct srcu_struct *ssp, int idx)
> >>>      * unlock is counted. Needs to be a smp_mb() as the read side may
> >>>      * contain a read from a variable that is written to before the
> >>>      * synchronize_srcu() in the write side. In this case smp_mb()s
> >>> -    * A and B act like the store buffering pattern.
> >>> +    * A and B (or X and Y) act like the store buffering pattern.
> >>>      *
> >>> -    * This smp_mb() also pairs with smp_mb() C to prevent accesses
> >>> -    * after the synchronize_srcu() from being executed before the
> >>> -    * grace period ends.
> >>> +    * This smp_mb() also pairs with smp_mb() C (or, in the case of X,
> >>> +    * Z) to prevent accesses after the synchronize_srcu() from being
> >>> +    * executed before the grace period ends.
> >>>      */
> >>> -   smp_mb(); /* A */
> >>> +   if (!did_gp)
> >>> +           smp_mb(); /* A */
> >>> +   else
> >>> +           synchronize_rcu(); /* X */
> >>>
> >>>     /*
> >>>      * If the locks are the same as the unlocks, then there must have
> >>> @@ -546,7 +559,7 @@ static bool srcu_readers_active_idx_check(struct srcu_struct *ssp, int idx)
> >>>      * which are unlikely to be configured with an address space fully
> >>>      * populated with memory, at least not anytime soon.
> >>>      */
> >>> -   return srcu_readers_lock_idx(ssp, idx) == unlocks;
> >>> +   return srcu_readers_lock_idx(ssp, idx, did_gp, unlocks);
> >>> }
> >>>
> >>
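For reference, a minimal sketch of the cmpxchg()-based flavor recording
that the explanation above relies on. This is an approximation rather than
the posted patch: the function name srcu_record_flavor_sketch() is
hypothetical, but raw_cpu_ptr(), READ_ONCE(), cmpxchg(), and WARN_ONCE()
are the usual kernel primitives.

/* Sketch only: the first _lite reader on this CPU records its flavor. */
static void srcu_record_flavor_sketch(struct srcu_struct *ssp, int read_flavor)
{
	struct srcu_data *sdp = raw_cpu_ptr(ssp->sda);
	int old = READ_ONCE(sdp->srcu_reader_flavor);

	if (!old) {
		/*
		 * cmpxchg() is fully ordered, so this store is ordered
		 * before the reader's subsequent loads: either the updater
		 * sees SRCU_READ_FLAVOR_LITE, or the reader sees the
		 * updater's pre-grace-period stores.
		 */
		old = cmpxchg(&sdp->srcu_reader_flavor, 0, read_flavor);
		if (!old)
			return;
	}
	WARN_ONCE(old != read_flavor,
		  "Mixed reader flavors for srcu_struct at %ps.\n", ssp);
}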
>>>> I am trying to understand the (unlikely) case where synchronize_srcu() is
>>>> done before any srcu reader lock/unlock lite call is done. Can new SRCU
>>>> readers fail to observe the updates?
>>>
>>> If an SRCU reader fails to observe the index flip, then isn't it the case
>>> that the synchronize_rcu() invoked from srcu_readers_active_idx_check()
>>> must wait on it?
>>
>> Below is the sequence of operations I was thinking of, where at step 4
>> CPU2 reads the old pointer:
>>
>> ptr = old
>>
>> CPU1                                    CPU2
>>
>> 1. Update ptr = new
>>
>> 2. synchronize_srcu()
>>    <Does not use synchronize_rcu()
>>     as SRCU_READ_FLAVOR_LITE is not
>>     set for any sdp, as
>>     srcu_read_lock_lite() hasn't
>>     been called by any CPU>
>>
>>                                         3. srcu_read_lock_lite()
>>                                            <No smp_mb() ordering>
>>
>>                                         4. Can read ptr == old ?
> 
> As long as the kernel was built with CONFIG_PROVE_RCU=y and given a fix
> to the wrong-CPU issue you quite rightly point out below, no it cannot.
> The CPU's first call to srcu_read_lock_lite() will use cmpxchg() to update
> ->srcu_reader_flavor, which will place full ordering between that update
> and the later read from "ptr".
> 
> So if the synchronize_srcu() is too early to see the SRCU_READ_FLAVOR_LITE
> bit, then the reader must see the new value of "ptr".  Similarly, if the
> reader can see the old value of "ptr", then synchronize_srcu() must see
> the reader's setting of the SRCU_READ_FLAVOR_LITE bit.
> 
> But both the CONFIG_PROVE_RCU=n and the wrong-CPU issue must be fixed for
> this to work.  Please see the upcoming patches to be posted as a reply to
> this email.
> 

Got it. It will be good to document how the full ordering provided by
cmpxchg() helps here.

>>>>> +   if (mask & SRCU_READ_FLAVOR_LITE && !gp)
>>>>> +           return false;
>>>>
>>>> So, srcu_readers_active_idx_check() can potentially return false for a
>>>> very long time, until the CPU executing srcu_readers_active_idx_check()
>>>> does at least one read lock/unlock lite call?
>>>
>>> That is correct.  The theory is that until after an srcu_read_lock_lite()
>>> has executed, there is no need to wait on it.  Does the practice match the
>>> theory in this case, or is there some sequence of events that I missed?
>>
>> Below is the sequence:
>>
>> CPU1                                    CPU2
>>
>>                                         1. srcu_read_lock_lite()
>>
>>                                         2. srcu_read_unlock_lite()
>>
>> 3. synchronize_srcu()
>>
>> 3.1 srcu_readers_lock_idx() is
>>     called with gp = false, as
>>     srcu_read_lock_lite() was never
>>     called on this CPU for this
>>     srcu_struct.  So
>>     ssp->sda->srcu_reader_flavor is
>>     not set for CPU1's sda.
> 
> Good eyes!  Yes, the scan that sums the ->srcu_unlock_count[] counters
> must also OR together the ->srcu_reader_flavor fields.
> 

Agree

>> 3.2 Inside srcu_readers_lock_idx(),
>>     "mask" contains SRCU_READ_FLAVOR_LITE,
>>     as CPU2's sdp->srcu_reader_flavor has it.
>>
>> 3.3 CPU1 keeps returning false from the
>>     below check until CPU1 does at least
>>     one srcu_read_lock_lite() call or
>>     the thread migrates:
>>
>>     if (mask & SRCU_READ_FLAVOR_LITE && !gp)
>>             return false;
> 
> This is also fixed by the OR of the ->srcu_reader_flavor fields, correct?
> 

Yes, agree.

> I guess I could claim that this bug prevents the wrong-CPU bug above from
> resulting in a too-short SRCU grace period, but it is of course better to
> just fix the bugs.  ;-)
> 
>>>>> +   return sum == unlocks;
>>>>> }
>>>>>
>>>>> /*
>>>>> @@ -473,6 +482,7 @@ static unsigned long srcu_readers_unlock_idx(struct srcu_struct *ssp, int idx)
>>>>>  */
>>>>> static bool srcu_readers_active_idx_check(struct srcu_struct *ssp, int idx)
>>>>> {
>>>>> +   bool did_gp = !!(raw_cpu_read(ssp->sda->srcu_reader_flavor) & SRCU_READ_FLAVOR_LITE);
>>>>
>>>> sda->srcu_reader_flavor is only set when CONFIG_PROVE_RCU is enabled. But
>>>> we need the reader flavor information for the srcu lite variant to work.
>>>> So, the lite variant does not work when CONFIG_PROVE_RCU is disabled. Am I
>>>> missing something obvious here?
>>>
>>> At first glance, it appears that I am the one who missed something obvious.
>>> Including in testing, which failed to uncover this issue.
>>>
>>> Thank you for the careful reviews!
>>
>> Sure thing, no problem!
> 
> And again, thank you!
> 

Again, no problem!

- Neeraj
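To summarize the fix agreed on in the thread, one plausible shape is to
have the unlock-side scan OR together the per-CPU reader-flavor bits, so
the updater sees the _lite flavor even when the reader ran on a different
CPU, and regardless of CONFIG_PROVE_RCU. The rdm out-parameter name is an
assumption; the actual follow-up patches may differ in detail.

/*
 * Sketch: sum the unlock counters and also gather the reader flavors
 * from all CPUs, unconditionally rather than only under PROVE_RCU.
 */
static unsigned long srcu_readers_unlock_idx(struct srcu_struct *ssp, int idx,
					     unsigned long *rdm)
{
	int cpu;
	unsigned long mask = 0;
	unsigned long sum = 0;

	for_each_possible_cpu(cpu) {
		struct srcu_data *sdp = per_cpu_ptr(ssp->sda, cpu);

		sum += atomic_long_read(&sdp->srcu_unlock_count[idx]);
		mask = mask | READ_ONCE(sdp->srcu_reader_flavor);
	}
	WARN_ONCE(IS_ENABLED(CONFIG_PROVE_RCU) && (mask & (mask - 1)),
		  "Mixed reader flavors for srcu_struct at %ps.\n", ssp);
	*rdm = mask;
	return sum;
}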
diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index 84daaa33ea0ab..4ba96e2cfa405 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -56,6 +56,13 @@ void call_srcu(struct srcu_struct *ssp, struct rcu_head *head,
 void cleanup_srcu_struct(struct srcu_struct *ssp);
 int __srcu_read_lock(struct srcu_struct *ssp) __acquires(ssp);
 void __srcu_read_unlock(struct srcu_struct *ssp, int idx) __releases(ssp);
+#ifdef CONFIG_TINY_SRCU
+#define __srcu_read_lock_lite __srcu_read_lock
+#define __srcu_read_unlock_lite __srcu_read_unlock
+#else // #ifdef CONFIG_TINY_SRCU
+int __srcu_read_lock_lite(struct srcu_struct *ssp) __acquires(ssp);
+void __srcu_read_unlock_lite(struct srcu_struct *ssp, int idx) __releases(ssp);
+#endif // #else // #ifdef CONFIG_TINY_SRCU
 void synchronize_srcu(struct srcu_struct *ssp);
 
 #define SRCU_GET_STATE_COMPLETED 0x1
@@ -179,7 +186,7 @@ static inline int srcu_read_lock_held(const struct srcu_struct *ssp)
 #if defined(CONFIG_PROVE_RCU) && defined(CONFIG_TREE_SRCU)
 void srcu_check_read_flavor(struct srcu_struct *ssp, int read_flavor);
 #else
-static inline void srcu_check_read_flavor(struct srcu_struct *ssp, int read_flavor) { }
+#define srcu_check_read_flavor(ssp, read_flavor) do { } while (0)
 #endif
 
 
@@ -249,6 +256,32 @@ static inline int srcu_read_lock(struct srcu_struct *ssp) __acquires(ssp)
 	return retval;
 }
 
+/**
+ * srcu_read_lock_lite - register a new reader for an SRCU-protected structure.
+ * @ssp: srcu_struct in which to register the new reader.
+ *
+ * Enter an SRCU read-side critical section, but for a light-weight
+ * smp_mb()-free reader.  See srcu_read_lock() for more information.
+ *
+ * If srcu_read_lock_lite() is ever used on an srcu_struct structure,
+ * then none of the other flavors may be used, whether before, during,
+ * or after.  Note that grace-period auto-expediting is disabled for _lite
+ * srcu_struct structures because auto-expedited grace periods invoke
+ * synchronize_rcu_expedited(), IPIs and all.
+ *
+ * Note that srcu_read_lock_lite() can be invoked only from those contexts
+ * where RCU is watching.  Otherwise, lockdep will complain.
+ */
+static inline int srcu_read_lock_lite(struct srcu_struct *ssp) __acquires(ssp)
+{
+	int retval;
+
+	srcu_check_read_flavor(ssp, SRCU_READ_FLAVOR_LITE);
+	retval = __srcu_read_lock_lite(ssp);
+	rcu_try_lock_acquire(&ssp->dep_map);
+	return retval;
+}
+
 /**
  * srcu_read_lock_nmisafe - register a new reader for an SRCU-protected structure.
  * @ssp: srcu_struct in which to register the new reader.
@@ -325,6 +358,22 @@ static inline void srcu_read_unlock(struct srcu_struct *ssp, int idx)
 	__srcu_read_unlock(ssp, idx);
 }
 
+/**
+ * srcu_read_unlock_lite - unregister a old reader from an SRCU-protected structure.
+ * @ssp: srcu_struct in which to unregister the old reader.
+ * @idx: return value from corresponding srcu_read_lock().
+ *
+ * Exit a light-weight SRCU read-side critical section.
+ */
+static inline void srcu_read_unlock_lite(struct srcu_struct *ssp, int idx)
+	__releases(ssp)
+{
+	WARN_ON_ONCE(idx & ~0x1);
+	srcu_check_read_flavor(ssp, SRCU_READ_FLAVOR_LITE);
+	srcu_lock_release(&ssp->dep_map);
+	__srcu_read_unlock(ssp, idx);
+}
+
 /**
  * srcu_read_unlock_nmisafe - unregister a old reader from an SRCU-protected structure.
  * @ssp: srcu_struct in which to unregister the old reader.
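A minimal usage sketch for the API added above. The srcu_struct, the
protected structure, and the reader/updater functions here are all
hypothetical; only the _lite() entry points, srcu_dereference(), and
synchronize_srcu() come from the kernel's SRCU API.

#include <linux/srcu.h>
#include <linux/slab.h>

DEFINE_SRCU(my_srcu);			/* Hypothetical srcu_struct. */

struct foo {
	int a;				/* Hypothetical payload. */
};
static struct foo __rcu *gp;		/* Protected by my_srcu. */

static int reader(void)
{
	struct foo *p;
	int idx, val = -1;

	idx = srcu_read_lock_lite(&my_srcu);	/* No smp_mb() on this path. */
	p = srcu_dereference(gp, &my_srcu);
	if (p)
		val = p->a;
	srcu_read_unlock_lite(&my_srcu, idx);
	return val;
}

static void updater(struct foo *newp)
{
	struct foo *oldp = rcu_replace_pointer(gp, newp, true);

	synchronize_srcu(&my_srcu);	/* Waits for _lite readers, too. */
	kfree(oldp);
}

Note that once srcu_read_lock_lite() has been used on my_srcu, no other
reader flavor may be used on it, per the kerneldoc above.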
diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h
index 79ad809c7f035..8074138cbd624 100644
--- a/include/linux/srcutree.h
+++ b/include/linux/srcutree.h
@@ -46,6 +46,7 @@ struct srcu_data {
 /* Values for ->srcu_reader_flavor. */
 #define SRCU_READ_FLAVOR_NORMAL	0x1	// srcu_read_lock().
 #define SRCU_READ_FLAVOR_NMI	0x2	// srcu_read_lock_nmisafe().
+#define SRCU_READ_FLAVOR_LITE	0x4	// srcu_read_lock_lite().
 
 /*
  * Node in SRCU combining tree, similar in function to rcu_data.
diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
index 4c51be484b48a..bf51758cf4a64 100644
--- a/kernel/rcu/srcutree.c
+++ b/kernel/rcu/srcutree.c
@@ -429,20 +429,29 @@ static bool srcu_gp_is_expedited(struct srcu_struct *ssp)
 }
 
 /*
- * Returns approximate total of the readers' ->srcu_lock_count[] values
- * for the rank of per-CPU counters specified by idx.
+ * Computes approximate total of the readers' ->srcu_lock_count[] values
+ * for the rank of per-CPU counters specified by idx, and returns true if
+ * the caller did the proper barrier (gp), and if the count of the locks
+ * matches that of the unlocks passed in.
  */
-static unsigned long srcu_readers_lock_idx(struct srcu_struct *ssp, int idx)
+static bool srcu_readers_lock_idx(struct srcu_struct *ssp, int idx, bool gp, unsigned long unlocks)
 {
 	int cpu;
+	unsigned long mask = 0;
 	unsigned long sum = 0;
 
 	for_each_possible_cpu(cpu) {
 		struct srcu_data *sdp = per_cpu_ptr(ssp->sda, cpu);
 
 		sum += atomic_long_read(&sdp->srcu_lock_count[idx]);
+		if (IS_ENABLED(CONFIG_PROVE_RCU))
+			mask = mask | READ_ONCE(sdp->srcu_reader_flavor);
 	}
-	return sum;
+	WARN_ONCE(IS_ENABLED(CONFIG_PROVE_RCU) && (mask & (mask - 1)),
+		  "Mixed reader flavors for srcu_struct at %ps.\n", ssp);
+	if (mask & SRCU_READ_FLAVOR_LITE && !gp)
+		return false;
+	return sum == unlocks;
 }
 
 /*
@@ -473,6 +482,7 @@ static unsigned long srcu_readers_unlock_idx(struct srcu_struct *ssp, int idx)
  */
 static bool srcu_readers_active_idx_check(struct srcu_struct *ssp, int idx)
 {
+	bool did_gp = !!(raw_cpu_read(ssp->sda->srcu_reader_flavor) & SRCU_READ_FLAVOR_LITE);
 	unsigned long unlocks;
 
 	unlocks = srcu_readers_unlock_idx(ssp, idx);
@@ -482,13 +492,16 @@ static bool srcu_readers_active_idx_check(struct srcu_struct *ssp, int idx)
 	 * unlock is counted. Needs to be a smp_mb() as the read side may
 	 * contain a read from a variable that is written to before the
 	 * synchronize_srcu() in the write side. In this case smp_mb()s
-	 * A and B act like the store buffering pattern.
+	 * A and B (or X and Y) act like the store buffering pattern.
 	 *
-	 * This smp_mb() also pairs with smp_mb() C to prevent accesses
-	 * after the synchronize_srcu() from being executed before the
-	 * grace period ends.
+	 * This smp_mb() also pairs with smp_mb() C (or, in the case of X,
+	 * Z) to prevent accesses after the synchronize_srcu() from being
+	 * executed before the grace period ends.
 	 */
-	smp_mb(); /* A */
+	if (!did_gp)
+		smp_mb(); /* A */
+	else
+		synchronize_rcu(); /* X */
 
 	/*
 	 * If the locks are the same as the unlocks, then there must have
@@ -546,7 +559,7 @@ static bool srcu_readers_active_idx_check(struct srcu_struct *ssp, int idx)
 	 * which are unlikely to be configured with an address space fully
 	 * populated with memory, at least not anytime soon.
 	 */
-	return srcu_readers_lock_idx(ssp, idx) == unlocks;
+	return srcu_readers_lock_idx(ssp, idx, did_gp, unlocks);
 }
 
 /**
@@ -750,6 +763,47 @@ void __srcu_read_unlock(struct srcu_struct *ssp, int idx)
 }
 EXPORT_SYMBOL_GPL(__srcu_read_unlock);
 
+/*
+ * Counts the new reader in the appropriate per-CPU element of the
+ * srcu_struct.  Returns an index that must be passed to the matching
+ * srcu_read_unlock_lite().
+ *
+ * Note that this_cpu_inc() is an RCU read-side critical section either
+ * because it disables interrupts, because it is a single instruction,
+ * or because it is a read-modify-write atomic operation, depending on
+ * the whims of the architecture.
+ */
+int __srcu_read_lock_lite(struct srcu_struct *ssp)
+{
+	int idx;
+
+	RCU_LOCKDEP_WARN(!rcu_is_watching(), "RCU must be watching srcu_read_lock_lite().");
+	idx = READ_ONCE(ssp->srcu_idx) & 0x1;
+	this_cpu_inc(ssp->sda->srcu_lock_count[idx].counter); /* Y */
+	barrier(); /* Avoid leaking the critical section. */
+	return idx;
+}
+EXPORT_SYMBOL_GPL(__srcu_read_lock_lite);
+
+/*
+ * Removes the count for the old reader from the appropriate
+ * per-CPU element of the srcu_struct.  Note that this may well be a
+ * different CPU than that which was incremented by the corresponding
+ * srcu_read_lock_lite(), but it must be within the same task.
+ *
+ * Note that this_cpu_inc() is an RCU read-side critical section either
+ * because it disables interrupts, because it is a single instruction,
+ * or because it is a read-modify-write atomic operation, depending on
+ * the whims of the architecture.
+ */
+void __srcu_read_unlock_lite(struct srcu_struct *ssp, int idx)
+{
+	barrier(); /* Avoid leaking the critical section. */
+	this_cpu_inc(ssp->sda->srcu_unlock_count[idx].counter); /* Z */
+	RCU_LOCKDEP_WARN(!rcu_is_watching(), "RCU must be watching srcu_read_unlock_lite().");
+}
+EXPORT_SYMBOL_GPL(__srcu_read_unlock_lite);
+
 #ifdef CONFIG_NEED_SRCU_NMI_SAFE
 
 /*
@@ -1134,6 +1188,8 @@ static void srcu_flip(struct srcu_struct *ssp)
 	 * it stays until either (1) Compilers learn about this sort of
 	 * control dependency or (2) Some production workload running on
 	 * a production system is unduly delayed by this slowpath smp_mb().
+	 * Except for _lite() readers, where it is inoperative, which
+	 * means that it is a good thing that it is redundant.
 	 */
 	smp_mb(); /* E */  /* Pairs with B and C. */
 
@@ -1152,7 +1208,8 @@ static void srcu_flip(struct srcu_struct *ssp)
 
 /*
  * If SRCU is likely idle, in other words, the next SRCU grace period
- * should be expedited, return true, otherwise return false.
+ * should be expedited, return true, otherwise return false.  Except that
+ * in the presence of _lite() readers, always return false.
  *
  * Note that it is OK for several current from-idle requests for a new
  * grace period from idle to specify expediting because they will all end
@@ -1181,6 +1238,9 @@ static bool srcu_should_expedite(struct srcu_struct *ssp)
 	unsigned long tlast;
 
 	check_init_srcu_struct(ssp);
+	/* If _lite() readers, don't do unsolicited expediting. */
+	if (this_cpu_read(ssp->sda->srcu_reader_flavor) & SRCU_READ_FLAVOR_LITE)
+		return false;
 	/* If the local srcu_data structure has callbacks, not idle. */
 	sdp = raw_cpu_ptr(ssp->sda);
 	spin_lock_irqsave_rcu_node(sdp, flags);
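Putting the two fixes discussed in the thread together, the grace-period
check might end up shaped roughly like the sketch below, reusing the
hypothetical rdm out-parameter from the earlier srcu_readers_unlock_idx()
sketch: did_gp is derived from all CPUs' flavor bits rather than the
current CPU's alone, and no longer depends on CONFIG_PROVE_RCU.

/* Sketch only; the actual follow-up patches may differ in detail. */
static bool srcu_readers_active_idx_check(struct srcu_struct *ssp, int idx)
{
	unsigned long rdm;
	unsigned long unlocks = srcu_readers_unlock_idx(ssp, idx, &rdm);
	bool did_gp = !!(rdm & SRCU_READ_FLAVOR_LITE);

	if (did_gp)
		synchronize_rcu();	/* X, pairs with Z. */
	else
		smp_mb();		/* A, pairs with B and C. */
	return srcu_readers_lock_idx(ssp, idx, did_gp, unlocks);
}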