Message ID | 20230223063022.2592212-1-qiang1.zhang@intel.com (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | rcu-tasks: Directly invoke rcuwait_wake_up() in call_rcu_tasks_generic() | expand |
> From: Zqiang <qiang1.zhang@intel.com> > Sent: Thursday, February 23, 2023 2:30 PM > To: paulmck@kernel.org; frederic@kernel.org; quic_neeraju@quicinc.com; > joel@joelfernandes.org > Cc: rcu@vger.kernel.org; linux-kernel@vger.kernel.org > Subject: [PATCH] rcu-tasks: Directly invoke rcuwait_wake_up() in > call_rcu_tasks_generic() > > According to commit '3063b33a347c ("Avoid raw-spinlocked wakeups from > call_rcu_tasks_generic()")', the grace-period kthread is delayed to wakeup > using irq_work_queue() is because if the caller of > call_rcu_tasks_generic() holds a raw spinlock, when the kernel is built with > CONFIG_PROVE_RAW_LOCK_NESTING=y, due to a spinlock will be hold in > wake_up(), so the lockdep splats will happen. but now using > rcuwait_wake_up() to wakeup grace-period kthread instead of wake_up(), in > rcuwait_wake_up() no spinlock will be acquired, so this commit remove using There are still spinlock-acquisition and spinlock-release invocations within the call path from rcuwait_wake_up(). rcuwait_wake_up() -> wake_up_process() -> try_to_wake_up(), then: raw_spin_lock_irqsave() ... raw_spin_unlock_irqrestore > irq_work_queue(), invoke rcuwait_wake_up() directly in > call_rcu_tasks_generic(). > > Signed-off-by: Zqiang <qiang1.zhang@intel.com> > --- > kernel/rcu/tasks.h | 16 +--------------- > 1 file changed, 1 insertion(+), 15 deletions(-) > > diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h index > baf7ec178155..757b8c6da1ad 100644 > --- a/kernel/rcu/tasks.h > +++ b/kernel/rcu/tasks.h > @@ -39,7 +39,6 @@ struct rcu_tasks_percpu { > unsigned long rtp_jiffies; > unsigned long rtp_n_lock_retries; > struct work_struct rtp_work; > - struct irq_work rtp_irq_work; > struct rcu_head barrier_q_head; > struct list_head rtp_blkd_tasks; > int cpu; > @@ -112,12 +111,9 @@ struct rcu_tasks { > char *kname; > }; > > -static void call_rcu_tasks_iw_wakeup(struct irq_work *iwp); > - > #define DEFINE_RCU_TASKS(rt_name, gp, call, n) > \ > static DEFINE_PER_CPU(struct rcu_tasks_percpu, rt_name ## __percpu) = { > \ > .lock = __RAW_SPIN_LOCK_UNLOCKED(rt_name ## > __percpu.cbs_pcpu_lock), \ > - .rtp_irq_work = IRQ_WORK_INIT_HARD(call_rcu_tasks_iw_wakeup), > \ > }; > \ > static struct rcu_tasks rt_name = > \ > { > \ > @@ -273,16 +269,6 @@ static void cblist_init_generic(struct rcu_tasks *rtp) > pr_info("%s: Setting shift to %d and lim to %d.\n", __func__, > data_race(rtp->percpu_enqueue_shift), data_race(rtp- > >percpu_enqueue_lim)); > } > > -// IRQ-work handler that does deferred wakeup for call_rcu_tasks_generic(). > -static void call_rcu_tasks_iw_wakeup(struct irq_work *iwp) -{ > - struct rcu_tasks *rtp; > - struct rcu_tasks_percpu *rtpcp = container_of(iwp, struct > rcu_tasks_percpu, rtp_irq_work); > - > - rtp = rtpcp->rtpp; > - rcuwait_wake_up(&rtp->cbs_wait); > -} > - > // Enqueue a callback for the specified flavor of Tasks RCU. > static void call_rcu_tasks_generic(struct rcu_head *rhp, rcu_callback_t func, > struct rcu_tasks *rtp) > @@ -334,7 +320,7 @@ static void call_rcu_tasks_generic(struct rcu_head > *rhp, rcu_callback_t func, > rcu_read_unlock(); > /* We can't create the thread unless interrupts are enabled. */ > if (needwake && READ_ONCE(rtp->kthread_ptr)) > - irq_work_queue(&rtpcp->rtp_irq_work); > + rcuwait_wake_up(&rtp->cbs_wait); > } > > // RCU callback function for rcu_barrier_tasks_generic(). > -- > 2.25.1
> From: Zqiang <qiang1.zhang@intel.com> > Sent: Thursday, February 23, 2023 2:30 PM > To: paulmck@kernel.org; frederic@kernel.org; quic_neeraju@quicinc.com; > joel@joelfernandes.org > Cc: rcu@vger.kernel.org; linux-kernel@vger.kernel.org > Subject: [PATCH] rcu-tasks: Directly invoke rcuwait_wake_up() in > call_rcu_tasks_generic() > > According to commit '3063b33a347c ("Avoid raw-spinlocked wakeups from > call_rcu_tasks_generic()")', the grace-period kthread is delayed to wakeup > using irq_work_queue() is because if the caller of > call_rcu_tasks_generic() holds a raw spinlock, when the kernel is built with > CONFIG_PROVE_RAW_LOCK_NESTING=y, due to a spinlock will be hold in > wake_up(), so the lockdep splats will happen. but now using > rcuwait_wake_up() to wakeup grace-period kthread instead of wake_up(), in > rcuwait_wake_up() no spinlock will be acquired, so this commit remove using > >There are still spinlock-acquisition and spinlock-release invocations within the call path from rcuwait_wake_up(). > >rcuwait_wake_up() -> wake_up_process() -> try_to_wake_up(), then: > > raw_spin_lock_irqsave() > ... > raw_spin_unlock_irqrestore Yes, but this is raw_spinlock acquisition and release(note: spinlock will convert to sleepable lock in Preempt-RT kernel, but raw spinlock is not change). acquire raw_spinlock -> acquire spinlock will trigger lockdep warning. Thanks Zqiang > > > irq_work_queue(), invoke rcuwait_wake_up() directly in > call_rcu_tasks_generic(). > > Signed-off-by: Zqiang <qiang1.zhang@intel.com> > --- > kernel/rcu/tasks.h | 16 +--------------- > 1 file changed, 1 insertion(+), 15 deletions(-) > > diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h index > baf7ec178155..757b8c6da1ad 100644 > --- a/kernel/rcu/tasks.h > +++ b/kernel/rcu/tasks.h > @@ -39,7 +39,6 @@ struct rcu_tasks_percpu { > unsigned long rtp_jiffies; > unsigned long rtp_n_lock_retries; > struct work_struct rtp_work; > - struct irq_work rtp_irq_work; > struct rcu_head barrier_q_head; > struct list_head rtp_blkd_tasks; > int cpu; > @@ -112,12 +111,9 @@ struct rcu_tasks { > char *kname; > }; > > -static void call_rcu_tasks_iw_wakeup(struct irq_work *iwp); > - > #define DEFINE_RCU_TASKS(rt_name, gp, call, n) > \ > static DEFINE_PER_CPU(struct rcu_tasks_percpu, rt_name ## __percpu) = { > \ > .lock = __RAW_SPIN_LOCK_UNLOCKED(rt_name ## > __percpu.cbs_pcpu_lock), \ > - .rtp_irq_work = IRQ_WORK_INIT_HARD(call_rcu_tasks_iw_wakeup), > \ > }; > \ > static struct rcu_tasks rt_name = > \ > { > \ > @@ -273,16 +269,6 @@ static void cblist_init_generic(struct rcu_tasks *rtp) > pr_info("%s: Setting shift to %d and lim to %d.\n", __func__, > data_race(rtp->percpu_enqueue_shift), data_race(rtp- > >percpu_enqueue_lim)); > } > > -// IRQ-work handler that does deferred wakeup for call_rcu_tasks_generic(). > -static void call_rcu_tasks_iw_wakeup(struct irq_work *iwp) -{ > - struct rcu_tasks *rtp; > - struct rcu_tasks_percpu *rtpcp = container_of(iwp, struct > rcu_tasks_percpu, rtp_irq_work); > - > - rtp = rtpcp->rtpp; > - rcuwait_wake_up(&rtp->cbs_wait); > -} > - > // Enqueue a callback for the specified flavor of Tasks RCU. > static void call_rcu_tasks_generic(struct rcu_head *rhp, rcu_callback_t func, > struct rcu_tasks *rtp) > @@ -334,7 +320,7 @@ static void call_rcu_tasks_generic(struct rcu_head > *rhp, rcu_callback_t func, > rcu_read_unlock(); > /* We can't create the thread unless interrupts are enabled. */ > if (needwake && READ_ONCE(rtp->kthread_ptr)) > - irq_work_queue(&rtpcp->rtp_irq_work); > + rcuwait_wake_up(&rtp->cbs_wait); > } > > // RCU callback function for rcu_barrier_tasks_generic(). > -- > 2.25.1
> From: Zhang, Qiang1 <qiang1.zhang@intel.com> > Sent: Thursday, February 23, 2023 4:43 PM > To: Zhuo, Qiuxu <qiuxu.zhuo@intel.com>; paulmck@kernel.org; > frederic@kernel.org; quic_neeraju@quicinc.com; joel@joelfernandes.org > Cc: rcu@vger.kernel.org; linux-kernel@vger.kernel.org > Subject: RE: [PATCH] rcu-tasks: Directly invoke rcuwait_wake_up() in > call_rcu_tasks_generic() > > > From: Zqiang <qiang1.zhang@intel.com> > > Sent: Thursday, February 23, 2023 2:30 PM > > To: paulmck@kernel.org; frederic@kernel.org; quic_neeraju@quicinc.com; > > joel@joelfernandes.org > > Cc: rcu@vger.kernel.org; linux-kernel@vger.kernel.org > > Subject: [PATCH] rcu-tasks: Directly invoke rcuwait_wake_up() in > > call_rcu_tasks_generic() > > > > According to commit '3063b33a347c ("Avoid raw-spinlocked wakeups from > > call_rcu_tasks_generic()")', the grace-period kthread is delayed to > > wakeup using irq_work_queue() is because if the caller of > > call_rcu_tasks_generic() holds a raw spinlock, when the kernel is > > built with CONFIG_PROVE_RAW_LOCK_NESTING=y, due to a spinlock will > be > > hold in wake_up(), so the lockdep splats will happen. but now using > > rcuwait_wake_up() to wakeup grace-period kthread instead of wake_up(), > > in > > rcuwait_wake_up() no spinlock will be acquired, so this commit remove > > using > > > >There are still spinlock-acquisition and spinlock-release invocations within > the call path from rcuwait_wake_up(). > > > >rcuwait_wake_up() -> wake_up_process() -> try_to_wake_up(), then: > > > > raw_spin_lock_irqsave() > > ... > > raw_spin_unlock_irqrestore > > Yes, but this is raw_spinlock acquisition and release(note: spinlock will > convert to sleepable lock in Preempt-RT kernel, but raw spinlock is not > change). Hi Qiang, Yes, you're correct. Thanks for correcting me. -Qiuxu > acquire raw_spinlock -> acquire spinlock will trigger lockdep warning. > > Thanks > Zqiang
On Thu, Feb 23, 2023 at 08:43:05AM +0000, Zhang, Qiang1 wrote: > > From: Zqiang <qiang1.zhang@intel.com> > > Sent: Thursday, February 23, 2023 2:30 PM > > To: paulmck@kernel.org; frederic@kernel.org; quic_neeraju@quicinc.com; > > joel@joelfernandes.org > > Cc: rcu@vger.kernel.org; linux-kernel@vger.kernel.org > > Subject: [PATCH] rcu-tasks: Directly invoke rcuwait_wake_up() in > > call_rcu_tasks_generic() > > > > According to commit '3063b33a347c ("Avoid raw-spinlocked wakeups from > > call_rcu_tasks_generic()")', the grace-period kthread is delayed to wakeup > > using irq_work_queue() is because if the caller of > > call_rcu_tasks_generic() holds a raw spinlock, when the kernel is built with > > CONFIG_PROVE_RAW_LOCK_NESTING=y, due to a spinlock will be hold in > > wake_up(), so the lockdep splats will happen. but now using > > rcuwait_wake_up() to wakeup grace-period kthread instead of wake_up(), in > > rcuwait_wake_up() no spinlock will be acquired, so this commit remove using > > > >There are still spinlock-acquisition and spinlock-release invocations within the call path from rcuwait_wake_up(). > > > >rcuwait_wake_up() -> wake_up_process() -> try_to_wake_up(), then: > > > > raw_spin_lock_irqsave() > > ... > > raw_spin_unlock_irqrestore > > Yes, but this is raw_spinlock acquisition and release(note: spinlock will convert to > sleepable lock in Preempt-RT kernel, but raw spinlock is not change). > > acquire raw_spinlock -> acquire spinlock will trigger lockdep warning. Is this really safe in the long run though? I seem to remember there are weird locking dependencies if RCU is used from within the scheduler [1]. I prefer to keep it as irq_work_queue() unless you are seeing some benefit. Generally, there has to be a 'win' or other justification for adding more risk. thanks, - Joel [1] http://www.joelfernandes.org/rcu/scheduler/locking/2019/09/02/rcu-schedlocks.html > > irq_work_queue(), invoke rcuwait_wake_up() directly in > > call_rcu_tasks_generic(). > > > > Signed-off-by: Zqiang <qiang1.zhang@intel.com> > > --- > > kernel/rcu/tasks.h | 16 +--------------- > > 1 file changed, 1 insertion(+), 15 deletions(-) > > > > diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h index > > baf7ec178155..757b8c6da1ad 100644 > > --- a/kernel/rcu/tasks.h > > +++ b/kernel/rcu/tasks.h > > @@ -39,7 +39,6 @@ struct rcu_tasks_percpu { > > unsigned long rtp_jiffies; > > unsigned long rtp_n_lock_retries; > > struct work_struct rtp_work; > > - struct irq_work rtp_irq_work; > > struct rcu_head barrier_q_head; > > struct list_head rtp_blkd_tasks; > > int cpu; > > @@ -112,12 +111,9 @@ struct rcu_tasks { > > char *kname; > > }; > > > > -static void call_rcu_tasks_iw_wakeup(struct irq_work *iwp); > > - > > #define DEFINE_RCU_TASKS(rt_name, gp, call, n) > > \ > > static DEFINE_PER_CPU(struct rcu_tasks_percpu, rt_name ## __percpu) = { > > \ > > .lock = __RAW_SPIN_LOCK_UNLOCKED(rt_name ## > > __percpu.cbs_pcpu_lock), \ > > - .rtp_irq_work = IRQ_WORK_INIT_HARD(call_rcu_tasks_iw_wakeup), > > \ > > }; > > \ > > static struct rcu_tasks rt_name = > > \ > > { > > \ > > @@ -273,16 +269,6 @@ static void cblist_init_generic(struct rcu_tasks *rtp) > > pr_info("%s: Setting shift to %d and lim to %d.\n", __func__, > > data_race(rtp->percpu_enqueue_shift), data_race(rtp- > > >percpu_enqueue_lim)); > > } > > > > -// IRQ-work handler that does deferred wakeup for call_rcu_tasks_generic(). > > -static void call_rcu_tasks_iw_wakeup(struct irq_work *iwp) -{ > > - struct rcu_tasks *rtp; > > - struct rcu_tasks_percpu *rtpcp = container_of(iwp, struct > > rcu_tasks_percpu, rtp_irq_work); > > - > > - rtp = rtpcp->rtpp; > > - rcuwait_wake_up(&rtp->cbs_wait); > > -} > > - > > // Enqueue a callback for the specified flavor of Tasks RCU. > > static void call_rcu_tasks_generic(struct rcu_head *rhp, rcu_callback_t func, > > struct rcu_tasks *rtp) > > @@ -334,7 +320,7 @@ static void call_rcu_tasks_generic(struct rcu_head > > *rhp, rcu_callback_t func, > > rcu_read_unlock(); > > /* We can't create the thread unless interrupts are enabled. */ > > if (needwake && READ_ONCE(rtp->kthread_ptr)) > > - irq_work_queue(&rtpcp->rtp_irq_work); > > + rcuwait_wake_up(&rtp->cbs_wait); > > } > > > > // RCU callback function for rcu_barrier_tasks_generic(). > > -- > > 2.25.1 >
> On Feb 23, 2023, at 11:10 AM, Joel Fernandes <joel@joelfernandes.org> wrote: > > On Thu, Feb 23, 2023 at 08:43:05AM +0000, Zhang, Qiang1 wrote: >>> From: Zqiang <qiang1.zhang@intel.com> >>> Sent: Thursday, February 23, 2023 2:30 PM >>> To: paulmck@kernel.org; frederic@kernel.org; quic_neeraju@quicinc.com; >>> joel@joelfernandes.org >>> Cc: rcu@vger.kernel.org; linux-kernel@vger.kernel.org >>> Subject: [PATCH] rcu-tasks: Directly invoke rcuwait_wake_up() in >>> call_rcu_tasks_generic() >>> >>> According to commit '3063b33a347c ("Avoid raw-spinlocked wakeups from >>> call_rcu_tasks_generic()")', the grace-period kthread is delayed to wakeup >>> using irq_work_queue() is because if the caller of >>> call_rcu_tasks_generic() holds a raw spinlock, when the kernel is built with >>> CONFIG_PROVE_RAW_LOCK_NESTING=y, due to a spinlock will be hold in >>> wake_up(), so the lockdep splats will happen. but now using >>> rcuwait_wake_up() to wakeup grace-period kthread instead of wake_up(), in >>> rcuwait_wake_up() no spinlock will be acquired, so this commit remove using >>> >>> There are still spinlock-acquisition and spinlock-release invocations within the call path from rcuwait_wake_up(). >>> >>> rcuwait_wake_up() -> wake_up_process() -> try_to_wake_up(), then: >>> >>> raw_spin_lock_irqsave() >>> ... >>> raw_spin_unlock_irqrestore >> >> Yes, but this is raw_spinlock acquisition and release(note: spinlock will convert to >> sleepable lock in Preempt-RT kernel, but raw spinlock is not change). >> >> acquire raw_spinlock -> acquire spinlock will trigger lockdep warning. > > Is this really safe in the long run though? I seem to remember there are > weird locking dependencies if RCU is used from within the scheduler [1]. > > I prefer to keep it as irq_work_queue() unless you are seeing some benefit. > Generally, there has to be a 'win' or other justification for adding more > risk. On second thought, you are deleting a decent number of lines. What do others think? I will take a closer look later, I am interested in researching the new lock dependency this adds. - Joel > > thanks, > > - Joel > [1] http://www.joelfernandes.org/rcu/scheduler/locking/2019/09/02/rcu-schedlocks.html > >>> irq_work_queue(), invoke rcuwait_wake_up() directly in >>> call_rcu_tasks_generic(). >>> >>> Signed-off-by: Zqiang <qiang1.zhang@intel.com> >>> --- >>> kernel/rcu/tasks.h | 16 +--------------- >>> 1 file changed, 1 insertion(+), 15 deletions(-) >>> >>> diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h index >>> baf7ec178155..757b8c6da1ad 100644 >>> --- a/kernel/rcu/tasks.h >>> +++ b/kernel/rcu/tasks.h >>> @@ -39,7 +39,6 @@ struct rcu_tasks_percpu { >>> unsigned long rtp_jiffies; >>> unsigned long rtp_n_lock_retries; >>> struct work_struct rtp_work; >>> - struct irq_work rtp_irq_work; >>> struct rcu_head barrier_q_head; >>> struct list_head rtp_blkd_tasks; >>> int cpu; >>> @@ -112,12 +111,9 @@ struct rcu_tasks { >>> char *kname; >>> }; >>> >>> -static void call_rcu_tasks_iw_wakeup(struct irq_work *iwp); >>> - >>> #define DEFINE_RCU_TASKS(rt_name, gp, call, n) >>> \ >>> static DEFINE_PER_CPU(struct rcu_tasks_percpu, rt_name ## __percpu) = { >>> \ >>> .lock = __RAW_SPIN_LOCK_UNLOCKED(rt_name ## >>> __percpu.cbs_pcpu_lock), \ >>> - .rtp_irq_work = IRQ_WORK_INIT_HARD(call_rcu_tasks_iw_wakeup), >>> \ >>> }; >>> \ >>> static struct rcu_tasks rt_name = >>> \ >>> { >>> \ >>> @@ -273,16 +269,6 @@ static void cblist_init_generic(struct rcu_tasks *rtp) >>> pr_info("%s: Setting shift to %d and lim to %d.\n", __func__, >>> data_race(rtp->percpu_enqueue_shift), data_race(rtp- >>>> percpu_enqueue_lim)); >>> } >>> >>> -// IRQ-work handler that does deferred wakeup for call_rcu_tasks_generic(). >>> -static void call_rcu_tasks_iw_wakeup(struct irq_work *iwp) -{ >>> - struct rcu_tasks *rtp; >>> - struct rcu_tasks_percpu *rtpcp = container_of(iwp, struct >>> rcu_tasks_percpu, rtp_irq_work); >>> - >>> - rtp = rtpcp->rtpp; >>> - rcuwait_wake_up(&rtp->cbs_wait); >>> -} >>> - >>> // Enqueue a callback for the specified flavor of Tasks RCU. >>> static void call_rcu_tasks_generic(struct rcu_head *rhp, rcu_callback_t func, >>> struct rcu_tasks *rtp) >>> @@ -334,7 +320,7 @@ static void call_rcu_tasks_generic(struct rcu_head >>> *rhp, rcu_callback_t func, >>> rcu_read_unlock(); >>> /* We can't create the thread unless interrupts are enabled. */ >>> if (needwake && READ_ONCE(rtp->kthread_ptr)) >>> - irq_work_queue(&rtpcp->rtp_irq_work); >>> + rcuwait_wake_up(&rtp->cbs_wait); >>> } >>> >>> // RCU callback function for rcu_barrier_tasks_generic(). >>> -- >>> 2.25.1 >>
On Thu, Feb 23, 2023 at 11:57:54AM -0500, Joel Fernandes wrote: > > > > On Feb 23, 2023, at 11:10 AM, Joel Fernandes <joel@joelfernandes.org> wrote: > > > > On Thu, Feb 23, 2023 at 08:43:05AM +0000, Zhang, Qiang1 wrote: > >>> From: Zqiang <qiang1.zhang@intel.com> > >>> Sent: Thursday, February 23, 2023 2:30 PM > >>> To: paulmck@kernel.org; frederic@kernel.org; quic_neeraju@quicinc.com; > >>> joel@joelfernandes.org > >>> Cc: rcu@vger.kernel.org; linux-kernel@vger.kernel.org > >>> Subject: [PATCH] rcu-tasks: Directly invoke rcuwait_wake_up() in > >>> call_rcu_tasks_generic() > >>> > >>> According to commit '3063b33a347c ("Avoid raw-spinlocked wakeups from > >>> call_rcu_tasks_generic()")', the grace-period kthread is delayed to wakeup > >>> using irq_work_queue() is because if the caller of > >>> call_rcu_tasks_generic() holds a raw spinlock, when the kernel is built with > >>> CONFIG_PROVE_RAW_LOCK_NESTING=y, due to a spinlock will be hold in > >>> wake_up(), so the lockdep splats will happen. but now using > >>> rcuwait_wake_up() to wakeup grace-period kthread instead of wake_up(), in > >>> rcuwait_wake_up() no spinlock will be acquired, so this commit remove using > >>> > >>> There are still spinlock-acquisition and spinlock-release invocations within the call path from rcuwait_wake_up(). > >>> > >>> rcuwait_wake_up() -> wake_up_process() -> try_to_wake_up(), then: > >>> > >>> raw_spin_lock_irqsave() > >>> ... > >>> raw_spin_unlock_irqrestore > >> > >> Yes, but this is raw_spinlock acquisition and release(note: spinlock will convert to > >> sleepable lock in Preempt-RT kernel, but raw spinlock is not change). > >> > >> acquire raw_spinlock -> acquire spinlock will trigger lockdep warning. > > > > Is this really safe in the long run though? I seem to remember there are > > weird locking dependencies if RCU is used from within the scheduler [1]. > > > > I prefer to keep it as irq_work_queue() unless you are seeing some benefit. > > Generally, there has to be a 'win' or other justification for adding more > > risk. > > On second thought, you are deleting a decent number of lines. > > What do others think? > > I will take a closer look later, I am interested in researching the new lock dependency this adds. One place to start is rcu_read_unlock_trace_special(), keeping firmly in mind that rcu_read_unlock_trace() is intended to be invoked from a great many places. Thanx, Paul > - Joel > > > > > thanks, > > > > - Joel > > [1] http://www.joelfernandes.org/rcu/scheduler/locking/2019/09/02/rcu-schedlocks.html > > > >>> irq_work_queue(), invoke rcuwait_wake_up() directly in > >>> call_rcu_tasks_generic(). > >>> > >>> Signed-off-by: Zqiang <qiang1.zhang@intel.com> > >>> --- > >>> kernel/rcu/tasks.h | 16 +--------------- > >>> 1 file changed, 1 insertion(+), 15 deletions(-) > >>> > >>> diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h index > >>> baf7ec178155..757b8c6da1ad 100644 > >>> --- a/kernel/rcu/tasks.h > >>> +++ b/kernel/rcu/tasks.h > >>> @@ -39,7 +39,6 @@ struct rcu_tasks_percpu { > >>> unsigned long rtp_jiffies; > >>> unsigned long rtp_n_lock_retries; > >>> struct work_struct rtp_work; > >>> - struct irq_work rtp_irq_work; > >>> struct rcu_head barrier_q_head; > >>> struct list_head rtp_blkd_tasks; > >>> int cpu; > >>> @@ -112,12 +111,9 @@ struct rcu_tasks { > >>> char *kname; > >>> }; > >>> > >>> -static void call_rcu_tasks_iw_wakeup(struct irq_work *iwp); > >>> - > >>> #define DEFINE_RCU_TASKS(rt_name, gp, call, n) > >>> \ > >>> static DEFINE_PER_CPU(struct rcu_tasks_percpu, rt_name ## __percpu) = { > >>> \ > >>> .lock = __RAW_SPIN_LOCK_UNLOCKED(rt_name ## > >>> __percpu.cbs_pcpu_lock), \ > >>> - .rtp_irq_work = IRQ_WORK_INIT_HARD(call_rcu_tasks_iw_wakeup), > >>> \ > >>> }; > >>> \ > >>> static struct rcu_tasks rt_name = > >>> \ > >>> { > >>> \ > >>> @@ -273,16 +269,6 @@ static void cblist_init_generic(struct rcu_tasks *rtp) > >>> pr_info("%s: Setting shift to %d and lim to %d.\n", __func__, > >>> data_race(rtp->percpu_enqueue_shift), data_race(rtp- > >>>> percpu_enqueue_lim)); > >>> } > >>> > >>> -// IRQ-work handler that does deferred wakeup for call_rcu_tasks_generic(). > >>> -static void call_rcu_tasks_iw_wakeup(struct irq_work *iwp) -{ > >>> - struct rcu_tasks *rtp; > >>> - struct rcu_tasks_percpu *rtpcp = container_of(iwp, struct > >>> rcu_tasks_percpu, rtp_irq_work); > >>> - > >>> - rtp = rtpcp->rtpp; > >>> - rcuwait_wake_up(&rtp->cbs_wait); > >>> -} > >>> - > >>> // Enqueue a callback for the specified flavor of Tasks RCU. > >>> static void call_rcu_tasks_generic(struct rcu_head *rhp, rcu_callback_t func, > >>> struct rcu_tasks *rtp) > >>> @@ -334,7 +320,7 @@ static void call_rcu_tasks_generic(struct rcu_head > >>> *rhp, rcu_callback_t func, > >>> rcu_read_unlock(); > >>> /* We can't create the thread unless interrupts are enabled. */ > >>> if (needwake && READ_ONCE(rtp->kthread_ptr)) > >>> - irq_work_queue(&rtpcp->rtp_irq_work); > >>> + rcuwait_wake_up(&rtp->cbs_wait); > >>> } > >>> > >>> // RCU callback function for rcu_barrier_tasks_generic(). > >>> -- > >>> 2.25.1 > >>
On Thu, Feb 23, 2023 at 08:43:05AM +0000, Zhang, Qiang1 wrote: > > From: Zqiang <qiang1.zhang@intel.com> > > Sent: Thursday, February 23, 2023 2:30 PM > > To: paulmck@kernel.org; frederic@kernel.org; quic_neeraju@quicinc.com; > > joel@joelfernandes.org > > Cc: rcu@vger.kernel.org; linux-kernel@vger.kernel.org > > Subject: [PATCH] rcu-tasks: Directly invoke rcuwait_wake_up() in > > call_rcu_tasks_generic() > > > > According to commit '3063b33a347c ("Avoid raw-spinlocked wakeups from > > call_rcu_tasks_generic()")', the grace-period kthread is delayed to wakeup > > using irq_work_queue() is because if the caller of > > call_rcu_tasks_generic() holds a raw spinlock, when the kernel is built with > > CONFIG_PROVE_RAW_LOCK_NESTING=y, due to a spinlock will be hold in > > wake_up(), so the lockdep splats will happen. but now using > > rcuwait_wake_up() to wakeup grace-period kthread instead of wake_up(), in > > rcuwait_wake_up() no spinlock will be acquired, so this commit remove using > > > >There are still spinlock-acquisition and spinlock-release invocations within the call path from rcuwait_wake_up(). > > > >rcuwait_wake_up() -> wake_up_process() -> try_to_wake_up(), then: > > > > raw_spin_lock_irqsave() > > ... > > raw_spin_unlock_irqrestore > > Yes, but this is raw_spinlock acquisition and release(note: spinlock will convert to > sleepable lock in Preempt-RT kernel, but raw spinlock is not change). > > acquire raw_spinlock -> acquire spinlock will trigger lockdep warning. > >Is this really safe in the long run though? I seem to remember there are >weird locking dependencies if RCU is used from within the scheduler [1]. > I have been running rcutorture with rcutorture.type = tasks-tracing, so far no problems have been found. >I prefer to keep it as irq_work_queue() unless you are seeing some benefit. >Generally, there has to be a 'win' or other justification for adding more >risk. > >thanks, > >- Joel >[1] http://www.joelfernandes.org/rcu/scheduler/locking/2019/09/02/rcu-schedlocks.html The problem in this link, in an earlier RCU version, rcu_read_unlock_special() Invoke wakeup and enter scheduler can lead to deadlock, but my modification is for call_rcu_tasks_generic(), even if there is a lock dependency problem, we should pay more attention to rcu_read_unlock_trace_special() Thanks Zqiang > > > irq_work_queue(), invoke rcuwait_wake_up() directly in > > call_rcu_tasks_generic(). > > > > Signed-off-by: Zqiang <qiang1.zhang@intel.com> > > --- > > kernel/rcu/tasks.h | 16 +--------------- > > 1 file changed, 1 insertion(+), 15 deletions(-) > > > > diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h index > > baf7ec178155..757b8c6da1ad 100644 > > --- a/kernel/rcu/tasks.h > > +++ b/kernel/rcu/tasks.h > > @@ -39,7 +39,6 @@ struct rcu_tasks_percpu { > > unsigned long rtp_jiffies; > > unsigned long rtp_n_lock_retries; > > struct work_struct rtp_work; > > - struct irq_work rtp_irq_work; > > struct rcu_head barrier_q_head; > > struct list_head rtp_blkd_tasks; > > int cpu; > > @@ -112,12 +111,9 @@ struct rcu_tasks { > > char *kname; > > }; > > > > -static void call_rcu_tasks_iw_wakeup(struct irq_work *iwp); > > - > > #define DEFINE_RCU_TASKS(rt_name, gp, call, n) > > \ > > static DEFINE_PER_CPU(struct rcu_tasks_percpu, rt_name ## __percpu) = { > > \ > > .lock = __RAW_SPIN_LOCK_UNLOCKED(rt_name ## > > __percpu.cbs_pcpu_lock), \ > > - .rtp_irq_work = IRQ_WORK_INIT_HARD(call_rcu_tasks_iw_wakeup), > > \ > > }; > > \ > > static struct rcu_tasks rt_name = > > \ > > { > > \ > > @@ -273,16 +269,6 @@ static void cblist_init_generic(struct rcu_tasks *rtp) > > pr_info("%s: Setting shift to %d and lim to %d.\n", __func__, > > data_race(rtp->percpu_enqueue_shift), data_race(rtp- > > >percpu_enqueue_lim)); > > } > > > > -// IRQ-work handler that does deferred wakeup for call_rcu_tasks_generic(). > > -static void call_rcu_tasks_iw_wakeup(struct irq_work *iwp) -{ > > - struct rcu_tasks *rtp; > > - struct rcu_tasks_percpu *rtpcp = container_of(iwp, struct > > rcu_tasks_percpu, rtp_irq_work); > > - > > - rtp = rtpcp->rtpp; > > - rcuwait_wake_up(&rtp->cbs_wait); > > -} > > - > > // Enqueue a callback for the specified flavor of Tasks RCU. > > static void call_rcu_tasks_generic(struct rcu_head *rhp, rcu_callback_t func, > > struct rcu_tasks *rtp) > > @@ -334,7 +320,7 @@ static void call_rcu_tasks_generic(struct rcu_head > > *rhp, rcu_callback_t func, > > rcu_read_unlock(); > > /* We can't create the thread unless interrupts are enabled. */ > > if (needwake && READ_ONCE(rtp->kthread_ptr)) > > - irq_work_queue(&rtpcp->rtp_irq_work); > > + rcuwait_wake_up(&rtp->cbs_wait); > > } > > > > // RCU callback function for rcu_barrier_tasks_generic(). > > -- > > 2.25.1 >
On Fri, Feb 24, 2023 at 12:36:05AM +0000, Zhang, Qiang1 wrote: > On Thu, Feb 23, 2023 at 08:43:05AM +0000, Zhang, Qiang1 wrote: > > > From: Zqiang <qiang1.zhang@intel.com> > > > Sent: Thursday, February 23, 2023 2:30 PM > > > To: paulmck@kernel.org; frederic@kernel.org; quic_neeraju@quicinc.com; > > > joel@joelfernandes.org > > > Cc: rcu@vger.kernel.org; linux-kernel@vger.kernel.org > > > Subject: [PATCH] rcu-tasks: Directly invoke rcuwait_wake_up() in > > > call_rcu_tasks_generic() > > > > > > According to commit '3063b33a347c ("Avoid raw-spinlocked wakeups from > > > call_rcu_tasks_generic()")', the grace-period kthread is delayed to wakeup > > > using irq_work_queue() is because if the caller of > > > call_rcu_tasks_generic() holds a raw spinlock, when the kernel is built with > > > CONFIG_PROVE_RAW_LOCK_NESTING=y, due to a spinlock will be hold in > > > wake_up(), so the lockdep splats will happen. but now using > > > rcuwait_wake_up() to wakeup grace-period kthread instead of wake_up(), in > > > rcuwait_wake_up() no spinlock will be acquired, so this commit remove using > > > > > >There are still spinlock-acquisition and spinlock-release invocations within the call path from rcuwait_wake_up(). > > > > > >rcuwait_wake_up() -> wake_up_process() -> try_to_wake_up(), then: > > > > > > raw_spin_lock_irqsave() > > > ... > > > raw_spin_unlock_irqrestore > > > > Yes, but this is raw_spinlock acquisition and release(note: spinlock will convert to > > sleepable lock in Preempt-RT kernel, but raw spinlock is not change). > > > > acquire raw_spinlock -> acquire spinlock will trigger lockdep warning. > > > >Is this really safe in the long run though? I seem to remember there are > >weird locking dependencies if RCU is used from within the scheduler [1]. > > > > > I have been running rcutorture with rcutorture.type = tasks-tracing, > so far no problems have been found. > > > >I prefer to keep it as irq_work_queue() unless you are seeing some benefit. > >Generally, there has to be a 'win' or other justification for adding more > >risk. > > > >thanks, > > > >- Joel > >[1] http://www.joelfernandes.org/rcu/scheduler/locking/2019/09/02/rcu-schedlocks.html > > > The problem in this link, in an earlier RCU version, rcu_read_unlock_special() > Invoke wakeup and enter scheduler can lead to deadlock, but my modification is for > call_rcu_tasks_generic(), even if there is a lock dependency problem, we should pay > more attention to rcu_read_unlock_trace_special() Consider ABBA deadlocks as well, not just self-deadlocks (which IIRC is what the straight-RCU rcu_read_unlock() issues were about). What prevents the following scenario? In the scheduler you have code like this: rq = task_rq_lock(p, &rf); trace_sched_wait_task(p); Someone can hook up a BPF program to that tracepoint that then calls rcu_read_unlock_trace() -> rcu_read_unlock_trace_special(). All of this while holding the rq and pi scheduler locks. That's A (rq lock) -> B (rtpcp lock). In another path, your change adds the following dependency due to doing wakeup under the rtpcp lock. That's call_rcu_tasks_generic() -> B (rtpcp lock) -> A (rq lock). Maybe there is some other state that prevents this case, but it still makes me queasy specially since there is perhaps no benefit more than deleting a few lines of code. Either way, nice observation! Btw, the way irq_work works is quite interesting, so I guess what it does is it does a self-IPI and then runs the callback in hard IRQ context, without holding any locks. Another interesting fact is, there is also a "lazy" version of the IRQ work API (IRQ_WORK_INIT_LAZY) which seems currently to be used by printk. This executes the work from the scheduler tick instead of an IPI handler unless the tick is stopped. thanks, - Joel > > Thanks > Zqiang > > > > > > irq_work_queue(), invoke rcuwait_wake_up() directly in > > > call_rcu_tasks_generic(). > > > > > > Signed-off-by: Zqiang <qiang1.zhang@intel.com> > > > --- > > > kernel/rcu/tasks.h | 16 +--------------- > > > 1 file changed, 1 insertion(+), 15 deletions(-) > > > > > > diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h index > > > baf7ec178155..757b8c6da1ad 100644 > > > --- a/kernel/rcu/tasks.h > > > +++ b/kernel/rcu/tasks.h > > > @@ -39,7 +39,6 @@ struct rcu_tasks_percpu { > > > unsigned long rtp_jiffies; > > > unsigned long rtp_n_lock_retries; > > > struct work_struct rtp_work; > > > - struct irq_work rtp_irq_work; > > > struct rcu_head barrier_q_head; > > > struct list_head rtp_blkd_tasks; > > > int cpu; > > > @@ -112,12 +111,9 @@ struct rcu_tasks { > > > char *kname; > > > }; > > > > > > -static void call_rcu_tasks_iw_wakeup(struct irq_work *iwp); > > > - > > > #define DEFINE_RCU_TASKS(rt_name, gp, call, n) > > > \ > > > static DEFINE_PER_CPU(struct rcu_tasks_percpu, rt_name ## __percpu) = { > > > \ > > > .lock = __RAW_SPIN_LOCK_UNLOCKED(rt_name ## > > > __percpu.cbs_pcpu_lock), \ > > > - .rtp_irq_work = IRQ_WORK_INIT_HARD(call_rcu_tasks_iw_wakeup), > > > \ > > > }; > > > \ > > > static struct rcu_tasks rt_name = > > > \ > > > { > > > \ > > > @@ -273,16 +269,6 @@ static void cblist_init_generic(struct rcu_tasks *rtp) > > > pr_info("%s: Setting shift to %d and lim to %d.\n", __func__, > > > data_race(rtp->percpu_enqueue_shift), data_race(rtp- > > > >percpu_enqueue_lim)); > > > } > > > > > > -// IRQ-work handler that does deferred wakeup for call_rcu_tasks_generic(). > > > -static void call_rcu_tasks_iw_wakeup(struct irq_work *iwp) -{ > > > - struct rcu_tasks *rtp; > > > - struct rcu_tasks_percpu *rtpcp = container_of(iwp, struct > > > rcu_tasks_percpu, rtp_irq_work); > > > - > > > - rtp = rtpcp->rtpp; > > > - rcuwait_wake_up(&rtp->cbs_wait); > > > -} > > > - > > > // Enqueue a callback for the specified flavor of Tasks RCU. > > > static void call_rcu_tasks_generic(struct rcu_head *rhp, rcu_callback_t func, > > > struct rcu_tasks *rtp) > > > @@ -334,7 +320,7 @@ static void call_rcu_tasks_generic(struct rcu_head > > > *rhp, rcu_callback_t func, > > > rcu_read_unlock(); > > > /* We can't create the thread unless interrupts are enabled. */ > > > if (needwake && READ_ONCE(rtp->kthread_ptr)) > > > - irq_work_queue(&rtpcp->rtp_irq_work); > > > + rcuwait_wake_up(&rtp->cbs_wait); > > > } > > > > > > // RCU callback function for rcu_barrier_tasks_generic(). > > > -- > > > 2.25.1 > >
On Thu, Feb 23, 2023 at 9:25 PM Joel Fernandes <joel@joelfernandes.org> wrote: > > On Fri, Feb 24, 2023 at 12:36:05AM +0000, Zhang, Qiang1 wrote: > > On Thu, Feb 23, 2023 at 08:43:05AM +0000, Zhang, Qiang1 wrote: > > > > From: Zqiang <qiang1.zhang@intel.com> > > > > Sent: Thursday, February 23, 2023 2:30 PM > > > > To: paulmck@kernel.org; frederic@kernel.org; quic_neeraju@quicinc.com; > > > > joel@joelfernandes.org > > > > Cc: rcu@vger.kernel.org; linux-kernel@vger.kernel.org > > > > Subject: [PATCH] rcu-tasks: Directly invoke rcuwait_wake_up() in > > > > call_rcu_tasks_generic() > > > > > > > > According to commit '3063b33a347c ("Avoid raw-spinlocked wakeups from > > > > call_rcu_tasks_generic()")', the grace-period kthread is delayed to wakeup > > > > using irq_work_queue() is because if the caller of > > > > call_rcu_tasks_generic() holds a raw spinlock, when the kernel is built with > > > > CONFIG_PROVE_RAW_LOCK_NESTING=y, due to a spinlock will be hold in > > > > wake_up(), so the lockdep splats will happen. but now using > > > > rcuwait_wake_up() to wakeup grace-period kthread instead of wake_up(), in > > > > rcuwait_wake_up() no spinlock will be acquired, so this commit remove using > > > > > > > >There are still spinlock-acquisition and spinlock-release invocations within the call path from rcuwait_wake_up(). > > > > > > > >rcuwait_wake_up() -> wake_up_process() -> try_to_wake_up(), then: > > > > > > > > raw_spin_lock_irqsave() > > > > ... > > > > raw_spin_unlock_irqrestore > > > > > > Yes, but this is raw_spinlock acquisition and release(note: spinlock will convert to > > > sleepable lock in Preempt-RT kernel, but raw spinlock is not change). > > > > > > acquire raw_spinlock -> acquire spinlock will trigger lockdep warning. > > > > > >Is this really safe in the long run though? I seem to remember there are > > >weird locking dependencies if RCU is used from within the scheduler [1]. > > > > > > > > > I have been running rcutorture with rcutorture.type = tasks-tracing, > > so far no problems have been found. > > > > > > >I prefer to keep it as irq_work_queue() unless you are seeing some benefit. > > >Generally, there has to be a 'win' or other justification for adding more > > >risk. > > > > > >thanks, > > > > > >- Joel > > >[1] http://www.joelfernandes.org/rcu/scheduler/locking/2019/09/02/rcu-schedlocks.html > > > > > > The problem in this link, in an earlier RCU version, rcu_read_unlock_special() > > Invoke wakeup and enter scheduler can lead to deadlock, but my modification is for > > call_rcu_tasks_generic(), even if there is a lock dependency problem, we should pay > > more attention to rcu_read_unlock_trace_special() > > Consider ABBA deadlocks as well, not just self-deadlocks (which IIRC is what > the straight-RCU rcu_read_unlock() issues were about). > > What prevents the following scenario? > > In the scheduler you have code like this: > rq = task_rq_lock(p, &rf); > trace_sched_wait_task(p); > > Someone can hook up a BPF program to that tracepoint that then calls > rcu_read_unlock_trace() -> rcu_read_unlock_trace_special(). All of > this while holding the rq and pi scheduler locks. > > That's A (rq lock) -> B (rtpcp lock). > > In another path, your change adds the following dependency due to doing > wakeup under the rtpcp lock. > > That's call_rcu_tasks_generic() -> B (rtpcp lock) -> A (rq lock). I would like to correct this last statement. That cannot happen but the concern I guess is, can the following happen due to the change? call_rcu_tasks_generic() -> B (some BPF lock) -> A (rq lock) So by doing a wakeup in this change, you have added the dependency for callers of call_rcu_tasks_trace() . Now, the BPF program is called from the trace point above and you have ABBA deadlock possibility. - Joel
On Thu, Feb 23, 2023 at 9:35 PM Joel Fernandes <joel@joelfernandes.org> wrote: > > On Thu, Feb 23, 2023 at 9:25 PM Joel Fernandes <joel@joelfernandes.org> wrote: > > > > On Fri, Feb 24, 2023 at 12:36:05AM +0000, Zhang, Qiang1 wrote: > > > On Thu, Feb 23, 2023 at 08:43:05AM +0000, Zhang, Qiang1 wrote: > > > > > From: Zqiang <qiang1.zhang@intel.com> > > > > > Sent: Thursday, February 23, 2023 2:30 PM > > > > > To: paulmck@kernel.org; frederic@kernel.org; quic_neeraju@quicinc.com; > > > > > joel@joelfernandes.org > > > > > Cc: rcu@vger.kernel.org; linux-kernel@vger.kernel.org > > > > > Subject: [PATCH] rcu-tasks: Directly invoke rcuwait_wake_up() in > > > > > call_rcu_tasks_generic() > > > > > > > > > > According to commit '3063b33a347c ("Avoid raw-spinlocked wakeups from > > > > > call_rcu_tasks_generic()")', the grace-period kthread is delayed to wakeup > > > > > using irq_work_queue() is because if the caller of > > > > > call_rcu_tasks_generic() holds a raw spinlock, when the kernel is built with > > > > > CONFIG_PROVE_RAW_LOCK_NESTING=y, due to a spinlock will be hold in > > > > > wake_up(), so the lockdep splats will happen. but now using > > > > > rcuwait_wake_up() to wakeup grace-period kthread instead of wake_up(), in > > > > > rcuwait_wake_up() no spinlock will be acquired, so this commit remove using > > > > > > > > > >There are still spinlock-acquisition and spinlock-release invocations within the call path from rcuwait_wake_up(). > > > > > > > > > >rcuwait_wake_up() -> wake_up_process() -> try_to_wake_up(), then: > > > > > > > > > > raw_spin_lock_irqsave() > > > > > ... > > > > > raw_spin_unlock_irqrestore > > > > > > > > Yes, but this is raw_spinlock acquisition and release(note: spinlock will convert to > > > > sleepable lock in Preempt-RT kernel, but raw spinlock is not change). > > > > > > > > acquire raw_spinlock -> acquire spinlock will trigger lockdep warning. > > > > > > > >Is this really safe in the long run though? I seem to remember there are > > > >weird locking dependencies if RCU is used from within the scheduler [1]. > > > > > > > > > > > > > I have been running rcutorture with rcutorture.type = tasks-tracing, > > > so far no problems have been found. > > > > > > > > > >I prefer to keep it as irq_work_queue() unless you are seeing some benefit. > > > >Generally, there has to be a 'win' or other justification for adding more > > > >risk. > > > > > > > >thanks, > > > > > > > >- Joel > > > >[1] http://www.joelfernandes.org/rcu/scheduler/locking/2019/09/02/rcu-schedlocks.html > > > > > > > > > The problem in this link, in an earlier RCU version, rcu_read_unlock_special() > > > Invoke wakeup and enter scheduler can lead to deadlock, but my modification is for > > > call_rcu_tasks_generic(), even if there is a lock dependency problem, we should pay > > > more attention to rcu_read_unlock_trace_special() > > > > Consider ABBA deadlocks as well, not just self-deadlocks (which IIRC is what > > the straight-RCU rcu_read_unlock() issues were about). > > > > What prevents the following scenario? > > > > In the scheduler you have code like this: > > rq = task_rq_lock(p, &rf); > > trace_sched_wait_task(p); > > > > Someone can hook up a BPF program to that tracepoint that then calls > > rcu_read_unlock_trace() -> rcu_read_unlock_trace_special(). All of > > this while holding the rq and pi scheduler locks. > > > > That's A (rq lock) -> B (rtpcp lock). > > > > In another path, your change adds the following dependency due to doing > > wakeup under the rtpcp lock. > > > > That's call_rcu_tasks_generic() -> B (rtpcp lock) -> A (rq lock). > > I would like to correct this last statement. That cannot happen but > the concern I guess is, can the following happen due to the change? > > call_rcu_tasks_generic() -> B (some BPF lock) -> A (rq lock) > Aaargh, one more correction: B (some BPF lock) -> call_rcu_tasks_generic() -> -> A (rq lock) ;-) -Joel
On Thu, Feb 23, 2023 at 9:35 PM Joel Fernandes <joel@joelfernandes.org> wrote: > > On Thu, Feb 23, 2023 at 9:25 PM Joel Fernandes <joel@joelfernandes.org> wrote: > > > > On Fri, Feb 24, 2023 at 12:36:05AM +0000, Zhang, Qiang1 wrote: > > > On Thu, Feb 23, 2023 at 08:43:05AM +0000, Zhang, Qiang1 wrote: > > > > > From: Zqiang <qiang1.zhang@intel.com> > > > > > Sent: Thursday, February 23, 2023 2:30 PM > > > > > To: paulmck@kernel.org; frederic@kernel.org; quic_neeraju@quicinc.com; > > > > > joel@joelfernandes.org > > > > > Cc: rcu@vger.kernel.org; linux-kernel@vger.kernel.org > > > > > Subject: [PATCH] rcu-tasks: Directly invoke rcuwait_wake_up() in > > > > > call_rcu_tasks_generic() > > > > > > > > > > According to commit '3063b33a347c ("Avoid raw-spinlocked wakeups from > > > > > call_rcu_tasks_generic()")', the grace-period kthread is delayed to wakeup > > > > > using irq_work_queue() is because if the caller of > > > > > call_rcu_tasks_generic() holds a raw spinlock, when the kernel is built with > > > > > CONFIG_PROVE_RAW_LOCK_NESTING=y, due to a spinlock will be hold in > > > > > wake_up(), so the lockdep splats will happen. but now using > > > > > rcuwait_wake_up() to wakeup grace-period kthread instead of wake_up(), in > > > > > rcuwait_wake_up() no spinlock will be acquired, so this commit remove using > > > > > > > > > >There are still spinlock-acquisition and spinlock-release invocations within the call path from rcuwait_wake_up(). > > > > > > > > > >rcuwait_wake_up() -> wake_up_process() -> try_to_wake_up(), then: > > > > > > > > > > raw_spin_lock_irqsave() > > > > > ... > > > > > raw_spin_unlock_irqrestore > > > > > > > > Yes, but this is raw_spinlock acquisition and release(note: spinlock will convert to > > > > sleepable lock in Preempt-RT kernel, but raw spinlock is not change). > > > > > > > > acquire raw_spinlock -> acquire spinlock will trigger lockdep warning. > > > > > > > >Is this really safe in the long run though? I seem to remember there are > > > >weird locking dependencies if RCU is used from within the scheduler [1]. > > > > > > > > > > > > > I have been running rcutorture with rcutorture.type = tasks-tracing, > > > so far no problems have been found. > > > > > > > > > >I prefer to keep it as irq_work_queue() unless you are seeing some benefit. > > > >Generally, there has to be a 'win' or other justification for adding more > > > >risk. > > > > > > > >thanks, > > > > > > > >- Joel > > > >[1] http://www.joelfernandes.org/rcu/scheduler/locking/2019/09/02/rcu-schedlocks.html > > > > > > > > > The problem in this link, in an earlier RCU version, rcu_read_unlock_special() > > > Invoke wakeup and enter scheduler can lead to deadlock, but my modification is for > > > call_rcu_tasks_generic(), even if there is a lock dependency problem, we should pay > > > more attention to rcu_read_unlock_trace_special() > > > > Consider ABBA deadlocks as well, not just self-deadlocks (which IIRC is what > > the straight-RCU rcu_read_unlock() issues were about). > > > > What prevents the following scenario? > > > > In the scheduler you have code like this: > > rq = task_rq_lock(p, &rf); > > trace_sched_wait_task(p); > > > > Someone can hook up a BPF program to that tracepoint that then calls > > rcu_read_unlock_trace() -> rcu_read_unlock_trace_special(). All of > > this while holding the rq and pi scheduler locks. > > > > That's A (rq lock) -> B (rtpcp lock). In rcu_read_unlock_trace_special(), the premise of acquiring the rtpcp lock is that before that, we have task switch in the rcu_read_lock_trace/unlock_trace critical section. but after we already hold the rq lock, no task switch is generated in the rcu_read_lock_trace/unlock_trace critical section. Please correct me if my understanding is wrong. Thanks Zqiang > > > > In another path, your change adds the following dependency due to doing > > wakeup under the rtpcp lock. > > > > That's call_rcu_tasks_generic() -> B (rtpcp lock) -> A (rq lock). > > I would like to correct this last statement. That cannot happen but > the concern I guess is, can the following happen due to the change? > > call_rcu_tasks_generic() -> B (some BPF lock) -> A (rq lock) > > >Aaargh, one more correction: >B (some BPF lock) -> call_rcu_tasks_generic() -> -> A (rq lock) > > ;-) > >-Joel
On Thu, Feb 23, 2023 at 10:05 PM Zhang, Qiang1 <qiang1.zhang@intel.com> wrote: > > On Thu, Feb 23, 2023 at 9:35 PM Joel Fernandes <joel@joelfernandes.org> wrote: > > > > On Thu, Feb 23, 2023 at 9:25 PM Joel Fernandes <joel@joelfernandes.org> wrote: > > > > > > On Fri, Feb 24, 2023 at 12:36:05AM +0000, Zhang, Qiang1 wrote: > > > > On Thu, Feb 23, 2023 at 08:43:05AM +0000, Zhang, Qiang1 wrote: > > > > > > From: Zqiang <qiang1.zhang@intel.com> > > > > > > Sent: Thursday, February 23, 2023 2:30 PM > > > > > > To: paulmck@kernel.org; frederic@kernel.org; quic_neeraju@quicinc.com; > > > > > > joel@joelfernandes.org > > > > > > Cc: rcu@vger.kernel.org; linux-kernel@vger.kernel.org > > > > > > Subject: [PATCH] rcu-tasks: Directly invoke rcuwait_wake_up() in > > > > > > call_rcu_tasks_generic() > > > > > > > > > > > > According to commit '3063b33a347c ("Avoid raw-spinlocked wakeups from > > > > > > call_rcu_tasks_generic()")', the grace-period kthread is delayed to wakeup > > > > > > using irq_work_queue() is because if the caller of > > > > > > call_rcu_tasks_generic() holds a raw spinlock, when the kernel is built with > > > > > > CONFIG_PROVE_RAW_LOCK_NESTING=y, due to a spinlock will be hold in > > > > > > wake_up(), so the lockdep splats will happen. but now using > > > > > > rcuwait_wake_up() to wakeup grace-period kthread instead of wake_up(), in > > > > > > rcuwait_wake_up() no spinlock will be acquired, so this commit remove using > > > > > > > > > > > >There are still spinlock-acquisition and spinlock-release invocations within the call path from rcuwait_wake_up(). > > > > > > > > > > > >rcuwait_wake_up() -> wake_up_process() -> try_to_wake_up(), then: > > > > > > > > > > > > raw_spin_lock_irqsave() > > > > > > ... > > > > > > raw_spin_unlock_irqrestore > > > > > > > > > > Yes, but this is raw_spinlock acquisition and release(note: spinlock will convert to > > > > > sleepable lock in Preempt-RT kernel, but raw spinlock is not change). > > > > > > > > > > acquire raw_spinlock -> acquire spinlock will trigger lockdep warning. > > > > > > > > > >Is this really safe in the long run though? I seem to remember there are > > > > >weird locking dependencies if RCU is used from within the scheduler [1]. > > > > > > > > > > > > > > > > > I have been running rcutorture with rcutorture.type = tasks-tracing, > > > > so far no problems have been found. > > > > > > > > > > > > >I prefer to keep it as irq_work_queue() unless you are seeing some benefit. > > > > >Generally, there has to be a 'win' or other justification for adding more > > > > >risk. > > > > > > > > > >thanks, > > > > > > > > > >- Joel > > > > >[1] http://www.joelfernandes.org/rcu/scheduler/locking/2019/09/02/rcu-schedlocks.html > > > > > > > > > > > > The problem in this link, in an earlier RCU version, rcu_read_unlock_special() > > > > Invoke wakeup and enter scheduler can lead to deadlock, but my modification is for > > > > call_rcu_tasks_generic(), even if there is a lock dependency problem, we should pay > > > > more attention to rcu_read_unlock_trace_special() > > > > > > Consider ABBA deadlocks as well, not just self-deadlocks (which IIRC is what > > > the straight-RCU rcu_read_unlock() issues were about). > > > > > > What prevents the following scenario? > > > > > > In the scheduler you have code like this: > > > rq = task_rq_lock(p, &rf); > > > trace_sched_wait_task(p); > > > > > > Someone can hook up a BPF program to that tracepoint that then calls > > > rcu_read_unlock_trace() -> rcu_read_unlock_trace_special(). All of > > > this while holding the rq and pi scheduler locks. > > > > > > That's A (rq lock) -> B (rtpcp lock). > > In rcu_read_unlock_trace_special(), the premise of acquiring the rtpcp lock is that > before that, we have task switch in the rcu_read_lock_trace/unlock_trace critical section. > but after we already hold the rq lock, no task switch is generated in the > rcu_read_lock_trace/unlock_trace critical section. > > Please correct me if my understanding is wrong. Yes, but in the next reply I corrected myself and I am still concerned about ABBA. There is obviously *some lock* that is held by the callers of call_rcu_tasks*(). So there is a dependency that gets created between _that_ lock and the rq lock, if you do a wakeup here. And I am not sure whether that lock is also acquired when the BPF program runs. If it is, then the BPF programs may hang. It is probably worth checking with the BPF guys. More importantly, do you see a benefit with this change in terms of anything more than deleting a few lines of code? Paul typically favors robustness and guard rails (as do I), unless there is significant benefit in performance, power or both. Thanks, - Joel
On Thu, Feb 23, 2023 at 10:05 PM Zhang, Qiang1 <qiang1.zhang@intel.com> wrote: > > On Thu, Feb 23, 2023 at 9:35 PM Joel Fernandes <joel@joelfernandes.org> wrote: > > > > On Thu, Feb 23, 2023 at 9:25 PM Joel Fernandes <joel@joelfernandes.org> wrote: > > > > > > On Fri, Feb 24, 2023 at 12:36:05AM +0000, Zhang, Qiang1 wrote: > > > > On Thu, Feb 23, 2023 at 08:43:05AM +0000, Zhang, Qiang1 wrote: > > > > > > From: Zqiang <qiang1.zhang@intel.com> > > > > > > Sent: Thursday, February 23, 2023 2:30 PM > > > > > > To: paulmck@kernel.org; frederic@kernel.org; quic_neeraju@quicinc.com; > > > > > > joel@joelfernandes.org > > > > > > Cc: rcu@vger.kernel.org; linux-kernel@vger.kernel.org > > > > > > Subject: [PATCH] rcu-tasks: Directly invoke rcuwait_wake_up() in > > > > > > call_rcu_tasks_generic() > > > > > > > > > > > > According to commit '3063b33a347c ("Avoid raw-spinlocked wakeups from > > > > > > call_rcu_tasks_generic()")', the grace-period kthread is delayed to wakeup > > > > > > using irq_work_queue() is because if the caller of > > > > > > call_rcu_tasks_generic() holds a raw spinlock, when the kernel is built with > > > > > > CONFIG_PROVE_RAW_LOCK_NESTING=y, due to a spinlock will be hold in > > > > > > wake_up(), so the lockdep splats will happen. but now using > > > > > > rcuwait_wake_up() to wakeup grace-period kthread instead of wake_up(), in > > > > > > rcuwait_wake_up() no spinlock will be acquired, so this commit remove using > > > > > > > > > > > >There are still spinlock-acquisition and spinlock-release invocations within the call path from rcuwait_wake_up(). > > > > > > > > > > > >rcuwait_wake_up() -> wake_up_process() -> try_to_wake_up(), then: > > > > > > > > > > > > raw_spin_lock_irqsave() > > > > > > ... > > > > > > raw_spin_unlock_irqrestore > > > > > > > > > > Yes, but this is raw_spinlock acquisition and release(note: spinlock will convert to > > > > > sleepable lock in Preempt-RT kernel, but raw spinlock is not change). > > > > > > > > > > acquire raw_spinlock -> acquire spinlock will trigger lockdep warning. > > > > > > > > > >Is this really safe in the long run though? I seem to remember there are > > > > >weird locking dependencies if RCU is used from within the scheduler [1]. > > > > > > > > > > > > > > > > > I have been running rcutorture with rcutorture.type = tasks-tracing, > > > > so far no problems have been found. > > > > > > > > > > > > >I prefer to keep it as irq_work_queue() unless you are seeing some benefit. > > > > >Generally, there has to be a 'win' or other justification for adding more > > > > >risk. > > > > > > > > > >thanks, > > > > > > > > > >- Joel > > > > >[1] http://www.joelfernandes.org/rcu/scheduler/locking/2019/09/02/rcu-schedlocks.html > > > > > > > > > > > > The problem in this link, in an earlier RCU version, rcu_read_unlock_special() > > > > Invoke wakeup and enter scheduler can lead to deadlock, but my modification is for > > > > call_rcu_tasks_generic(), even if there is a lock dependency problem, we should pay > > > > more attention to rcu_read_unlock_trace_special() > > > > > > Consider ABBA deadlocks as well, not just self-deadlocks (which IIRC is what > > > the straight-RCU rcu_read_unlock() issues were about). > > > > > > What prevents the following scenario? > > > > > > In the scheduler you have code like this: > > > rq = task_rq_lock(p, &rf); > > > trace_sched_wait_task(p); > > > > > > Someone can hook up a BPF program to that tracepoint that then calls > > > rcu_read_unlock_trace() -> rcu_read_unlock_trace_special(). All of > > > this while holding the rq and pi scheduler locks. > > > > > > That's A (rq lock) -> B (rtpcp lock). > > In rcu_read_unlock_trace_special(), the premise of acquiring the rtpcp lock is that > before that, we have task switch in the rcu_read_lock_trace/unlock_trace critical section. > but after we already hold the rq lock, no task switch is generated in the > rcu_read_lock_trace/unlock_trace critical section. > > Please correct me if my understanding is wrong. > >Yes, but in the next reply I corrected myself and I am still concerned >about ABBA. There is obviously *some lock* that is held by the callers >of call_rcu_tasks*(). So there is a dependency that gets created >between _that_ lock and the rq lock, if you do a wakeup here. And I >am not sure whether that lock is also acquired when the BPF program >runs. If it is, then the BPF programs may hang. It is probably worth >checking with the BPF guys. > >More importantly, do you see a benefit with this change in terms of >anything more than deleting a few lines of code? Paul typically favors >robustness and guard rails (as do I), unless there is significant >benefit in performance, power or both. because I found that the purpose of using irq_work_queue() early is to solve the problem of lockep splat, my modified junior is also to avoid unnecessary IPI. but like you said, indeed we are not completely sure whether there is a potential lock dependency problem, so I agree your opinion. Thanks Zqiang > >Thanks, > > - Joel
On Thu, Feb 23, 2023 at 10:22 PM Zhang, Qiang1 <qiang1.zhang@intel.com> wrote: > > On Thu, Feb 23, 2023 at 10:05 PM Zhang, Qiang1 <qiang1.zhang@intel.com> wrote: > > > > On Thu, Feb 23, 2023 at 9:35 PM Joel Fernandes <joel@joelfernandes.org> wrote: > > > > > > On Thu, Feb 23, 2023 at 9:25 PM Joel Fernandes <joel@joelfernandes.org> wrote: > > > > > > > > On Fri, Feb 24, 2023 at 12:36:05AM +0000, Zhang, Qiang1 wrote: > > > > > On Thu, Feb 23, 2023 at 08:43:05AM +0000, Zhang, Qiang1 wrote: > > > > > > > From: Zqiang <qiang1.zhang@intel.com> > > > > > > > Sent: Thursday, February 23, 2023 2:30 PM > > > > > > > To: paulmck@kernel.org; frederic@kernel.org; quic_neeraju@quicinc.com; > > > > > > > joel@joelfernandes.org > > > > > > > Cc: rcu@vger.kernel.org; linux-kernel@vger.kernel.org > > > > > > > Subject: [PATCH] rcu-tasks: Directly invoke rcuwait_wake_up() in > > > > > > > call_rcu_tasks_generic() > > > > > > > > > > > > > > According to commit '3063b33a347c ("Avoid raw-spinlocked wakeups from > > > > > > > call_rcu_tasks_generic()")', the grace-period kthread is delayed to wakeup > > > > > > > using irq_work_queue() is because if the caller of > > > > > > > call_rcu_tasks_generic() holds a raw spinlock, when the kernel is built with > > > > > > > CONFIG_PROVE_RAW_LOCK_NESTING=y, due to a spinlock will be hold in > > > > > > > wake_up(), so the lockdep splats will happen. but now using > > > > > > > rcuwait_wake_up() to wakeup grace-period kthread instead of wake_up(), in > > > > > > > rcuwait_wake_up() no spinlock will be acquired, so this commit remove using > > > > > > > > > > > > > >There are still spinlock-acquisition and spinlock-release invocations within the call path from rcuwait_wake_up(). > > > > > > > > > > > > > >rcuwait_wake_up() -> wake_up_process() -> try_to_wake_up(), then: > > > > > > > > > > > > > > raw_spin_lock_irqsave() > > > > > > > ... > > > > > > > raw_spin_unlock_irqrestore > > > > > > > > > > > > Yes, but this is raw_spinlock acquisition and release(note: spinlock will convert to > > > > > > sleepable lock in Preempt-RT kernel, but raw spinlock is not change). > > > > > > > > > > > > acquire raw_spinlock -> acquire spinlock will trigger lockdep warning. > > > > > > > > > > > >Is this really safe in the long run though? I seem to remember there are > > > > > >weird locking dependencies if RCU is used from within the scheduler [1]. > > > > > > > > > > > > > > > > > > > > > I have been running rcutorture with rcutorture.type = tasks-tracing, > > > > > so far no problems have been found. > > > > > > > > > > > > > > > >I prefer to keep it as irq_work_queue() unless you are seeing some benefit. > > > > > >Generally, there has to be a 'win' or other justification for adding more > > > > > >risk. > > > > > > > > > > > >thanks, > > > > > > > > > > > >- Joel > > > > > >[1] http://www.joelfernandes.org/rcu/scheduler/locking/2019/09/02/rcu-schedlocks.html > > > > > > > > > > > > > > > The problem in this link, in an earlier RCU version, rcu_read_unlock_special() > > > > > Invoke wakeup and enter scheduler can lead to deadlock, but my modification is for > > > > > call_rcu_tasks_generic(), even if there is a lock dependency problem, we should pay > > > > > more attention to rcu_read_unlock_trace_special() > > > > > > > > Consider ABBA deadlocks as well, not just self-deadlocks (which IIRC is what > > > > the straight-RCU rcu_read_unlock() issues were about). > > > > > > > > What prevents the following scenario? > > > > > > > > In the scheduler you have code like this: > > > > rq = task_rq_lock(p, &rf); > > > > trace_sched_wait_task(p); > > > > > > > > Someone can hook up a BPF program to that tracepoint that then calls > > > > rcu_read_unlock_trace() -> rcu_read_unlock_trace_special(). All of > > > > this while holding the rq and pi scheduler locks. > > > > > > > > That's A (rq lock) -> B (rtpcp lock). > > > > In rcu_read_unlock_trace_special(), the premise of acquiring the rtpcp lock is that > > before that, we have task switch in the rcu_read_lock_trace/unlock_trace critical section. > > but after we already hold the rq lock, no task switch is generated in the > > rcu_read_lock_trace/unlock_trace critical section. > > > > Please correct me if my understanding is wrong. > > > >Yes, but in the next reply I corrected myself and I am still concerned > >about ABBA. There is obviously *some lock* that is held by the callers > >of call_rcu_tasks*(). So there is a dependency that gets created > >between _that_ lock and the rq lock, if you do a wakeup here. And I > >am not sure whether that lock is also acquired when the BPF program > >runs. If it is, then the BPF programs may hang. It is probably worth > >checking with the BPF guys. > > > >More importantly, do you see a benefit with this change in terms of > >anything more than deleting a few lines of code? Paul typically favors > >robustness and guard rails (as do I), unless there is significant > >benefit in performance, power or both. > > because I found that the purpose of using irq_work_queue() early is to solve the problem of lockep splat, > my modified junior is also to avoid unnecessary IPI. irq_work_queue() which this code uses does a self-IPI, unlike irq_work_queue_on() which AFAIK is significantly cheaper on ARM64 than cross-CPU IPIs. Not sure about x86 though. Another way to avoid IPI sometimes is making the IRQ work lazy. It will then do self-IPI only if the tick is turned off. But I'm not sure if that is any better than leaving the code as-is. > but like you said, indeed we are not completely sure > whether there is a potential lock dependency problem, so I agree your opinion. Ok and thanks for digging into it. This was fun! - Joel > > Thanks > Zqiang > > > > >Thanks, > > > > - Joel
diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h index baf7ec178155..757b8c6da1ad 100644 --- a/kernel/rcu/tasks.h +++ b/kernel/rcu/tasks.h @@ -39,7 +39,6 @@ struct rcu_tasks_percpu { unsigned long rtp_jiffies; unsigned long rtp_n_lock_retries; struct work_struct rtp_work; - struct irq_work rtp_irq_work; struct rcu_head barrier_q_head; struct list_head rtp_blkd_tasks; int cpu; @@ -112,12 +111,9 @@ struct rcu_tasks { char *kname; }; -static void call_rcu_tasks_iw_wakeup(struct irq_work *iwp); - #define DEFINE_RCU_TASKS(rt_name, gp, call, n) \ static DEFINE_PER_CPU(struct rcu_tasks_percpu, rt_name ## __percpu) = { \ .lock = __RAW_SPIN_LOCK_UNLOCKED(rt_name ## __percpu.cbs_pcpu_lock), \ - .rtp_irq_work = IRQ_WORK_INIT_HARD(call_rcu_tasks_iw_wakeup), \ }; \ static struct rcu_tasks rt_name = \ { \ @@ -273,16 +269,6 @@ static void cblist_init_generic(struct rcu_tasks *rtp) pr_info("%s: Setting shift to %d and lim to %d.\n", __func__, data_race(rtp->percpu_enqueue_shift), data_race(rtp->percpu_enqueue_lim)); } -// IRQ-work handler that does deferred wakeup for call_rcu_tasks_generic(). -static void call_rcu_tasks_iw_wakeup(struct irq_work *iwp) -{ - struct rcu_tasks *rtp; - struct rcu_tasks_percpu *rtpcp = container_of(iwp, struct rcu_tasks_percpu, rtp_irq_work); - - rtp = rtpcp->rtpp; - rcuwait_wake_up(&rtp->cbs_wait); -} - // Enqueue a callback for the specified flavor of Tasks RCU. static void call_rcu_tasks_generic(struct rcu_head *rhp, rcu_callback_t func, struct rcu_tasks *rtp) @@ -334,7 +320,7 @@ static void call_rcu_tasks_generic(struct rcu_head *rhp, rcu_callback_t func, rcu_read_unlock(); /* We can't create the thread unless interrupts are enabled. */ if (needwake && READ_ONCE(rtp->kthread_ptr)) - irq_work_queue(&rtpcp->rtp_irq_work); + rcuwait_wake_up(&rtp->cbs_wait); } // RCU callback function for rcu_barrier_tasks_generic().
According to commit '3063b33a347c ("Avoid raw-spinlocked wakeups from call_rcu_tasks_generic()")', the grace-period kthread is delayed to wakeup using irq_work_queue() is because if the caller of call_rcu_tasks_generic() holds a raw spinlock, when the kernel is built with CONFIG_PROVE_RAW_LOCK_NESTING=y, due to a spinlock will be hold in wake_up(), so the lockdep splats will happen. but now using rcuwait_wake_up() to wakeup grace-period kthread instead of wake_up(), in rcuwait_wake_up() no spinlock will be acquired, so this commit remove using irq_work_queue(), invoke rcuwait_wake_up() directly in call_rcu_tasks_generic(). Signed-off-by: Zqiang <qiang1.zhang@intel.com> --- kernel/rcu/tasks.h | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-)