| Message ID | 20220301010412.431299-3-namhyung@kernel.org (mailing list archive) |
|---|---|
| State | Not Applicable |
| Series | locking: Add new lock contention tracepoints (v2) |
On Mon, Feb 28, 2022 at 05:04:10PM -0800, Namhyung Kim wrote:

> @@ -80,7 +81,9 @@ static inline void queued_read_lock(struct qrwlock *lock)
>  		return;
>
>  	/* The slowpath will decrement the reader count, if necessary. */
> +	LOCK_CONTENTION_BEGIN(lock, LCB_F_READ);
>  	queued_read_lock_slowpath(lock);
> +	LOCK_CONTENTION_END(lock);
>  }
>
>  /**
> @@ -94,7 +97,9 @@ static inline void queued_write_lock(struct qrwlock *lock)
>  	if (likely(atomic_try_cmpxchg_acquire(&lock->cnts, &cnts, _QW_LOCKED)))
>  		return;
>
> +	LOCK_CONTENTION_BEGIN(lock, LCB_F_WRITE);
>  	queued_write_lock_slowpath(lock);
> +	LOCK_CONTENTION_END(lock);
>  }

> @@ -82,7 +83,9 @@ static __always_inline void queued_spin_lock(struct qspinlock *lock)
>  	if (likely(atomic_try_cmpxchg_acquire(&lock->val, &val, _Q_LOCKED_VAL)))
>  		return;
>
> +	LOCK_CONTENTION_BEGIN(lock, 0);
>  	queued_spin_lock_slowpath(lock, val);
> +	LOCK_CONTENTION_END(lock);
>  }

Can you please stick that _inside_ the slowpath? You really don't want
to inline that.
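For illustration, a minimal, hypothetical sketch (not part of the thread and not the posted patch) of what "inside the slowpath" means for the qrwlock write path, assuming the trace_contention_begin()/trace_contention_end() events from this series. Nothing extra ends up in the inlined fastpath; the trade-off, raised further down the thread, is that _RET_IP_ taken here no longer identifies the original lock call site.

/* kernel/locking/qrwlock.c -- illustrative sketch only */
void queued_write_lock_slowpath(struct qrwlock *lock)
{
	int cnts = 0;

	/* Contention starts here; the inlined fastpath stays hook-free. */
	trace_contention_begin(lock, _RET_IP_, LCB_F_WRITE);

	/* Put the writer into the wait queue. */
	arch_spin_lock(&lock->wait_lock);

	/* Try to acquire the lock directly if no reader is present. */
	if (!(cnts = atomic_read(&lock->cnts)) &&
	    atomic_try_cmpxchg_acquire(&lock->cnts, &cnts, _QW_LOCKED))
		goto unlock;

	/* Set the waiting flag to notify readers that a writer is pending. */
	atomic_or(_QW_WAITING, &lock->cnts);

	/* When no more readers or writers, set the locked flag. */
	do {
		cnts = atomic_cond_read_relaxed(&lock->cnts, VAL == _QW_WAITING);
	} while (!atomic_try_cmpxchg_acquire(&lock->cnts, &cnts, _QW_LOCKED));
unlock:
	arch_spin_unlock(&lock->wait_lock);

	trace_contention_end(lock);
}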
On Mon, Feb 28, 2022 at 05:04:10PM -0800, Namhyung Kim wrote:

> @@ -171,9 +172,12 @@ bool __sched __percpu_down_read(struct percpu_rw_semaphore *sem, bool try)
>  	if (try)
>  		return false;
>
> +	trace_contention_begin(sem, _RET_IP_,
> +			       LCB_F_READ | LCB_F_PERCPU | TASK_UNINTERRUPTIBLE);

That is a bit unwieldy, isn't it ?

>  	preempt_enable();
>  	percpu_rwsem_wait(sem, /* .reader = */ true);
>  	preempt_disable();
> +	trace_contention_end(sem);
>
>  	return true;
>  }
> @@ -224,8 +228,13 @@ void __sched percpu_down_write(struct percpu_rw_semaphore *sem)
>  	 * Try set sem->block; this provides writer-writer exclusion.
>  	 * Having sem->block set makes new readers block.
>  	 */
> -	if (!__percpu_down_write_trylock(sem))
> +	if (!__percpu_down_write_trylock(sem)) {
> +		unsigned int flags = LCB_F_WRITE | LCB_F_PERCPU | TASK_UNINTERRUPTIBLE;
> +
> +		trace_contention_begin(sem, _RET_IP_, flags);
>  		percpu_rwsem_wait(sem, /* .reader = */ false);
> +		trace_contention_end(sem);
> +	}
>
>  	/* smp_mb() implied by __percpu_down_write_trylock() on success -- D matches A */
>

Wouldn't it be easier to stick all that inside percpu_rwsem_wait() and
have it only once? You can even re-frob the wait loop such that the
tracepoint can use current->__state or something.

diff --git a/kernel/locking/percpu-rwsem.c b/kernel/locking/percpu-rwsem.c
index c9fdae94e098..ca01f8ff88e5 100644
--- a/kernel/locking/percpu-rwsem.c
+++ b/kernel/locking/percpu-rwsem.c
@@ -154,13 +154,16 @@ static void percpu_rwsem_wait(struct percpu_rw_semaphore *sem, bool reader)
 	}
 	spin_unlock_irq(&sem->waiters.lock);
 
+	set_current_state(TASK_UNINTERRUPTIBLE);
+	trace_contention_begin(sem, _RET_IP_, LCB_F_PERCPU | LCB_F_WRITE*!reader);
 	while (wait) {
-		set_current_state(TASK_UNINTERRUPTIBLE);
 		if (!smp_load_acquire(&wq_entry.private))
 			break;
 		schedule();
+		set_current_state(TASK_UNINTERRUPTIBLE);
 	}
 	__set_current_state(TASK_RUNNING);
+	trace_contention_end(sem);
 }
 
 bool __sched __percpu_down_read(struct percpu_rw_semaphore *sem, bool try)

> diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
> index 8555c4efe97c..e49f5d2a232b 100644
> --- a/kernel/locking/rtmutex.c
> +++ b/kernel/locking/rtmutex.c
> @@ -24,6 +24,8 @@
>  #include <linux/sched/wake_q.h>
>  #include <linux/ww_mutex.h>
>
> +#include <trace/events/lock.h>
> +
>  #include "rtmutex_common.h"
>
>  #ifndef WW_RT
> @@ -1652,10 +1654,16 @@ static int __sched rt_mutex_slowlock(struct rt_mutex_base *lock,
>  static __always_inline int __rt_mutex_lock(struct rt_mutex_base *lock,
>  					   unsigned int state)
>  {
> +	int ret;
> +
>  	if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current)))
>  		return 0;
>
> -	return rt_mutex_slowlock(lock, NULL, state);
> +	trace_contention_begin(lock, _RET_IP_, LCB_F_RT | state);
> +	ret = rt_mutex_slowlock(lock, NULL, state);
> +	trace_contention_end(lock);
> +
> +	return ret;
>  }
>  #endif /* RT_MUTEX_BUILD_MUTEX */
>
> @@ -1718,9 +1726,11 @@ static __always_inline void __sched rtlock_slowlock(struct rt_mutex_base *lock)
>  {
>  	unsigned long flags;
>
> +	trace_contention_begin(lock, _RET_IP_, LCB_F_RT | TASK_RTLOCK_WAIT);
>  	raw_spin_lock_irqsave(&lock->wait_lock, flags);
>  	rtlock_slowlock_locked(lock);
>  	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
> +	trace_contention_end(lock);
>  }

Same, if you do it one level in, you can have the tracepoint itself look
at current->__state. Also, you seem to have forgotten to trace the
return value. Now you can't tell if the lock was acquired, or was denied
(ww_mutex) or we were interrupted.
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index 8555c4efe97c..18b9f4bf6f34 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -1579,6 +1579,8 @@ static int __sched __rt_mutex_slowlock(struct rt_mutex_base *lock,
 
 	set_current_state(state);
 
+	trace_contention_begin(lock, _RET_IP_, LCB_F_RT);
+
 	ret = task_blocks_on_rt_mutex(lock, waiter, current, ww_ctx, chwalk);
 	if (likely(!ret))
 		ret = rt_mutex_slowlock_block(lock, ww_ctx, state, NULL, waiter);
@@ -1601,6 +1603,9 @@ static int __sched __rt_mutex_slowlock(struct rt_mutex_base *lock,
 	 * unconditionally. We might have to fix that up.
 	 */
 	fixup_rt_mutex_waiters(lock);
+
+	trace_contention_end(lock, ret);
+
 	return ret;
 }
 
@@ -1683,6 +1688,8 @@ static void __sched rtlock_slowlock_locked(struct rt_mutex_base *lock)
 	/* Save current state and set state to TASK_RTLOCK_WAIT */
 	current_save_and_set_rtlock_wait_state();
 
+	trace_contention_begin(lock, _RET_IP_, LCB_F_RT);
+
 	task_blocks_on_rt_mutex(lock, &waiter, current, NULL, RT_MUTEX_MIN_CHAINWALK);
 
 	for (;;) {
@@ -1703,6 +1710,8 @@ static void __sched rtlock_slowlock_locked(struct rt_mutex_base *lock)
 		set_current_state(TASK_RTLOCK_WAIT);
 	}
 
+	trace_contention_end(lock, 0);
+
 	/* Restore the task state */
 	current_restore_rtlock_saved_state();
On Tue, 1 Mar 2022 10:03:54 +0100
Peter Zijlstra <peterz@infradead.org> wrote:

> diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
> index 8555c4efe97c..18b9f4bf6f34 100644
> --- a/kernel/locking/rtmutex.c
> +++ b/kernel/locking/rtmutex.c
> @@ -1579,6 +1579,8 @@ static int __sched __rt_mutex_slowlock(struct rt_mutex_base *lock,
>
>  	set_current_state(state);
>
> +	trace_contention_begin(lock, _RET_IP_, LCB_F_RT);

I guess one issue with this is that _RET_IP_ will return the rt_mutex
address if this is not inlined, making the _RET_IP_ rather useless.

Now, if we can pass the _RET_IP_ into __rt_mutex_slowlock() then we could
reference that.

-- Steve

> +
>  	ret = task_blocks_on_rt_mutex(lock, waiter, current, ww_ctx, chwalk);
>  	if (likely(!ret))
>  		ret = rt_mutex_slowlock_block(lock, ww_ctx, state, NULL, waiter);
> @@ -1601,6 +1603,9 @@ static int __sched __rt_mutex_slowlock(struct rt_mutex_base *lock,
>  	 * unconditionally. We might have to fix that up.
>  	 */
>  	fixup_rt_mutex_waiters(lock);
> +
> +	trace_contention_end(lock, ret);
> +
>  	return ret;
>  }
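A rough, hypothetical sketch (not the posted series; the extra ip parameter is made up for the example) of the approach Steve describes: capture _RET_IP_ while still inlined at the call site and hand it down to the out-of-line slowpath, so the tracepoint records the real caller.

/* Hypothetical shape only -- rt_mutex_slowlock() takes no ip argument in the posted patches. */
static int __sched rt_mutex_slowlock(struct rt_mutex_base *lock,
				     struct ww_acquire_ctx *ww_ctx,
				     unsigned int state, unsigned long ip)
{
	unsigned long flags;
	int ret;

	raw_spin_lock_irqsave(&lock->wait_lock, flags);
	/* The ip handed down from the inline wrapper, not _RET_IP_ here. */
	trace_contention_begin(lock, ip, LCB_F_RT | state);
	ret = __rt_mutex_slowlock_locked(lock, ww_ctx, state);
	trace_contention_end(lock, ret);
	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);

	return ret;
}

static __always_inline int __rt_mutex_lock(struct rt_mutex_base *lock,
					   unsigned int state)
{
	if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current)))
		return 0;

	/* Still inlined here, so _RET_IP_ is the real lock call site. */
	return rt_mutex_slowlock(lock, NULL, state, _RET_IP_);
}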
Hi Peter,

On Tue, Mar 1, 2022 at 12:44 AM Peter Zijlstra <peterz@infradead.org> wrote:
>
> On Mon, Feb 28, 2022 at 05:04:10PM -0800, Namhyung Kim wrote:
> > @@ -80,7 +81,9 @@ static inline void queued_read_lock(struct qrwlock *lock)
> >  		return;
> >
> >  	/* The slowpath will decrement the reader count, if necessary. */
> > +	LOCK_CONTENTION_BEGIN(lock, LCB_F_READ);
> >  	queued_read_lock_slowpath(lock);
> > +	LOCK_CONTENTION_END(lock);
> >  }
> >
> >  /**
> > @@ -94,7 +97,9 @@ static inline void queued_write_lock(struct qrwlock *lock)
> >  	if (likely(atomic_try_cmpxchg_acquire(&lock->cnts, &cnts, _QW_LOCKED)))
> >  		return;
> >
> > +	LOCK_CONTENTION_BEGIN(lock, LCB_F_WRITE);
> >  	queued_write_lock_slowpath(lock);
> > +	LOCK_CONTENTION_END(lock);
> >  }
> >
> > @@ -82,7 +83,9 @@ static __always_inline void queued_spin_lock(struct qspinlock *lock)
> >  	if (likely(atomic_try_cmpxchg_acquire(&lock->val, &val, _Q_LOCKED_VAL)))
> >  		return;
> >
> > +	LOCK_CONTENTION_BEGIN(lock, 0);
> >  	queued_spin_lock_slowpath(lock, val);
> > +	LOCK_CONTENTION_END(lock);
> >  }
>
> Can you please stick that _inside_ the slowpath? You really don't want
> to inline that.

I can move it into the slow path with caller ip.

Thanks,
Namhyung
On Tue, Mar 1, 2022 at 1:04 AM Peter Zijlstra <peterz@infradead.org> wrote:
>
> On Mon, Feb 28, 2022 at 05:04:10PM -0800, Namhyung Kim wrote:
>
> > @@ -171,9 +172,12 @@ bool __sched __percpu_down_read(struct percpu_rw_semaphore *sem, bool try)
> >  	if (try)
> >  		return false;
> >
> > +	trace_contention_begin(sem, _RET_IP_,
> > +			       LCB_F_READ | LCB_F_PERCPU | TASK_UNINTERRUPTIBLE);
>
> That is a bit unwieldy, isn't it ?
>
> >  	preempt_enable();
> >  	percpu_rwsem_wait(sem, /* .reader = */ true);
> >  	preempt_disable();
> > +	trace_contention_end(sem);
> >
> >  	return true;
> >  }
> > @@ -224,8 +228,13 @@ void __sched percpu_down_write(struct percpu_rw_semaphore *sem)
> >  	 * Try set sem->block; this provides writer-writer exclusion.
> >  	 * Having sem->block set makes new readers block.
> >  	 */
> > -	if (!__percpu_down_write_trylock(sem))
> > +	if (!__percpu_down_write_trylock(sem)) {
> > +		unsigned int flags = LCB_F_WRITE | LCB_F_PERCPU | TASK_UNINTERRUPTIBLE;
> > +
> > +		trace_contention_begin(sem, _RET_IP_, flags);
> >  		percpu_rwsem_wait(sem, /* .reader = */ false);
> > +		trace_contention_end(sem);
> > +	}
> >
> >  	/* smp_mb() implied by __percpu_down_write_trylock() on success -- D matches A */
> >
>
> Wouldn't it be easier to stick all that inside percpu_rwsem_wait() and
> have it only once? You can even re-frob the wait loop such that the
> tracepoint can use current->__state or something.
>
> diff --git a/kernel/locking/percpu-rwsem.c b/kernel/locking/percpu-rwsem.c
> index c9fdae94e098..ca01f8ff88e5 100644
> --- a/kernel/locking/percpu-rwsem.c
> +++ b/kernel/locking/percpu-rwsem.c
> @@ -154,13 +154,16 @@ static void percpu_rwsem_wait(struct percpu_rw_semaphore *sem, bool reader)
> 	}
> 	spin_unlock_irq(&sem->waiters.lock);
>
> +	set_current_state(TASK_UNINTERRUPTIBLE);
> +	trace_contention_begin(sem, _RET_IP_, LCB_F_PERCPU | LCB_F_WRITE*!reader);
> 	while (wait) {
> -		set_current_state(TASK_UNINTERRUPTIBLE);
> 		if (!smp_load_acquire(&wq_entry.private))
> 			break;
> 		schedule();
> +		set_current_state(TASK_UNINTERRUPTIBLE);
> 	}
> 	__set_current_state(TASK_RUNNING);
> +	trace_contention_end(sem);
> }

Looks good.  I'll make similar changes in other places too.

One general concern of moving inside is that it makes the _RET_IP_
useless.  If we can pass the caller ip to the slowpath function,
it'd be fine.
> > diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
> > index 8555c4efe97c..e49f5d2a232b 100644
> > --- a/kernel/locking/rtmutex.c
> > +++ b/kernel/locking/rtmutex.c
> > @@ -24,6 +24,8 @@
> >  #include <linux/sched/wake_q.h>
> >  #include <linux/ww_mutex.h>
> >
> > +#include <trace/events/lock.h>
> > +
> >  #include "rtmutex_common.h"
> >
> >  #ifndef WW_RT
> > @@ -1652,10 +1654,16 @@ static int __sched rt_mutex_slowlock(struct rt_mutex_base *lock,
> >  static __always_inline int __rt_mutex_lock(struct rt_mutex_base *lock,
> >  					   unsigned int state)
> >  {
> > +	int ret;
> > +
> >  	if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current)))
> >  		return 0;
> >
> > -	return rt_mutex_slowlock(lock, NULL, state);
> > +	trace_contention_begin(lock, _RET_IP_, LCB_F_RT | state);
> > +	ret = rt_mutex_slowlock(lock, NULL, state);
> > +	trace_contention_end(lock);
> > +
> > +	return ret;
> >  }
> >  #endif /* RT_MUTEX_BUILD_MUTEX */
> >
> > @@ -1718,9 +1726,11 @@ static __always_inline void __sched rtlock_slowlock(struct rt_mutex_base *lock)
> >  {
> >  	unsigned long flags;
> >
> > +	trace_contention_begin(lock, _RET_IP_, LCB_F_RT | TASK_RTLOCK_WAIT);
> >  	raw_spin_lock_irqsave(&lock->wait_lock, flags);
> >  	rtlock_slowlock_locked(lock);
> >  	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
> > +	trace_contention_end(lock);
> >  }
>
> Same, if you do it one level in, you can have the tracepoint itself look
> at current->__state. Also, you seem to have forgotten to trace the
> return value. Now you can't tell if the lock was acquired, or was denied
> (ww_mutex) or we were interrupted.

Right, thanks for pointing that out.  Will add that.

Thanks,
Namhyung

> diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
> index 8555c4efe97c..18b9f4bf6f34 100644
> --- a/kernel/locking/rtmutex.c
> +++ b/kernel/locking/rtmutex.c
> @@ -1579,6 +1579,8 @@ static int __sched __rt_mutex_slowlock(struct rt_mutex_base *lock,
>
>  	set_current_state(state);
>
> +	trace_contention_begin(lock, _RET_IP_, LCB_F_RT);
> +
>  	ret = task_blocks_on_rt_mutex(lock, waiter, current, ww_ctx, chwalk);
>  	if (likely(!ret))
>  		ret = rt_mutex_slowlock_block(lock, ww_ctx, state, NULL, waiter);
> @@ -1601,6 +1603,9 @@ static int __sched __rt_mutex_slowlock(struct rt_mutex_base *lock,
>  	 * unconditionally. We might have to fix that up.
>  	 */
>  	fixup_rt_mutex_waiters(lock);
> +
> +	trace_contention_end(lock, ret);
> +
>  	return ret;
>  }
>
> @@ -1683,6 +1688,8 @@ static void __sched rtlock_slowlock_locked(struct rt_mutex_base *lock)
>  	/* Save current state and set state to TASK_RTLOCK_WAIT */
>  	current_save_and_set_rtlock_wait_state();
>
> +	trace_contention_begin(lock, _RET_IP_, LCB_F_RT);
> +
>  	task_blocks_on_rt_mutex(lock, &waiter, current, NULL, RT_MUTEX_MIN_CHAINWALK);
>
>  	for (;;) {
> @@ -1703,6 +1710,8 @@ static void __sched rtlock_slowlock_locked(struct rt_mutex_base *lock)
>  		set_current_state(TASK_RTLOCK_WAIT);
>  	}
>
> +	trace_contention_end(lock, 0);
> +
>  	/* Restore the task state */
>  	current_restore_rtlock_saved_state();
>
Hi Steve,

On Tue, Mar 1, 2022 at 6:45 AM Steven Rostedt <rostedt@goodmis.org> wrote:
>
> On Tue, 1 Mar 2022 10:03:54 +0100
> Peter Zijlstra <peterz@infradead.org> wrote:
>
> > diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
> > index 8555c4efe97c..18b9f4bf6f34 100644
> > --- a/kernel/locking/rtmutex.c
> > +++ b/kernel/locking/rtmutex.c
> > @@ -1579,6 +1579,8 @@ static int __sched __rt_mutex_slowlock(struct rt_mutex_base *lock,
> >
> >  	set_current_state(state);
> >
> > +	trace_contention_begin(lock, _RET_IP_, LCB_F_RT);
>
> I guess one issue with this is that _RET_IP_ will return the rt_mutex
> address if this is not inlined, making the _RET_IP_ rather useless.
>
> Now, if we can pass the _RET_IP_ into __rt_mutex_slowlock() then we could
> reference that.

Right, that's what I did in patch 03 and 04 for mutex and rwsem.
But I forgot to update rt_mutex, will fix.

Thanks,
Namhyung
On Tue, Mar 1, 2022 at 1:04 AM Peter Zijlstra <peterz@infradead.org> wrote:
>
> On Mon, Feb 28, 2022 at 05:04:10PM -0800, Namhyung Kim wrote:
> > @@ -1718,9 +1726,11 @@ static __always_inline void __sched rtlock_slowlock(struct rt_mutex_base *lock)
> >  {
> >  	unsigned long flags;
> >
> > +	trace_contention_begin(lock, _RET_IP_, LCB_F_RT | TASK_RTLOCK_WAIT);
> >  	raw_spin_lock_irqsave(&lock->wait_lock, flags);
> >  	rtlock_slowlock_locked(lock);
> >  	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
> > +	trace_contention_end(lock);
> >  }
>
> Same, if you do it one level in, you can have the tracepoint itself look
> at current->__state.

So I tried this by reading the state in the tracepoint like below:

+	TP_fast_assign(
+		__entry->lock_addr = lock;
+		__entry->flags = flags | get_current_state();
+	),

But I sometimes see unrelated values which contain __TASK_TRACED or
__TASK_STOPPED, and some unexpected values like TASK_UNINTERRUPTIBLE
for rwlocks.  Maybe I missed something.

Anyway, I think it's confusing and complicates things unnecessarily.
It'd probably be better to use new flags like LCB_F_SPIN and LCB_F_WAIT.

Thanks,
Namhyung
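A rough, hypothetical sketch (not the posted tracepoint definition; the field names and exact flag set are assumptions) of the flag-based alternative: each call site passes an explicit flags word, e.g. LCB_F_SPIN for the spinning qspinlock/qrwlock paths or LCB_F_WAIT for sleeping locks, instead of the event sampling current->__state in TP_fast_assign():

/* Sketch only -- not the definition from this series. */
TRACE_EVENT(contention_begin,

	TP_PROTO(void *lock, unsigned long ip, unsigned int flags),

	TP_ARGS(lock, ip, flags),

	TP_STRUCT__entry(
		__field(void *,		lock_addr)
		__field(unsigned long,	ip)
		__field(unsigned int,	flags)
	),

	TP_fast_assign(
		__entry->lock_addr = lock;
		__entry->ip = ip;
		__entry->flags = flags;	/* no get_current_state() here */
	),

	TP_printk("%p %pS (flags=0x%x)",
		  __entry->lock_addr, (void *)__entry->ip, __entry->flags)
);

With something like this, the READ/WRITE/RT/PERCPU bits already used in the patch would simply be extended by SPIN/WAIT bits set by the callers, so the event no longer depends on the task state at trace time.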
diff --git a/include/asm-generic/qrwlock.h b/include/asm-generic/qrwlock.h
index 7ae0ece07b4e..9735c39b05bb 100644
--- a/include/asm-generic/qrwlock.h
+++ b/include/asm-generic/qrwlock.h
@@ -12,6 +12,7 @@
 #include <linux/atomic.h>
 #include <asm/barrier.h>
 #include <asm/processor.h>
+#include <linux/lock_trace.h>
 
 #include <asm-generic/qrwlock_types.h>
 
@@ -80,7 +81,9 @@ static inline void queued_read_lock(struct qrwlock *lock)
 		return;
 
 	/* The slowpath will decrement the reader count, if necessary. */
+	LOCK_CONTENTION_BEGIN(lock, LCB_F_READ);
 	queued_read_lock_slowpath(lock);
+	LOCK_CONTENTION_END(lock);
 }
 
 /**
@@ -94,7 +97,9 @@ static inline void queued_write_lock(struct qrwlock *lock)
 	if (likely(atomic_try_cmpxchg_acquire(&lock->cnts, &cnts, _QW_LOCKED)))
 		return;
 
+	LOCK_CONTENTION_BEGIN(lock, LCB_F_WRITE);
 	queued_write_lock_slowpath(lock);
+	LOCK_CONTENTION_END(lock);
 }
 
 /**
diff --git a/include/asm-generic/qspinlock.h b/include/asm-generic/qspinlock.h
index d74b13825501..986b96fadbf9 100644
--- a/include/asm-generic/qspinlock.h
+++ b/include/asm-generic/qspinlock.h
@@ -12,6 +12,7 @@
 
 #include <asm-generic/qspinlock_types.h>
 #include <linux/atomic.h>
+#include <linux/lock_trace.h>
 
 #ifndef queued_spin_is_locked
 /**
@@ -82,7 +83,9 @@ static __always_inline void queued_spin_lock(struct qspinlock *lock)
 	if (likely(atomic_try_cmpxchg_acquire(&lock->val, &val, _Q_LOCKED_VAL)))
 		return;
 
+	LOCK_CONTENTION_BEGIN(lock, 0);
 	queued_spin_lock_slowpath(lock, val);
+	LOCK_CONTENTION_END(lock);
 }
 #endif
 
diff --git a/include/trace/events/lock.h b/include/trace/events/lock.h
index 7bca0a537dbd..9b285083f88f 100644
--- a/include/trace/events/lock.h
+++ b/include/trace/events/lock.h
@@ -6,6 +6,7 @@
 #define _TRACE_LOCK_H
 
 #include <linux/tracepoint.h>
+#include <linux/lock_trace.h>
 
 #ifdef CONFIG_LOCKDEP
 
diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
index 5e3585950ec8..756624c14dfd 100644
--- a/kernel/locking/mutex.c
+++ b/kernel/locking/mutex.c
@@ -30,6 +30,8 @@
 #include <linux/debug_locks.h>
 #include <linux/osq_lock.h>
 
+#include <trace/events/lock.h>
+
 #ifndef CONFIG_PREEMPT_RT
 #include "mutex.h"
 
@@ -626,6 +628,7 @@ __mutex_lock_common(struct mutex *lock, unsigned int state, unsigned int subclas
 	waiter.ww_ctx = ww_ctx;
 
 	lock_contended(&lock->dep_map, ip);
+	trace_contention_begin(lock, ip, state);
 
 	if (!use_ww_ctx) {
 		/* add waiting tasks to the end of the waitqueue (FIFO): */
@@ -688,6 +691,7 @@ __mutex_lock_common(struct mutex *lock, unsigned int state, unsigned int subclas
 	}
 	raw_spin_lock(&lock->wait_lock);
 acquired:
+	trace_contention_end(lock);
 	__set_current_state(TASK_RUNNING);
 
 	if (ww_ctx) {
diff --git a/kernel/locking/percpu-rwsem.c b/kernel/locking/percpu-rwsem.c
index c9fdae94e098..4049b79b3dcc 100644
--- a/kernel/locking/percpu-rwsem.c
+++ b/kernel/locking/percpu-rwsem.c
@@ -9,6 +9,7 @@
 #include <linux/sched/task.h>
 #include <linux/sched/debug.h>
 #include <linux/errno.h>
+#include <trace/events/lock.h>
 
 int __percpu_init_rwsem(struct percpu_rw_semaphore *sem,
 			const char *name, struct lock_class_key *key)
@@ -171,9 +172,12 @@ bool __sched __percpu_down_read(struct percpu_rw_semaphore *sem, bool try)
 	if (try)
 		return false;
 
+	trace_contention_begin(sem, _RET_IP_,
+			       LCB_F_READ | LCB_F_PERCPU | TASK_UNINTERRUPTIBLE);
 	preempt_enable();
 	percpu_rwsem_wait(sem, /* .reader = */ true);
 	preempt_disable();
+	trace_contention_end(sem);
 
 	return true;
 }
@@ -224,8 +228,13 @@ void __sched percpu_down_write(struct percpu_rw_semaphore *sem)
 	 * Try set sem->block; this provides writer-writer exclusion.
 	 * Having sem->block set makes new readers block.
 	 */
-	if (!__percpu_down_write_trylock(sem))
+	if (!__percpu_down_write_trylock(sem)) {
+		unsigned int flags = LCB_F_WRITE | LCB_F_PERCPU | TASK_UNINTERRUPTIBLE;
+
+		trace_contention_begin(sem, _RET_IP_, flags);
 		percpu_rwsem_wait(sem, /* .reader = */ false);
+		trace_contention_end(sem);
+	}
 
 	/* smp_mb() implied by __percpu_down_write_trylock() on success -- D matches A */
 
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index 8555c4efe97c..e49f5d2a232b 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -24,6 +24,8 @@
 #include <linux/sched/wake_q.h>
 #include <linux/ww_mutex.h>
 
+#include <trace/events/lock.h>
+
 #include "rtmutex_common.h"
 
 #ifndef WW_RT
@@ -1652,10 +1654,16 @@ static int __sched rt_mutex_slowlock(struct rt_mutex_base *lock,
 static __always_inline int __rt_mutex_lock(struct rt_mutex_base *lock,
 					   unsigned int state)
 {
+	int ret;
+
 	if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current)))
 		return 0;
 
-	return rt_mutex_slowlock(lock, NULL, state);
+	trace_contention_begin(lock, _RET_IP_, LCB_F_RT | state);
+	ret = rt_mutex_slowlock(lock, NULL, state);
+	trace_contention_end(lock);
+
+	return ret;
 }
 #endif /* RT_MUTEX_BUILD_MUTEX */
 
@@ -1718,9 +1726,11 @@ static __always_inline void __sched rtlock_slowlock(struct rt_mutex_base *lock)
 {
 	unsigned long flags;
 
+	trace_contention_begin(lock, _RET_IP_, LCB_F_RT | TASK_RTLOCK_WAIT);
 	raw_spin_lock_irqsave(&lock->wait_lock, flags);
 	rtlock_slowlock_locked(lock);
 	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
+	trace_contention_end(lock);
 }
 
 #endif /* RT_MUTEX_BUILD_SPINLOCKS */
diff --git a/kernel/locking/rwbase_rt.c b/kernel/locking/rwbase_rt.c
index 6fd3162e4098..8a28f1195c58 100644
--- a/kernel/locking/rwbase_rt.c
+++ b/kernel/locking/rwbase_rt.c
@@ -136,10 +136,16 @@ static int __sched __rwbase_read_lock(struct rwbase_rt *rwb,
 static __always_inline int rwbase_read_lock(struct rwbase_rt *rwb,
 					    unsigned int state)
 {
+	int ret;
+
 	if (rwbase_read_trylock(rwb))
 		return 0;
 
-	return __rwbase_read_lock(rwb, state);
+	trace_contention_begin(rwb, _RET_IP_, LCB_F_READ | LCB_F_RT | state);
+	ret = __rwbase_read_lock(rwb, state);
+	trace_contention_end(rwb);
+
+	return ret;
 }
 
 static void __sched __rwbase_read_unlock(struct rwbase_rt *rwb,
@@ -246,12 +252,14 @@ static int __sched rwbase_write_lock(struct rwbase_rt *rwb,
 	if (__rwbase_write_trylock(rwb))
 		goto out_unlock;
 
+	trace_contention_begin(rwb, _RET_IP_, LCB_F_WRITE | LCB_F_RT | state);
 	rwbase_set_and_save_current_state(state);
 	for (;;) {
 		/* Optimized out for rwlocks */
 		if (rwbase_signal_pending_state(state, current)) {
 			rwbase_restore_current_state();
 			__rwbase_write_unlock(rwb, 0, flags);
+			trace_contention_end(rwb);
 			return -EINTR;
 		}
 
@@ -265,6 +273,7 @@ static int __sched rwbase_write_lock(struct rwbase_rt *rwb,
 		set_current_state(state);
 	}
 	rwbase_restore_current_state();
+	trace_contention_end(rwb);
 out_unlock:
 	raw_spin_unlock_irqrestore(&rtm->wait_lock, flags);
diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c
index acde5d6f1254..a1a17af7f747 100644
--- a/kernel/locking/rwsem.c
+++ b/kernel/locking/rwsem.c
@@ -27,6 +27,7 @@
 #include <linux/export.h>
 #include <linux/rwsem.h>
 #include <linux/atomic.h>
+#include <trace/events/lock.h>
 
 #ifndef CONFIG_PREEMPT_RT
 #include "lock_events.h"
@@ -1209,9 +1210,14 @@ static struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem)
 static inline int __down_read_common(struct rw_semaphore *sem, int state)
 {
 	long count;
+	void *ret;
 
 	if (!rwsem_read_trylock(sem, &count)) {
-		if (IS_ERR(rwsem_down_read_slowpath(sem, count, state)))
+		trace_contention_begin(sem, _RET_IP_, LCB_F_READ | state);
+		ret = rwsem_down_read_slowpath(sem, count, state);
+		trace_contention_end(sem);
+
+		if (IS_ERR(ret))
 			return -EINTR;
 		DEBUG_RWSEMS_WARN_ON(!is_rwsem_reader_owned(sem), sem);
 	}
@@ -1255,8 +1261,14 @@ static inline int __down_read_trylock(struct rw_semaphore *sem)
  */
 static inline int __down_write_common(struct rw_semaphore *sem, int state)
 {
+	void *ret;
+
 	if (unlikely(!rwsem_write_trylock(sem))) {
-		if (IS_ERR(rwsem_down_write_slowpath(sem, state)))
+		trace_contention_begin(sem, _RET_IP_, LCB_F_WRITE | state);
+		ret = rwsem_down_write_slowpath(sem, state);
+		trace_contention_end(sem);
+
+		if (IS_ERR(ret))
 			return -EINTR;
 	}
Adding the lock contention tracepoints in various lock function slow
paths.  Note that each arch can define its spinlock differently, so I
only added it to the generic qspinlock for now.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
 include/asm-generic/qrwlock.h   |  5 +++++
 include/asm-generic/qspinlock.h |  3 +++
 include/trace/events/lock.h     |  1 +
 kernel/locking/mutex.c          |  4 ++++
 kernel/locking/percpu-rwsem.c   | 11 ++++++++++-
 kernel/locking/rtmutex.c        | 12 +++++++++++-
 kernel/locking/rwbase_rt.c      | 11 ++++++++++-
 kernel/locking/rwsem.c          | 16 ++++++++++++++--
 8 files changed, 58 insertions(+), 5 deletions(-)