[2/4] locking: Apply contention tracepoints in the slow path

Message ID 20220301010412.431299-3-namhyung@kernel.org (mailing list archive)
State Not Applicable
Series locking: Add new lock contention tracepoints (v2)

Commit Message

Namhyung Kim March 1, 2022, 1:04 a.m. UTC
Add lock contention tracepoints in various lock function slow paths.
Note that each architecture can define its spinlock differently, so for
now this is only added to the generic qspinlock.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
 include/asm-generic/qrwlock.h   |  5 +++++
 include/asm-generic/qspinlock.h |  3 +++
 include/trace/events/lock.h     |  1 +
 kernel/locking/mutex.c          |  4 ++++
 kernel/locking/percpu-rwsem.c   | 11 ++++++++++-
 kernel/locking/rtmutex.c        | 12 +++++++++++-
 kernel/locking/rwbase_rt.c      | 11 ++++++++++-
 kernel/locking/rwsem.c          | 16 ++++++++++++++--
 8 files changed, 58 insertions(+), 5 deletions(-)

Comments

Peter Zijlstra March 1, 2022, 8:43 a.m. UTC | #1
On Mon, Feb 28, 2022 at 05:04:10PM -0800, Namhyung Kim wrote:
> @@ -80,7 +81,9 @@ static inline void queued_read_lock(struct qrwlock *lock)
>  		return;
>  
>  	/* The slowpath will decrement the reader count, if necessary. */
> +	LOCK_CONTENTION_BEGIN(lock, LCB_F_READ);
>  	queued_read_lock_slowpath(lock);
> +	LOCK_CONTENTION_END(lock);
>  }
>  
>  /**
> @@ -94,7 +97,9 @@ static inline void queued_write_lock(struct qrwlock *lock)
>  	if (likely(atomic_try_cmpxchg_acquire(&lock->cnts, &cnts, _QW_LOCKED)))
>  		return;
>  
> +	LOCK_CONTENTION_BEGIN(lock, LCB_F_WRITE);
>  	queued_write_lock_slowpath(lock);
> +	LOCK_CONTENTION_END(lock);
>  }

> @@ -82,7 +83,9 @@ static __always_inline void queued_spin_lock(struct qspinlock *lock)
>  	if (likely(atomic_try_cmpxchg_acquire(&lock->val, &val, _Q_LOCKED_VAL)))
>  		return;
>  
> +	LOCK_CONTENTION_BEGIN(lock, 0);
>  	queued_spin_lock_slowpath(lock, val);
> +	LOCK_CONTENTION_END(lock);
>  }

Can you please stick that _inside_ the slowpath? You really don't want
to inline that.
Peter Zijlstra March 1, 2022, 9:03 a.m. UTC | #2
On Mon, Feb 28, 2022 at 05:04:10PM -0800, Namhyung Kim wrote:

> @@ -171,9 +172,12 @@ bool __sched __percpu_down_read(struct percpu_rw_semaphore *sem, bool try)
>  	if (try)
>  		return false;
>  
> +	trace_contention_begin(sem, _RET_IP_,
> +			       LCB_F_READ | LCB_F_PERCPU | TASK_UNINTERRUPTIBLE);

That is a bit unwieldy, isn't it?

>  	preempt_enable();
>  	percpu_rwsem_wait(sem, /* .reader = */ true);
>  	preempt_disable();
> +	trace_contention_end(sem);
>  
>  	return true;
>  }
> @@ -224,8 +228,13 @@ void __sched percpu_down_write(struct percpu_rw_semaphore *sem)
>  	 * Try set sem->block; this provides writer-writer exclusion.
>  	 * Having sem->block set makes new readers block.
>  	 */
> -	if (!__percpu_down_write_trylock(sem))
> +	if (!__percpu_down_write_trylock(sem)) {
> +		unsigned int flags = LCB_F_WRITE | LCB_F_PERCPU | TASK_UNINTERRUPTIBLE;
> +
> +		trace_contention_begin(sem, _RET_IP_, flags);
>  		percpu_rwsem_wait(sem, /* .reader = */ false);
> +		trace_contention_end(sem);
> +	}
>  
>  	/* smp_mb() implied by __percpu_down_write_trylock() on success -- D matches A */
>  

Wouldn't it be easier to stick all that inside percpu_rwsem_wait() and
have it only once? You can even re-frob the wait loop such that the
tracepoint can use current->__state or something.

diff --git a/kernel/locking/percpu-rwsem.c b/kernel/locking/percpu-rwsem.c
index c9fdae94e098..ca01f8ff88e5 100644
--- a/kernel/locking/percpu-rwsem.c
+++ b/kernel/locking/percpu-rwsem.c
@@ -154,13 +154,16 @@ static void percpu_rwsem_wait(struct percpu_rw_semaphore *sem, bool reader)
 	}
 	spin_unlock_irq(&sem->waiters.lock);
 
+	set_current_state(TASK_UNINTERRUPTIBLE);
+	trace_contention_begin(sem, _RET_IP_, LCB_F_PERCPU | LCB_F_WRITE*!reader);
 	while (wait) {
-		set_current_state(TASK_UNINTERRUPTIBLE);
 		if (!smp_load_acquire(&wq_entry.private))
 			break;
 		schedule();
+		set_current_state(TASK_UNINTERRUPTIBLE);
 	}
 	__set_current_state(TASK_RUNNING);
+	trace_contention_end(sem);
 }
 
 bool __sched __percpu_down_read(struct percpu_rw_semaphore *sem, bool try)

> diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
> index 8555c4efe97c..e49f5d2a232b 100644
> --- a/kernel/locking/rtmutex.c
> +++ b/kernel/locking/rtmutex.c
> @@ -24,6 +24,8 @@
>  #include <linux/sched/wake_q.h>
>  #include <linux/ww_mutex.h>
>  
> +#include <trace/events/lock.h>
> +
>  #include "rtmutex_common.h"
>  
>  #ifndef WW_RT
> @@ -1652,10 +1654,16 @@ static int __sched rt_mutex_slowlock(struct rt_mutex_base *lock,
>  static __always_inline int __rt_mutex_lock(struct rt_mutex_base *lock,
>  					   unsigned int state)
>  {
> +	int ret;
> +
>  	if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current)))
>  		return 0;
>  
> -	return rt_mutex_slowlock(lock, NULL, state);
> +	trace_contention_begin(lock, _RET_IP_, LCB_F_RT | state);
> +	ret = rt_mutex_slowlock(lock, NULL, state);
> +	trace_contention_end(lock);
> +
> +	return ret;
>  }
>  #endif /* RT_MUTEX_BUILD_MUTEX */
>  
> @@ -1718,9 +1726,11 @@ static __always_inline void __sched rtlock_slowlock(struct rt_mutex_base *lock)
>  {
>  	unsigned long flags;
>  
> +	trace_contention_begin(lock, _RET_IP_, LCB_F_RT | TASK_RTLOCK_WAIT);
>  	raw_spin_lock_irqsave(&lock->wait_lock, flags);
>  	rtlock_slowlock_locked(lock);
>  	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
> +	trace_contention_end(lock);
>  }

Same, if you do it one level in, you can have the tracepoint itself look
at current->__state. Also, you seem to have forgotten to trace the
return value. Now you can't tell if the lock was acquired, or was denied
(ww_mutex) or we were interrupted.

diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index 8555c4efe97c..18b9f4bf6f34 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -1579,6 +1579,8 @@ static int __sched __rt_mutex_slowlock(struct rt_mutex_base *lock,
 
 	set_current_state(state);
 
+	trace_contention_begin(lock, _RET_IP_, LCB_F_RT);
+
 	ret = task_blocks_on_rt_mutex(lock, waiter, current, ww_ctx, chwalk);
 	if (likely(!ret))
 		ret = rt_mutex_slowlock_block(lock, ww_ctx, state, NULL, waiter);
@@ -1601,6 +1603,9 @@ static int __sched __rt_mutex_slowlock(struct rt_mutex_base *lock,
 	 * unconditionally. We might have to fix that up.
 	 */
 	fixup_rt_mutex_waiters(lock);
+
+	trace_contention_end(lock, ret);
+
 	return ret;
 }
 
@@ -1683,6 +1688,8 @@ static void __sched rtlock_slowlock_locked(struct rt_mutex_base *lock)
 	/* Save current state and set state to TASK_RTLOCK_WAIT */
 	current_save_and_set_rtlock_wait_state();
 
+	trace_contention_begin(lock, _RET_IP_, LCB_F_RT);
+
 	task_blocks_on_rt_mutex(lock, &waiter, current, NULL, RT_MUTEX_MIN_CHAINWALK);
 
 	for (;;) {
@@ -1703,6 +1710,8 @@ static void __sched rtlock_slowlock_locked(struct rt_mutex_base *lock)
 		set_current_state(TASK_RTLOCK_WAIT);
 	}
 
+	trace_contention_end(lock, 0);
+
 	/* Restore the task state */
 	current_restore_rtlock_saved_state();
Steven Rostedt March 1, 2022, 2:45 p.m. UTC | #3
On Tue, 1 Mar 2022 10:03:54 +0100
Peter Zijlstra <peterz@infradead.org> wrote:

> diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
> index 8555c4efe97c..18b9f4bf6f34 100644
> --- a/kernel/locking/rtmutex.c
> +++ b/kernel/locking/rtmutex.c
> @@ -1579,6 +1579,8 @@ static int __sched __rt_mutex_slowlock(struct rt_mutex_base *lock,
>  
>  	set_current_state(state);
>  
> +	trace_contention_begin(lock, _RET_IP_, LCB_F_RT);

I guess one issue with this is that _RET_IP_ will return the rt_mutex
address if this is not inlined, making the _RET_IP_ rather useless.

Now, if we can pass the _RET_IP_ into __rt_mutex_slowlock() then we could
reference that.

-- Steve


> +
>  	ret = task_blocks_on_rt_mutex(lock, waiter, current, ww_ctx, chwalk);
>  	if (likely(!ret))
>  		ret = rt_mutex_slowlock_block(lock, ww_ctx, state, NULL, waiter);
> @@ -1601,6 +1603,9 @@ static int __sched __rt_mutex_slowlock(struct rt_mutex_base *lock,
>  	 * unconditionally. We might have to fix that up.
>  	 */
>  	fixup_rt_mutex_waiters(lock);
> +
> +	trace_contention_end(lock, ret);
> +
>  	return ret;
>  }
Namhyung Kim March 1, 2022, 6:03 p.m. UTC | #4
Hi Peter,

On Tue, Mar 1, 2022 at 12:44 AM Peter Zijlstra <peterz@infradead.org> wrote:
>
> On Mon, Feb 28, 2022 at 05:04:10PM -0800, Namhyung Kim wrote:
> > @@ -80,7 +81,9 @@ static inline void queued_read_lock(struct qrwlock *lock)
> >               return;
> >
> >       /* The slowpath will decrement the reader count, if necessary. */
> > +     LOCK_CONTENTION_BEGIN(lock, LCB_F_READ);
> >       queued_read_lock_slowpath(lock);
> > +     LOCK_CONTENTION_END(lock);
> >  }
> >
> >  /**
> > @@ -94,7 +97,9 @@ static inline void queued_write_lock(struct qrwlock *lock)
> >       if (likely(atomic_try_cmpxchg_acquire(&lock->cnts, &cnts, _QW_LOCKED)))
> >               return;
> >
> > +     LOCK_CONTENTION_BEGIN(lock, LCB_F_WRITE);
> >       queued_write_lock_slowpath(lock);
> > +     LOCK_CONTENTION_END(lock);
> >  }
>
> > @@ -82,7 +83,9 @@ static __always_inline void queued_spin_lock(struct qspinlock *lock)
> >       if (likely(atomic_try_cmpxchg_acquire(&lock->val, &val, _Q_LOCKED_VAL)))
> >               return;
> >
> > +     LOCK_CONTENTION_BEGIN(lock, 0);
> >       queued_spin_lock_slowpath(lock, val);
> > +     LOCK_CONTENTION_END(lock);
> >  }
>
> Can you please stick that _inside_ the slowpath? You really don't want
> to inline that.

I can move it into the slow path with caller ip.
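
Something like this, as a rough sketch only (the flag value for the
spinlock case and how the real lock-site ip reaches the slow path are
still open here):

/* kernel/locking/qspinlock.c -- illustrative sketch, not the actual v3 change */
void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
{
	/* flags and caller-ip handling still to be decided */
	trace_contention_begin(lock, _RET_IP_, 0);

	/* ... existing pending-bit / MCS queueing slow path ... */

	trace_contention_end(lock);
}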

Thanks,
Namhyung
Namhyung Kim March 1, 2022, 6:11 p.m. UTC | #5
On Tue, Mar 1, 2022 at 1:04 AM Peter Zijlstra <peterz@infradead.org> wrote:
>
> On Mon, Feb 28, 2022 at 05:04:10PM -0800, Namhyung Kim wrote:
>
> > @@ -171,9 +172,12 @@ bool __sched __percpu_down_read(struct percpu_rw_semaphore *sem, bool try)
> >       if (try)
> >               return false;
> >
> > +     trace_contention_begin(sem, _RET_IP_,
> > +                            LCB_F_READ | LCB_F_PERCPU | TASK_UNINTERRUPTIBLE);
>
> That is a bit unwieldy, isn't it ?
>
> >       preempt_enable();
> >       percpu_rwsem_wait(sem, /* .reader = */ true);
> >       preempt_disable();
> > +     trace_contention_end(sem);
> >
> >       return true;
> >  }
> > @@ -224,8 +228,13 @@ void __sched percpu_down_write(struct percpu_rw_semaphore *sem)
> >        * Try set sem->block; this provides writer-writer exclusion.
> >        * Having sem->block set makes new readers block.
> >        */
> > -     if (!__percpu_down_write_trylock(sem))
> > +     if (!__percpu_down_write_trylock(sem)) {
> > +             unsigned int flags = LCB_F_WRITE | LCB_F_PERCPU | TASK_UNINTERRUPTIBLE;
> > +
> > +             trace_contention_begin(sem, _RET_IP_, flags);
> >               percpu_rwsem_wait(sem, /* .reader = */ false);
> > +             trace_contention_end(sem);
> > +     }
> >
> >       /* smp_mb() implied by __percpu_down_write_trylock() on success -- D matches A */
> >
>
> Wouldn't it be easier to stick all that inside percpu_rwsem_wait() and
> have it only once? You can even re-frob the wait loop such that the
> tracepoint can use current->__state or something.
>
> diff --git a/kernel/locking/percpu-rwsem.c b/kernel/locking/percpu-rwsem.c
> index c9fdae94e098..ca01f8ff88e5 100644
> --- a/kernel/locking/percpu-rwsem.c
> +++ b/kernel/locking/percpu-rwsem.c
> @@ -154,13 +154,16 @@ static void percpu_rwsem_wait(struct percpu_rw_semaphore *sem, bool reader)
>         }
>         spin_unlock_irq(&sem->waiters.lock);
>
> +       set_current_state(TASK_UNINTERRUPTIBLE);
> +       trace_contention_begin(sem, _RET_IP_, LCB_F_PERCPU | LCB_F_WRITE*!reader);
>         while (wait) {
> -               set_current_state(TASK_UNINTERRUPTIBLE);
>                 if (!smp_load_acquire(&wq_entry.private))
>                         break;
>                 schedule();
> +               set_current_state(TASK_UNINTERRUPTIBLE);
>         }
>         __set_current_state(TASK_RUNNING);
> +       trace_contention_end(sem);
>  }

Looks good.  I'll make similar changes in other places too.

One general concern about moving it inside is that it makes _RET_IP_
useless.  If we can pass the caller ip to the slowpath function,
it'd be fine.

>
> > diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
> > index 8555c4efe97c..e49f5d2a232b 100644
> > --- a/kernel/locking/rtmutex.c
> > +++ b/kernel/locking/rtmutex.c
> > @@ -24,6 +24,8 @@
> >  #include <linux/sched/wake_q.h>
> >  #include <linux/ww_mutex.h>
> >
> > +#include <trace/events/lock.h>
> > +
> >  #include "rtmutex_common.h"
> >
> >  #ifndef WW_RT
> > @@ -1652,10 +1654,16 @@ static int __sched rt_mutex_slowlock(struct rt_mutex_base *lock,
> >  static __always_inline int __rt_mutex_lock(struct rt_mutex_base *lock,
> >                                          unsigned int state)
> >  {
> > +     int ret;
> > +
> >       if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current)))
> >               return 0;
> >
> > -     return rt_mutex_slowlock(lock, NULL, state);
> > +     trace_contention_begin(lock, _RET_IP_, LCB_F_RT | state);
> > +     ret = rt_mutex_slowlock(lock, NULL, state);
> > +     trace_contention_end(lock);
> > +
> > +     return ret;
> >  }
> >  #endif /* RT_MUTEX_BUILD_MUTEX */
> >
> > @@ -1718,9 +1726,11 @@ static __always_inline void __sched rtlock_slowlock(struct rt_mutex_base *lock)
> >  {
> >       unsigned long flags;
> >
> > +     trace_contention_begin(lock, _RET_IP_, LCB_F_RT | TASK_RTLOCK_WAIT);
> >       raw_spin_lock_irqsave(&lock->wait_lock, flags);
> >       rtlock_slowlock_locked(lock);
> >       raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
> > +     trace_contention_end(lock);
> >  }
>
> Same, if you do it one level in, you can have the tracepoint itself look
> at current->__state. Also, you seem to have forgotten to trace the
> return value. Now you can't tell if the lock was acquired, or was denied
> (ww_mutex) or we were interrupted.

Right, thanks for pointing that out.  Will add that.

Thanks,
Namhyung

>
> diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
> index 8555c4efe97c..18b9f4bf6f34 100644
> --- a/kernel/locking/rtmutex.c
> +++ b/kernel/locking/rtmutex.c
> @@ -1579,6 +1579,8 @@ static int __sched __rt_mutex_slowlock(struct rt_mutex_base *lock,
>
>         set_current_state(state);
>
> +       trace_contention_begin(lock, _RET_IP_, LCB_F_RT);
> +
>         ret = task_blocks_on_rt_mutex(lock, waiter, current, ww_ctx, chwalk);
>         if (likely(!ret))
>                 ret = rt_mutex_slowlock_block(lock, ww_ctx, state, NULL, waiter);
> @@ -1601,6 +1603,9 @@ static int __sched __rt_mutex_slowlock(struct rt_mutex_base *lock,
>          * unconditionally. We might have to fix that up.
>          */
>         fixup_rt_mutex_waiters(lock);
> +
> +       trace_contention_end(lock, ret);
> +
>         return ret;
>  }
>
> @@ -1683,6 +1688,8 @@ static void __sched rtlock_slowlock_locked(struct rt_mutex_base *lock)
>         /* Save current state and set state to TASK_RTLOCK_WAIT */
>         current_save_and_set_rtlock_wait_state();
>
> +       trace_contention_begin(lock, _RET_IP_, LCB_F_RT);
> +
>         task_blocks_on_rt_mutex(lock, &waiter, current, NULL, RT_MUTEX_MIN_CHAINWALK);
>
>         for (;;) {
> @@ -1703,6 +1710,8 @@ static void __sched rtlock_slowlock_locked(struct rt_mutex_base *lock)
>                 set_current_state(TASK_RTLOCK_WAIT);
>         }
>
> +       trace_contention_end(lock, 0);
> +
>         /* Restore the task state */
>         current_restore_rtlock_saved_state();
>
Namhyung Kim March 1, 2022, 6:14 p.m. UTC | #6
Hi Steve,

On Tue, Mar 1, 2022 at 6:45 AM Steven Rostedt <rostedt@goodmis.org> wrote:
>
> On Tue, 1 Mar 2022 10:03:54 +0100
> Peter Zijlstra <peterz@infradead.org> wrote:
>
> > diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
> > index 8555c4efe97c..18b9f4bf6f34 100644
> > --- a/kernel/locking/rtmutex.c
> > +++ b/kernel/locking/rtmutex.c
> > @@ -1579,6 +1579,8 @@ static int __sched __rt_mutex_slowlock(struct rt_mutex_base *lock,
> >
> >       set_current_state(state);
> >
> > +     trace_contention_begin(lock, _RET_IP_, LCB_F_RT);
>
> I guess one issue with this is that _RET_IP_ will return the rt_mutex
> address if this is not inlined, making the _RET_IP_ rather useless.
>
> Now, if we can pass the _RET_IP_ into __rt_mutex_slowlock() then we could
> reference that.

Right, that's what I did in patches 03 and 04 for mutex and rwsem.
But I forgot to update rt_mutex, will fix.
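
Roughly like this (illustrative only -- the extra ip argument and its
plumbing through the rt_mutex slow path are not part of this series yet):

static __always_inline int __rt_mutex_lock(struct rt_mutex_base *lock,
					   unsigned int state)
{
	if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current)))
		return 0;

	/* forward the real lock-site ip into the slow path */
	return rt_mutex_slowlock(lock, NULL, state, _RET_IP_);
}

and then __rt_mutex_slowlock() would use the passed-in ip for
trace_contention_begin() instead of its own _RET_IP_.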

Thanks,
Namhyung
Namhyung Kim March 14, 2022, 9:44 p.m. UTC | #7
On Tue, Mar 1, 2022 at 1:04 AM Peter Zijlstra <peterz@infradead.org> wrote:
>
> On Mon, Feb 28, 2022 at 05:04:10PM -0800, Namhyung Kim wrote:
> > @@ -1718,9 +1726,11 @@ static __always_inline void __sched rtlock_slowlock(struct rt_mutex_base *lock)
> >  {
> >       unsigned long flags;
> >
> > +     trace_contention_begin(lock, _RET_IP_, LCB_F_RT | TASK_RTLOCK_WAIT);
> >       raw_spin_lock_irqsave(&lock->wait_lock, flags);
> >       rtlock_slowlock_locked(lock);
> >       raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
> > +     trace_contention_end(lock);
> >  }
>
> Same, if you do it one level in, you can have the tracepoint itself look
> at current->__state.

So I tried this by reading the state in the trace like below:

+       TP_fast_assign(
+               __entry->lock_addr = lock;
+               __entry->flags = flags | get_current_state();
+       ),

But I sometimes see unrelated values which contain
__TASK_TRACED or __TASK_STOPPED and some unexpected values
like TASK_UNINTERRUPTIBLE for rwlocks.  Maybe I missed something.

Anyway, I think it's confusing and complicates things unnecessarily.
It'd probably be better to use new flags like LCB_F_SPIN and LCB_F_WAIT.
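
Something like this, as a rough idea (the flag names are just a
proposal and the exact encoding isn't settled):

	/* busy-waiting paths, e.g. qspinlock and qrwlock */
	trace_contention_begin(lock, ip, LCB_F_SPIN);

	/* sleeping paths, e.g. a rwsem reader */
	trace_contention_begin(sem, ip, LCB_F_READ | LCB_F_WAIT);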

Thanks,
Namhyung

Patch

diff --git a/include/asm-generic/qrwlock.h b/include/asm-generic/qrwlock.h
index 7ae0ece07b4e..9735c39b05bb 100644
--- a/include/asm-generic/qrwlock.h
+++ b/include/asm-generic/qrwlock.h
@@ -12,6 +12,7 @@ 
 #include <linux/atomic.h>
 #include <asm/barrier.h>
 #include <asm/processor.h>
+#include <linux/lock_trace.h>
 
 #include <asm-generic/qrwlock_types.h>
 
@@ -80,7 +81,9 @@  static inline void queued_read_lock(struct qrwlock *lock)
 		return;
 
 	/* The slowpath will decrement the reader count, if necessary. */
+	LOCK_CONTENTION_BEGIN(lock, LCB_F_READ);
 	queued_read_lock_slowpath(lock);
+	LOCK_CONTENTION_END(lock);
 }
 
 /**
@@ -94,7 +97,9 @@  static inline void queued_write_lock(struct qrwlock *lock)
 	if (likely(atomic_try_cmpxchg_acquire(&lock->cnts, &cnts, _QW_LOCKED)))
 		return;
 
+	LOCK_CONTENTION_BEGIN(lock, LCB_F_WRITE);
 	queued_write_lock_slowpath(lock);
+	LOCK_CONTENTION_END(lock);
 }
 
 /**
diff --git a/include/asm-generic/qspinlock.h b/include/asm-generic/qspinlock.h
index d74b13825501..986b96fadbf9 100644
--- a/include/asm-generic/qspinlock.h
+++ b/include/asm-generic/qspinlock.h
@@ -12,6 +12,7 @@ 
 
 #include <asm-generic/qspinlock_types.h>
 #include <linux/atomic.h>
+#include <linux/lock_trace.h>
 
 #ifndef queued_spin_is_locked
 /**
@@ -82,7 +83,9 @@  static __always_inline void queued_spin_lock(struct qspinlock *lock)
 	if (likely(atomic_try_cmpxchg_acquire(&lock->val, &val, _Q_LOCKED_VAL)))
 		return;
 
+	LOCK_CONTENTION_BEGIN(lock, 0);
 	queued_spin_lock_slowpath(lock, val);
+	LOCK_CONTENTION_END(lock);
 }
 #endif
 
diff --git a/include/trace/events/lock.h b/include/trace/events/lock.h
index 7bca0a537dbd..9b285083f88f 100644
--- a/include/trace/events/lock.h
+++ b/include/trace/events/lock.h
@@ -6,6 +6,7 @@ 
 #define _TRACE_LOCK_H
 
 #include <linux/tracepoint.h>
+#include <linux/lock_trace.h>
 
 #ifdef CONFIG_LOCKDEP
 
diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
index 5e3585950ec8..756624c14dfd 100644
--- a/kernel/locking/mutex.c
+++ b/kernel/locking/mutex.c
@@ -30,6 +30,8 @@ 
 #include <linux/debug_locks.h>
 #include <linux/osq_lock.h>
 
+#include <trace/events/lock.h>
+
 #ifndef CONFIG_PREEMPT_RT
 #include "mutex.h"
 
@@ -626,6 +628,7 @@  __mutex_lock_common(struct mutex *lock, unsigned int state, unsigned int subclas
 		waiter.ww_ctx = ww_ctx;
 
 	lock_contended(&lock->dep_map, ip);
+	trace_contention_begin(lock, ip, state);
 
 	if (!use_ww_ctx) {
 		/* add waiting tasks to the end of the waitqueue (FIFO): */
@@ -688,6 +691,7 @@  __mutex_lock_common(struct mutex *lock, unsigned int state, unsigned int subclas
 	}
 	raw_spin_lock(&lock->wait_lock);
 acquired:
+	trace_contention_end(lock);
 	__set_current_state(TASK_RUNNING);
 
 	if (ww_ctx) {
diff --git a/kernel/locking/percpu-rwsem.c b/kernel/locking/percpu-rwsem.c
index c9fdae94e098..4049b79b3dcc 100644
--- a/kernel/locking/percpu-rwsem.c
+++ b/kernel/locking/percpu-rwsem.c
@@ -9,6 +9,7 @@ 
 #include <linux/sched/task.h>
 #include <linux/sched/debug.h>
 #include <linux/errno.h>
+#include <trace/events/lock.h>
 
 int __percpu_init_rwsem(struct percpu_rw_semaphore *sem,
 			const char *name, struct lock_class_key *key)
@@ -171,9 +172,12 @@  bool __sched __percpu_down_read(struct percpu_rw_semaphore *sem, bool try)
 	if (try)
 		return false;
 
+	trace_contention_begin(sem, _RET_IP_,
+			       LCB_F_READ | LCB_F_PERCPU | TASK_UNINTERRUPTIBLE);
 	preempt_enable();
 	percpu_rwsem_wait(sem, /* .reader = */ true);
 	preempt_disable();
+	trace_contention_end(sem);
 
 	return true;
 }
@@ -224,8 +228,13 @@  void __sched percpu_down_write(struct percpu_rw_semaphore *sem)
 	 * Try set sem->block; this provides writer-writer exclusion.
 	 * Having sem->block set makes new readers block.
 	 */
-	if (!__percpu_down_write_trylock(sem))
+	if (!__percpu_down_write_trylock(sem)) {
+		unsigned int flags = LCB_F_WRITE | LCB_F_PERCPU | TASK_UNINTERRUPTIBLE;
+
+		trace_contention_begin(sem, _RET_IP_, flags);
 		percpu_rwsem_wait(sem, /* .reader = */ false);
+		trace_contention_end(sem);
+	}
 
 	/* smp_mb() implied by __percpu_down_write_trylock() on success -- D matches A */
 
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index 8555c4efe97c..e49f5d2a232b 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -24,6 +24,8 @@ 
 #include <linux/sched/wake_q.h>
 #include <linux/ww_mutex.h>
 
+#include <trace/events/lock.h>
+
 #include "rtmutex_common.h"
 
 #ifndef WW_RT
@@ -1652,10 +1654,16 @@  static int __sched rt_mutex_slowlock(struct rt_mutex_base *lock,
 static __always_inline int __rt_mutex_lock(struct rt_mutex_base *lock,
 					   unsigned int state)
 {
+	int ret;
+
 	if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current)))
 		return 0;
 
-	return rt_mutex_slowlock(lock, NULL, state);
+	trace_contention_begin(lock, _RET_IP_, LCB_F_RT | state);
+	ret = rt_mutex_slowlock(lock, NULL, state);
+	trace_contention_end(lock);
+
+	return ret;
 }
 #endif /* RT_MUTEX_BUILD_MUTEX */
 
@@ -1718,9 +1726,11 @@  static __always_inline void __sched rtlock_slowlock(struct rt_mutex_base *lock)
 {
 	unsigned long flags;
 
+	trace_contention_begin(lock, _RET_IP_, LCB_F_RT | TASK_RTLOCK_WAIT);
 	raw_spin_lock_irqsave(&lock->wait_lock, flags);
 	rtlock_slowlock_locked(lock);
 	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
+	trace_contention_end(lock);
 }
 
 #endif /* RT_MUTEX_BUILD_SPINLOCKS */
diff --git a/kernel/locking/rwbase_rt.c b/kernel/locking/rwbase_rt.c
index 6fd3162e4098..8a28f1195c58 100644
--- a/kernel/locking/rwbase_rt.c
+++ b/kernel/locking/rwbase_rt.c
@@ -136,10 +136,16 @@  static int __sched __rwbase_read_lock(struct rwbase_rt *rwb,
 static __always_inline int rwbase_read_lock(struct rwbase_rt *rwb,
 					    unsigned int state)
 {
+	int ret;
+
 	if (rwbase_read_trylock(rwb))
 		return 0;
 
-	return __rwbase_read_lock(rwb, state);
+	trace_contention_begin(rwb, _RET_IP_, LCB_F_READ | LCB_F_RT | state);
+	ret = __rwbase_read_lock(rwb, state);
+	trace_contention_end(rwb);
+
+	return ret;
 }
 
 static void __sched __rwbase_read_unlock(struct rwbase_rt *rwb,
@@ -246,12 +252,14 @@  static int __sched rwbase_write_lock(struct rwbase_rt *rwb,
 	if (__rwbase_write_trylock(rwb))
 		goto out_unlock;
 
+	trace_contention_begin(rwb, _RET_IP_, LCB_F_WRITE | LCB_F_RT | state);
 	rwbase_set_and_save_current_state(state);
 	for (;;) {
 		/* Optimized out for rwlocks */
 		if (rwbase_signal_pending_state(state, current)) {
 			rwbase_restore_current_state();
 			__rwbase_write_unlock(rwb, 0, flags);
+			trace_contention_end(rwb);
 			return -EINTR;
 		}
 
@@ -265,6 +273,7 @@  static int __sched rwbase_write_lock(struct rwbase_rt *rwb,
 		set_current_state(state);
 	}
 	rwbase_restore_current_state();
+	trace_contention_end(rwb);
 
 out_unlock:
 	raw_spin_unlock_irqrestore(&rtm->wait_lock, flags);
diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c
index acde5d6f1254..a1a17af7f747 100644
--- a/kernel/locking/rwsem.c
+++ b/kernel/locking/rwsem.c
@@ -27,6 +27,7 @@ 
 #include <linux/export.h>
 #include <linux/rwsem.h>
 #include <linux/atomic.h>
+#include <trace/events/lock.h>
 
 #ifndef CONFIG_PREEMPT_RT
 #include "lock_events.h"
@@ -1209,9 +1210,14 @@  static struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem)
 static inline int __down_read_common(struct rw_semaphore *sem, int state)
 {
 	long count;
+	void *ret;
 
 	if (!rwsem_read_trylock(sem, &count)) {
-		if (IS_ERR(rwsem_down_read_slowpath(sem, count, state)))
+		trace_contention_begin(sem, _RET_IP_, LCB_F_READ | state);
+		ret = rwsem_down_read_slowpath(sem, count, state);
+		trace_contention_end(sem);
+
+		if (IS_ERR(ret))
 			return -EINTR;
 		DEBUG_RWSEMS_WARN_ON(!is_rwsem_reader_owned(sem), sem);
 	}
@@ -1255,8 +1261,14 @@  static inline int __down_read_trylock(struct rw_semaphore *sem)
  */
 static inline int __down_write_common(struct rw_semaphore *sem, int state)
 {
+	void *ret;
+
 	if (unlikely(!rwsem_write_trylock(sem))) {
-		if (IS_ERR(rwsem_down_write_slowpath(sem, state)))
+		trace_contention_begin(sem, _RET_IP_, LCB_F_WRITE | state);
+		ret = rwsem_down_write_slowpath(sem, state);
+		trace_contention_end(sem);
+
+		if (IS_ERR(ret))
 			return -EINTR;
 	}