diff mbox

[v10,03/19] qspinlock: Add pending bit

Message ID 1399474907-22206-4-git-send-email-Waiman.Long@hp.com (mailing list archive)
State New, archived
Headers show

Commit Message

Waiman Long May 7, 2014, 3:01 p.m. UTC
From: Peter Zijlstra <peterz@infradead.org>

Because the qspinlock needs to touch a second cacheline; add a pending
bit and allow a single in-word spinner before we punt to the second
cacheline.

Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Waiman Long <Waiman.Long@hp.com>
---
 include/asm-generic/qspinlock_types.h |   12 +++-
 kernel/locking/qspinlock.c            |  121 +++++++++++++++++++++++++++------
 2 files changed, 110 insertions(+), 23 deletions(-)

Comments

Peter Zijlstra May 8, 2014, 6:57 p.m. UTC | #1
On Wed, May 07, 2014 at 11:01:31AM -0400, Waiman Long wrote:
> +/**
> + * trylock_pending - try to acquire queue spinlock using the pending bit
> + * @lock : Pointer to queue spinlock structure
> + * @pval : Pointer to value of the queue spinlock 32-bit word
> + * Return: 1 if lock acquired, 0 otherwise
> + */
> +static inline int trylock_pending(struct qspinlock *lock, u32 *pval)

Still don't like you put it in a separate function, but you don't need
the pointer thing. Note how after you fail the trylock_pending() you
touch the second (node) cacheline.

> @@ -110,6 +184,9 @@ void queue_spin_lock_slowpath(struct qspinlock *lock, u32 val)
>  
>  	BUILD_BUG_ON(CONFIG_NR_CPUS >= (1U << _Q_TAIL_CPU_BITS));
>  
> +	if (trylock_pending(lock, &val))
> +		return;	/* Lock acquired */
> +
>  	node = this_cpu_ptr(&mcs_nodes[0]);
>  	idx = node->count++;
>  	tail = encode_tail(smp_processor_id(), idx);
> @@ -119,15 +196,18 @@ void queue_spin_lock_slowpath(struct qspinlock *lock, u32 val)
>  	node->next = NULL;
>  
>  	/*
> +	 * we already touched the queueing cacheline; don't bother with pending
> +	 * stuff.
> +	 *
>  	 * trylock || xchg(lock, node)
>  	 *
> -	 * 0,0 -> 0,1 ; trylock
> -	 * p,x -> n,x ; prev = xchg(lock, node)
> +	 * 0,0,0 -> 0,0,1 ; trylock
> +	 * p,y,x -> n,y,x ; prev = xchg(lock, node)
>  	 */

And any value of @val we might have had here is completely out-dated.
The only thing that makes sense it to set:

	val = 0;

Which makes us start with a trylock, alternatively we can re-read val.

>  	for (;;) {
>  		new = _Q_LOCKED_VAL;
>  		if (val)
> -			new = tail | (val & _Q_LOCKED_MASK);
> +			new = tail | (val & _Q_LOCKED_PENDING_MASK);
>  
>  		old = atomic_cmpxchg(&lock->val, val, new);
>  		if (old == val)
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Waiman Long May 10, 2014, 12:49 a.m. UTC | #2
On 05/08/2014 02:57 PM, Peter Zijlstra wrote:
> On Wed, May 07, 2014 at 11:01:31AM -0400, Waiman Long wrote:
>> +/**
>> + * trylock_pending - try to acquire queue spinlock using the pending bit
>> + * @lock : Pointer to queue spinlock structure
>> + * @pval : Pointer to value of the queue spinlock 32-bit word
>> + * Return: 1 if lock acquired, 0 otherwise
>> + */
>> +static inline int trylock_pending(struct qspinlock *lock, u32 *pval)
> Still don't like you put it in a separate function, but you don't need
> the pointer thing. Note how after you fail the trylock_pending() you
> touch the second (node) cacheline.
>
>> @@ -110,6 +184,9 @@ void queue_spin_lock_slowpath(struct qspinlock *lock, u32 val)
>>
>>   	BUILD_BUG_ON(CONFIG_NR_CPUS>= (1U<<  _Q_TAIL_CPU_BITS));
>>
>> +	if (trylock_pending(lock,&val))
>> +		return;	/* Lock acquired */
>> +
>>   	node = this_cpu_ptr(&mcs_nodes[0]);
>>   	idx = node->count++;
>>   	tail = encode_tail(smp_processor_id(), idx);
>> @@ -119,15 +196,18 @@ void queue_spin_lock_slowpath(struct qspinlock *lock, u32 val)
>>   	node->next = NULL;
>>
>>   	/*
>> +	 * we already touched the queueing cacheline; don't bother with pending
>> +	 * stuff.
>> +	 *
>>   	 * trylock || xchg(lock, node)
>>   	 *
>> -	 * 0,0 ->  0,1 ; trylock
>> -	 * p,x ->  n,x ; prev = xchg(lock, node)
>> +	 * 0,0,0 ->  0,0,1 ; trylock
>> +	 * p,y,x ->  n,y,x ; prev = xchg(lock, node)
>>   	 */
> And any value of @val we might have had here is completely out-dated.
> The only thing that makes sense it to set:
>
> 	val = 0;
>
> Which makes us start with a trylock, alternatively we can re-read val.

That is true. I will make the change to get rid of the pointer thing.

As for the separate trylock_pending function, my original goal was to 
have a better delineation of different portions of the code.  Given the 
fact that I broke up the slowpath function into 2 in a later patch, I 
may not really need to separate it out. I will pull it back in the next 
version.

-Longman
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/include/asm-generic/qspinlock_types.h b/include/asm-generic/qspinlock_types.h
index f66f845..bd25081 100644
--- a/include/asm-generic/qspinlock_types.h
+++ b/include/asm-generic/qspinlock_types.h
@@ -39,8 +39,9 @@  typedef struct qspinlock {
  * Bitfields in the atomic value:
  *
  *  0- 7: locked byte
- *  8- 9: tail index
- * 10-31: tail cpu (+1)
+ *     8: pending
+ *  9-10: tail index
+ * 11-31: tail cpu (+1)
  */
 #define	_Q_SET_MASK(type)	(((1U << _Q_ ## type ## _BITS) - 1)\
 				      << _Q_ ## type ## _OFFSET)
@@ -48,7 +49,11 @@  typedef struct qspinlock {
 #define _Q_LOCKED_BITS		8
 #define _Q_LOCKED_MASK		_Q_SET_MASK(LOCKED)
 
-#define _Q_TAIL_IDX_OFFSET	(_Q_LOCKED_OFFSET + _Q_LOCKED_BITS)
+#define _Q_PENDING_OFFSET	(_Q_LOCKED_OFFSET + _Q_LOCKED_BITS)
+#define _Q_PENDING_BITS		1
+#define _Q_PENDING_MASK		_Q_SET_MASK(PENDING)
+
+#define _Q_TAIL_IDX_OFFSET	(_Q_PENDING_OFFSET + _Q_PENDING_BITS)
 #define _Q_TAIL_IDX_BITS	2
 #define _Q_TAIL_IDX_MASK	_Q_SET_MASK(TAIL_IDX)
 
@@ -57,5 +62,6 @@  typedef struct qspinlock {
 #define _Q_TAIL_CPU_MASK	_Q_SET_MASK(TAIL_CPU)
 
 #define _Q_LOCKED_VAL		(1U << _Q_LOCKED_OFFSET)
+#define _Q_PENDING_VAL		(1U << _Q_PENDING_OFFSET)
 
 #endif /* __ASM_GENERIC_QSPINLOCK_TYPES_H */
diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c
index b97a1ad..6467bfc 100644
--- a/kernel/locking/qspinlock.c
+++ b/kernel/locking/qspinlock.c
@@ -83,23 +83,97 @@  static inline struct mcs_spinlock *decode_tail(u32 tail)
 	return per_cpu_ptr(&mcs_nodes[idx], cpu);
 }
 
+#define _Q_LOCKED_PENDING_MASK	(_Q_LOCKED_MASK | _Q_PENDING_MASK)
+
+/**
+ * trylock_pending - try to acquire queue spinlock using the pending bit
+ * @lock : Pointer to queue spinlock structure
+ * @pval : Pointer to value of the queue spinlock 32-bit word
+ * Return: 1 if lock acquired, 0 otherwise
+ */
+static inline int trylock_pending(struct qspinlock *lock, u32 *pval)
+{
+	u32 old, new, val = *pval;
+
+	/*
+	 * trylock || pending
+	 *
+	 * 0,0,0 -> 0,0,1 ; trylock
+	 * 0,0,1 -> 0,1,1 ; pending
+	 */
+	for (;;) {
+		/*
+		 * If we observe any contention; queue.
+		 */
+		if (val & ~_Q_LOCKED_MASK)
+			return 0;
+
+		new = _Q_LOCKED_VAL;
+		if (val == new)
+			new |= _Q_PENDING_VAL;
+
+		old = atomic_cmpxchg(&lock->val, val, new);
+		if (old == val)
+			break;
+
+		*pval = val = old;
+	}
+
+	/*
+	 * we won the trylock
+	 */
+	if (new == _Q_LOCKED_VAL)
+		return 1;
+
+	/*
+	 * we're pending, wait for the owner to go away.
+	 *
+	 * *,1,1 -> *,1,0
+	 */
+	while ((val = atomic_read(&lock->val)) & _Q_LOCKED_MASK)
+		arch_mutex_cpu_relax();
+
+	/*
+	 * take ownership and clear the pending bit.
+	 *
+	 * *,1,0 -> *,0,1
+	 */
+	for (;;) {
+		new = (val & ~_Q_PENDING_MASK) | _Q_LOCKED_VAL;
+
+		old = atomic_cmpxchg(&lock->val, val, new);
+		if (old == val)
+			break;
+
+		val = old;
+	}
+	return 1;
+}
+
 /**
  * queue_spin_lock_slowpath - acquire the queue spinlock
  * @lock: Pointer to queue spinlock structure
  * @val: Current value of the queue spinlock 32-bit word
  *
- * (queue tail, lock bit)
+ * (queue tail, pending bit, lock bit)
+ *
+ *              fast     :    slow                                  :    unlock
+ *                       :                                          :
+ * uncontended  (0,0,0) -:--> (0,0,1) ------------------------------:--> (*,*,0)
+ *                       :       | ^--------.------.             /  :
+ *                       :       v           \      \            |  :
+ * pending               :    (0,1,1) +--> (0,1,0)   \           |  :
+ *                       :       | ^--'              |           |  :
+ *                       :       v                   |           |  :
+ * uncontended           :    (n,x,y) +--> (n,0,0) --'           |  :
+ *   queue               :       | ^--'                          |  :
+ *                       :       v                               |  :
+ * contended             :    (*,x,y) +--> (*,0,0) ---> (*,0,1) -'  :
+ *   queue               :         ^--'                             :
  *
- *              fast      :    slow                                  :    unlock
- *                        :                                          :
- * uncontended  (0,0)   --:--> (0,1) --------------------------------:--> (*,0)
- *                        :       | ^--------.                    /  :
- *                        :       v           \                   |  :
- * uncontended            :    (n,x) --+--> (n,0)                 |  :
- *   queue                :       | ^--'                          |  :
- *                        :       v                               |  :
- * contended              :    (*,x) --+--> (*,0) -----> (*,1) ---'  :
- *   queue                :         ^--'                             :
+ * The pending bit processing is in the trylock_pending() function
+ * whereas the uncontended and contended queue processing is in the
+ * queue_spin_lock_slowpath() function.
  *
  */
 void queue_spin_lock_slowpath(struct qspinlock *lock, u32 val)
@@ -110,6 +184,9 @@  void queue_spin_lock_slowpath(struct qspinlock *lock, u32 val)
 
 	BUILD_BUG_ON(CONFIG_NR_CPUS >= (1U << _Q_TAIL_CPU_BITS));
 
+	if (trylock_pending(lock, &val))
+		return;	/* Lock acquired */
+
 	node = this_cpu_ptr(&mcs_nodes[0]);
 	idx = node->count++;
 	tail = encode_tail(smp_processor_id(), idx);
@@ -119,15 +196,18 @@  void queue_spin_lock_slowpath(struct qspinlock *lock, u32 val)
 	node->next = NULL;
 
 	/*
+	 * we already touched the queueing cacheline; don't bother with pending
+	 * stuff.
+	 *
 	 * trylock || xchg(lock, node)
 	 *
-	 * 0,0 -> 0,1 ; trylock
-	 * p,x -> n,x ; prev = xchg(lock, node)
+	 * 0,0,0 -> 0,0,1 ; trylock
+	 * p,y,x -> n,y,x ; prev = xchg(lock, node)
 	 */
 	for (;;) {
 		new = _Q_LOCKED_VAL;
 		if (val)
-			new = tail | (val & _Q_LOCKED_MASK);
+			new = tail | (val & _Q_LOCKED_PENDING_MASK);
 
 		old = atomic_cmpxchg(&lock->val, val, new);
 		if (old == val)
@@ -145,7 +225,7 @@  void queue_spin_lock_slowpath(struct qspinlock *lock, u32 val)
 	/*
 	 * if there was a previous node; link it and wait.
 	 */
-	if (old & ~_Q_LOCKED_MASK) {
+	if (old & ~_Q_LOCKED_PENDING_MASK) {
 		prev = decode_tail(old);
 		ACCESS_ONCE(prev->next) = node;
 
@@ -153,18 +233,19 @@  void queue_spin_lock_slowpath(struct qspinlock *lock, u32 val)
 	}
 
 	/*
-	 * we're at the head of the waitqueue, wait for the owner to go away.
+	 * we're at the head of the waitqueue, wait for the owner & pending to
+	 * go away.
 	 *
-	 * *,x -> *,0
+	 * *,x,y -> *,0,0
 	 */
-	while ((val = atomic_read(&lock->val)) & _Q_LOCKED_MASK)
+	while ((val = atomic_read(&lock->val)) & _Q_LOCKED_PENDING_MASK)
 		arch_mutex_cpu_relax();
 
 	/*
 	 * claim the lock:
 	 *
-	 * n,0 -> 0,1 : lock, uncontended
-	 * *,0 -> *,1 : lock, contended
+	 * n,0,0 -> 0,0,1 : lock, uncontended
+	 * *,0,0 -> *,0,1 : lock, contended
 	 */
 	for (;;) {
 		new = _Q_LOCKED_VAL;