diff mbox

[v10,09/19] qspinlock: Prepare for unfair lock support

Message ID 1399474907-22206-10-git-send-email-Waiman.Long@hp.com (mailing list archive)
State New, archived
Headers show

Commit Message

Waiman Long May 7, 2014, 3:01 p.m. UTC
If unfair lock is supported, the lock acquisition loop at the end of
the queue_spin_lock_slowpath() function may need to detect the fact
the lock can be stolen. Code are added for the stolen lock detection.

A new qhead macro is also defined as a shorthand for mcs.locked.

Signed-off-by: Waiman Long <Waiman.Long@hp.com>
---
 kernel/locking/qspinlock.c |   26 +++++++++++++++++++-------
 1 files changed, 19 insertions(+), 7 deletions(-)

Comments

Peter Zijlstra May 8, 2014, 7:06 p.m. UTC | #1
On Wed, May 07, 2014 at 11:01:37AM -0400, Waiman Long wrote:
> If unfair lock is supported, the lock acquisition loop at the end of
> the queue_spin_lock_slowpath() function may need to detect the fact
> the lock can be stolen. Code are added for the stolen lock detection.
> 
> A new qhead macro is also defined as a shorthand for mcs.locked.

NAK, unfair should be a pure test-and-set lock.

>  /**
>   * get_qlock - Set the lock bit and own the lock
> - * @lock: Pointer to queue spinlock structure
> + * @lock : Pointer to queue spinlock structure
> + * Return: 1 if lock acquired, 0 otherwise
>   *
>   * This routine should only be called when the caller is the only one
>   * entitled to acquire the lock.
>   */
> -static __always_inline void get_qlock(struct qspinlock *lock)
> +static __always_inline int get_qlock(struct qspinlock *lock)
>  {
>  	struct __qspinlock *l = (void *)lock;
>  
>  	barrier();
>  	ACCESS_ONCE(l->locked) = _Q_LOCKED_VAL;
>  	barrier();
> +	return 1;
>  }

and here you make a horribly named function more horrible;
try_set_locked() is that its now.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Waiman Long May 10, 2014, 1:19 a.m. UTC | #2
On 05/08/2014 03:06 PM, Peter Zijlstra wrote:
> On Wed, May 07, 2014 at 11:01:37AM -0400, Waiman Long wrote:
>> If unfair lock is supported, the lock acquisition loop at the end of
>> the queue_spin_lock_slowpath() function may need to detect the fact
>> the lock can be stolen. Code are added for the stolen lock detection.
>>
>> A new qhead macro is also defined as a shorthand for mcs.locked.
> NAK, unfair should be a pure test-and-set lock.

I have performance data showing that a simple test-and-set lock does not 
scale well. That is the primary reason of ditching the test-and-set lock 
and use a more complicated scheme which scales better. Also, it will be 
hard to make the unfair test-and-set lock code to coexist nicely with PV 
spinlock code.

>>   /**
>>    * get_qlock - Set the lock bit and own the lock
>> - * @lock: Pointer to queue spinlock structure
>> + * @lock : Pointer to queue spinlock structure
>> + * Return: 1 if lock acquired, 0 otherwise
>>    *
>>    * This routine should only be called when the caller is the only one
>>    * entitled to acquire the lock.
>>    */
>> -static __always_inline void get_qlock(struct qspinlock *lock)
>> +static __always_inline int get_qlock(struct qspinlock *lock)
>>   {
>>   	struct __qspinlock *l = (void *)lock;
>>
>>   	barrier();
>>   	ACCESS_ONCE(l->locked) = _Q_LOCKED_VAL;
>>   	barrier();
>> +	return 1;
>>   }
> and here you make a horribly named function more horrible;
> try_set_locked() is that its now.

Will do.

-Longman
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Peter Zijlstra May 10, 2014, 2:13 p.m. UTC | #3
On Fri, May 09, 2014 at 09:19:32PM -0400, Waiman Long wrote:
> On 05/08/2014 03:06 PM, Peter Zijlstra wrote:
> >On Wed, May 07, 2014 at 11:01:37AM -0400, Waiman Long wrote:
> >>If unfair lock is supported, the lock acquisition loop at the end of
> >>the queue_spin_lock_slowpath() function may need to detect the fact
> >>the lock can be stolen. Code are added for the stolen lock detection.
> >>
> >>A new qhead macro is also defined as a shorthand for mcs.locked.
> >NAK, unfair should be a pure test-and-set lock.
> 
> I have performance data showing that a simple test-and-set lock does not
> scale well. That is the primary reason of ditching the test-and-set lock and
> use a more complicated scheme which scales better.

Nobody should give a fuck about scalability in this case anyway.

Also, as I explained/asked earlier:

  lkml.kernel.org/r/20140314083001.GN27965@twins.programming.kicks-ass.net

Lock holder preemption is _way_ worse with any kind of queueing. You've
not explained how the simple 3 cpu example in that email gets better
performance than a test-and-set lock.

> Also, it will be hard to
> make the unfair test-and-set lock code to coexist nicely with PV spinlock
> code.

That's just complete crap as the test-and-set lock is like 3 lines of
code.
diff mbox

Patch

diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c
index e98d7d4..9e7659e 100644
--- a/kernel/locking/qspinlock.c
+++ b/kernel/locking/qspinlock.c
@@ -64,6 +64,7 @@ 
 struct qnode {
 	struct mcs_spinlock mcs;
 };
+#define qhead	mcs.locked	/* The queue head flag */
 
 /*
  * Per-CPU queue node structures; we can never have more than 4 nested
@@ -216,18 +217,20 @@  xchg_tail(struct qspinlock *lock, u32 tail, u32 *pval)
 
 /**
  * get_qlock - Set the lock bit and own the lock
- * @lock: Pointer to queue spinlock structure
+ * @lock : Pointer to queue spinlock structure
+ * Return: 1 if lock acquired, 0 otherwise
  *
  * This routine should only be called when the caller is the only one
  * entitled to acquire the lock.
  */
-static __always_inline void get_qlock(struct qspinlock *lock)
+static __always_inline int get_qlock(struct qspinlock *lock)
 {
 	struct __qspinlock *l = (void *)lock;
 
 	barrier();
 	ACCESS_ONCE(l->locked) = _Q_LOCKED_VAL;
 	barrier();
+	return 1;
 }
 
 /**
@@ -365,7 +368,7 @@  void queue_spin_lock_slowpath(struct qspinlock *lock, u32 val)
 	tail = encode_tail(smp_processor_id(), idx);
 
 	node += idx;
-	node->mcs.locked = 0;
+	node->qhead = 0;
 	node->mcs.next = NULL;
 
 	/*
@@ -391,7 +394,7 @@  void queue_spin_lock_slowpath(struct qspinlock *lock, u32 val)
 		prev = decode_tail(old);
 		ACCESS_ONCE(prev->mcs.next) = (struct mcs_spinlock *)node;
 
-		while (!smp_load_acquire(&node->mcs.locked))
+		while (!smp_load_acquire(&node->qhead))
 			arch_mutex_cpu_relax();
 	}
 
@@ -403,6 +406,7 @@  void queue_spin_lock_slowpath(struct qspinlock *lock, u32 val)
 	 *
 	 * *,x,y -> *,0,0
 	 */
+retry_queue_wait:
 	while ((val = smp_load_acquire(&lock->val.counter))
 				       & _Q_LOCKED_PENDING_MASK)
 		arch_mutex_cpu_relax();
@@ -419,12 +423,20 @@  void queue_spin_lock_slowpath(struct qspinlock *lock, u32 val)
 	 */
 	for (;;) {
 		if (val != tail) {
-			get_qlock(lock);
-			break;
+			/*
+			 * The get_qlock function will only failed if the
+			 * lock was stolen.
+			 */
+			if (get_qlock(lock))
+				break;
+			else
+				goto retry_queue_wait;
 		}
 		old = atomic_cmpxchg(&lock->val, val, _Q_LOCKED_VAL);
 		if (old == val)
 			goto release;	/* No contention */
+		else if (old & _Q_LOCKED_MASK)
+			goto retry_queue_wait;
 
 		val = old;
 	}
@@ -435,7 +447,7 @@  void queue_spin_lock_slowpath(struct qspinlock *lock, u32 val)
 	while (!(next = (struct qnode *)ACCESS_ONCE(node->mcs.next)))
 		arch_mutex_cpu_relax();
 
-	arch_mcs_spin_unlock_contended(&next->mcs.locked);
+	arch_mcs_spin_unlock_contended(&next->qhead);
 
 release:
 	/*