@@ -103,24 +103,33 @@ static inline struct mcs_spinlock *decode_tail(u32 tail)
* By using the whole 2nd least significant byte for the pending bit, we
* can allow better optimization of the lock acquisition for the pending
* bit holder.
+ *
+ * This internal structure is also used by the set_locked function which
+ * is not restricted to _Q_PENDING_BITS == 8.
*/
-#if _Q_PENDING_BITS == 8
-
struct __qspinlock {
union {
atomic_t val;
- struct {
#ifdef __LITTLE_ENDIAN
+ u8 locked;
+ struct {
u16 locked_pending;
u16 tail;
+ };
#else
+ struct {
u16 tail;
u16 locked_pending;
-#endif
};
+ struct {
+ u8 reserved[3];
+ u8 locked;
+ };
+#endif
};
};
+#if _Q_PENDING_BITS == 8
/**
* clear_pending_set_locked - take ownership and clear the pending bit.
* @lock: Pointer to queue spinlock structure
@@ -207,6 +216,19 @@ static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail)
#endif /* _Q_PENDING_BITS == 8 */
/**
+ * set_locked - Set the lock bit and own the lock
+ * @lock: Pointer to queue spinlock structure
+ *
+ * *,*,0 -> *,0,1
+ */
+static __always_inline void set_locked(struct qspinlock *lock)
+{
+ struct __qspinlock *l = (void *)lock;
+
+ ACCESS_ONCE(l->locked) = _Q_LOCKED_VAL;
+}
+
+/**
* queue_spin_lock_slowpath - acquire the queue spinlock
* @lock: Pointer to queue spinlock structure
* @val: Current value of the queue spinlock 32-bit word
@@ -339,10 +361,13 @@ queue:
/*
* we're at the head of the waitqueue, wait for the owner & pending to
* go away.
+ * Load-acquired is used here because the set_locked()
+ * function below may not be a full memory barrier.
*
* *,x,y -> *,0,0
*/
- while ((val = atomic_read(&lock->val)) & _Q_LOCKED_PENDING_MASK)
+ while ((val = smp_load_acquire(&lock->val.counter)) &
+ _Q_LOCKED_PENDING_MASK)
cpu_relax();
/*
@@ -350,15 +375,19 @@ queue:
*
* n,0,0 -> 0,0,1 : lock, uncontended
* *,0,0 -> *,0,1 : lock, contended
+ *
+ * If the queue head is the only one in the queue (lock value == tail),
+ * clear the tail code and grab the lock. Otherwise, we only need
+ * to grab the lock.
*/
for (;;) {
- new = _Q_LOCKED_VAL;
- if (val != tail)
- new |= val;
-
- old = atomic_cmpxchg(&lock->val, val, new);
- if (old == val)
+ if (val != tail) {
+ set_locked(lock);
break;
+ }
+ old = atomic_cmpxchg(&lock->val, val, _Q_LOCKED_VAL);
+ if (old == val)
+ goto release; /* No contention */
val = old;
}
@@ -366,12 +395,10 @@ queue:
/*
* contended path; wait for next, release.
*/
- if (new != _Q_LOCKED_VAL) {
- while (!(next = ACCESS_ONCE(node->next)))
- cpu_relax();
+ while (!(next = ACCESS_ONCE(node->next)))
+ cpu_relax();
- arch_mcs_spin_unlock_contended(&next->locked);
- }
+ arch_mcs_spin_unlock_contended(&next->locked);
release:
/*