new file mode 100644
@@ -0,0 +1,122 @@
+/*
+ * Queue spinlock
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * (C) Copyright 2013-2014 Hewlett-Packard Development Company, L.P.
+ *
+ * Authors: Waiman Long <waiman.long@hp.com>
+ */
+#ifndef __ASM_GENERIC_QSPINLOCK_H
+#define __ASM_GENERIC_QSPINLOCK_H
+
+#include <asm-generic/qspinlock_types.h>
+
+/*
+ * External function declarations
+ */
+extern void queue_spin_lock_slowpath(struct qspinlock *lock, int qsval);
+
+/**
+ * queue_spin_is_locked - is the spinlock locked?
+ * @lock: Pointer to queue spinlock structure
+ * Return: 1 if it is locked, 0 otherwise
+ */
+static __always_inline int queue_spin_is_locked(struct qspinlock *lock)
+{
+ return atomic_read(&lock->qlcode) & _QLOCK_LOCK_MASK;
+}
+
+/**
+ * queue_spin_value_unlocked - is the spinlock structure unlocked?
+ * @lock: queue spinlock structure
+ * Return: 1 if it is unlocked, 0 otherwise
+ */
+static __always_inline int queue_spin_value_unlocked(struct qspinlock lock)
+{
+ return !(atomic_read(&lock.qlcode) & _QLOCK_LOCK_MASK);
+}
+
+/**
+ * queue_spin_is_contended - check if the lock is contended
+ * @lock : Pointer to queue spinlock structure
+ * Return: 1 if lock contended, 0 otherwise
+ */
+static __always_inline int queue_spin_is_contended(struct qspinlock *lock)
+{
+ return atomic_read(&lock->qlcode) & ~_QLOCK_LOCK_MASK;
+}
+
+/**
+ * queue_spin_trylock - try to acquire the queue spinlock
+ * @lock : Pointer to queue spinlock structure
+ * Return: 1 if lock acquired, 0 if failed
+ */
+static __always_inline int queue_spin_trylock(struct qspinlock *lock)
+{
+ if (!atomic_read(&lock->qlcode) &&
+ (atomic_cmpxchg(&lock->qlcode, 0, _QLOCK_LOCKED) == 0))
+ return 1;
+ return 0;
+}
+
+/**
+ * queue_spin_lock - acquire a queue spinlock
+ * @lock: Pointer to queue spinlock structure
+ */
+static __always_inline void queue_spin_lock(struct qspinlock *lock)
+{
+ int qsval;
+
+ /*
+ * To reduce memory accesses to a single one in the cold cache case,
+ * a direct cmpxchg() is performed in the fastpath to optimize the
+ * uncontended case. The contended performance, however, may suffer
+ * a bit because of that.
+ */
+ qsval = atomic_cmpxchg(&lock->qlcode, 0, _QLOCK_LOCKED);
+ if (likely(qsval == 0))
+ return;
+ queue_spin_lock_slowpath(lock, qsval);
+}
+
+#ifndef queue_spin_unlock
+/**
+ * queue_spin_unlock - release a queue spinlock
+ * @lock : Pointer to queue spinlock structure
+ */
+static __always_inline void queue_spin_unlock(struct qspinlock *lock)
+{
+ /*
+ * Use an atomic subtraction to clear the lock bit.
+ */
+ smp_mb__before_atomic_dec();
+ atomic_sub(_QLOCK_LOCKED, &lock->qlcode);
+}
+#endif
+
+/*
+ * Initializer
+ */
+#define __ARCH_SPIN_LOCK_UNLOCKED { ATOMIC_INIT(0) }
+
+/*
+ * Remapping spinlock architecture specific functions to the corresponding
+ * queue spinlock functions.
+ */
+#define arch_spin_is_locked(l) queue_spin_is_locked(l)
+#define arch_spin_is_contended(l) queue_spin_is_contended(l)
+#define arch_spin_value_unlocked(l) queue_spin_value_unlocked(l)
+#define arch_spin_lock(l) queue_spin_lock(l)
+#define arch_spin_trylock(l) queue_spin_trylock(l)
+#define arch_spin_unlock(l) queue_spin_unlock(l)
+#define arch_spin_lock_flags(l, f) queue_spin_lock(l)
+
+#endif /* __ASM_GENERIC_QSPINLOCK_H */
new file mode 100644
@@ -0,0 +1,49 @@
+/*
+ * Queue spinlock
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * (C) Copyright 2013-2014 Hewlett-Packard Development Company, L.P.
+ *
+ * Authors: Waiman Long <waiman.long@hp.com>
+ */
+#ifndef __ASM_GENERIC_QSPINLOCK_TYPES_H
+#define __ASM_GENERIC_QSPINLOCK_TYPES_H
+
+/*
+ * Including atomic.h with PARAVIRT on will cause compilation errors because
+ * of recursive header file inclusion via paravirt_types.h. A workaround is
+ * to include paravirt_types.h here in this case.
+ */
+#ifdef CONFIG_PARAVIRT
+# include <asm/paravirt_types.h>
+#else
+# include <linux/types.h>
+# include <linux/atomic.h>
+#endif
+
+/*
+ * The queue spinlock data structure - a 32-bit word
+ *
+ * The bit assignments are:
+ * Bit 0 : Set if locked
+ * Bits 1-7 : Not used
+ * Bits 8-31: Queue code
+ */
+typedef struct qspinlock {
+ atomic_t qlcode; /* Lock + queue code */
+} arch_spinlock_t;
+
+#define _QCODE_OFFSET 8
+#define _QLOCK_LOCKED 1U
+#define _QLOCK_LOCK_MASK 0xff
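+
+/*
+ * For illustration: a lock value of 0 means unlocked with an empty queue,
+ * a value of 1 (only _QLOCK_LOCKED set) means locked with no CPU queued,
+ * and any non-zero bits above bit 7 mean that at least one CPU is queued
+ * up waiting for the lock.
+ */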
+
+#endif /* __ASM_GENERIC_QSPINLOCK_TYPES_H */
@@ -223,3 +223,10 @@ endif
config MUTEX_SPIN_ON_OWNER
def_bool y
depends on SMP && !DEBUG_MUTEXES
+
+config ARCH_USE_QUEUE_SPINLOCK
+ bool
+
+config QUEUE_SPINLOCK
+ def_bool y if ARCH_USE_QUEUE_SPINLOCK
+ depends on SMP && !PARAVIRT_SPINLOCKS
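+
+# ARCH_USE_QUEUE_SPINLOCK has no prompt, so an architecture would opt in
+# by adding "select ARCH_USE_QUEUE_SPINLOCK" to its own Kconfig entry;
+# that arch-side change is not shown here.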
@@ -15,6 +15,7 @@ obj-$(CONFIG_LOCKDEP) += lockdep_proc.o
endif
obj-$(CONFIG_SMP) += spinlock.o
obj-$(CONFIG_PROVE_LOCKING) += spinlock.o
+obj-$(CONFIG_QUEUE_SPINLOCK) += qspinlock.o
obj-$(CONFIG_RT_MUTEXES) += rtmutex.o
obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o
obj-$(CONFIG_RT_MUTEX_TESTER) += rtmutex-tester.o
new file mode 100644
@@ -0,0 +1,371 @@
+/*
+ * Queue spinlock
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * (C) Copyright 2013-2014 Hewlett-Packard Development Company, L.P.
+ *
+ * Authors: Waiman Long <waiman.long@hp.com>
+ */
+#include <linux/smp.h>
+#include <linux/bug.h>
+#include <linux/cpumask.h>
+#include <linux/percpu.h>
+#include <linux/hardirq.h>
+#include <linux/mutex.h>
+#include <linux/spinlock.h>
+
+/*
+ * The basic principle of a queue-based spinlock can best be understood
+ * by studying a classic queue-based spinlock implementation called the
+ * MCS lock. The paper below provides a good description for this kind
+ * of lock.
+ *
+ * http://www.cise.ufl.edu/tr/DOC/REP-1992-71.pdf
+ *
+ * This queue spinlock implementation is based on the MCS lock with twists
+ * to make it fit the following constraints:
+ * 1. A max spinlock size of 4 bytes
+ * 2. Good fastpath performance
+ * 3. No change in the locking APIs
+ *
+ * The queue spinlock fastpath is as simple as it can get; all the heavy
+ * lifting is done in the lock slowpath. The main idea behind this queue
+ * spinlock implementation is to keep the spinlock size at 4 bytes while
+ * at the same time implement a queue structure to queue up the waiting
+ * lock spinners.
+ *
+ * Since preemption is disabled before getting the lock, a given CPU will
+ * only need to use one queue node structure in a non-interrupt context.
+ * A percpu queue node structure will be allocated for this purpose and the
+ * cpu number will be put into the queue spinlock structure to indicate the
+ * tail of the queue.
+ *
+ * To handle spinlock acquisition at interrupt context (softirq or hardirq),
+ * the queue node structure is actually an array for supporting nested spin
+ * locking operations in interrupt handlers. If all the entries in the
+ * array are used up, the code hits a BUG() in get_qnode(), as that
+ * should never happen in normal circumstances.
+ */
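+
+/*
+ * For reference, the acquire side of a classic MCS lock looks roughly like
+ * the illustrative pseudo-C sketch below (not part of this implementation;
+ * the names are made up for the example):
+ *
+ *	node->next = NULL;
+ *	node->locked = 0;
+ *	prev = xchg(&lock->tail, node);		// join the queue tail
+ *	if (prev) {				// somebody was ahead of us
+ *		prev->next = node;
+ *		while (!node->locked)		// spin on our own node only
+ *			cpu_relax();
+ *	}
+ *
+ * The code below follows the same idea, but instead of storing a node
+ * pointer in the lock word it encodes the tail CPU number and per-cpu
+ * node index into the upper bits of a single 32-bit word.
+ */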
+
+/*
+ * The 24-bit queue node code is divided into the following 2 fields:
+ * Bits 0-1 : queue node index (4 nodes)
+ * Bits 2-23: CPU number + 1 (4M - 1 CPUs)
+ *
+ * A queue node code of 0 indicates that no one is waiting for the lock.
+ * Consequently, the value 0 cannot be used as a valid CPU number; we need
+ * to add 1 to the CPU number before putting it into the queue code.
+ */
+#define MAX_QNODES 4
+#ifndef _QCODE_VAL_OFFSET
+#define _QCODE_VAL_OFFSET _QCODE_OFFSET
+#endif
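+
+/*
+ * Worked example (illustrative only): with _QCODE_VAL_OFFSET = 8, a waiter
+ * on CPU 5 using per-cpu queue node index 1 is encoded as
+ *
+ *	((5 + 1) << 10) | (1 << 8) = 0x1900
+ *
+ * in the lock word. Decoding reverses the shifts: cpu = (0x1900 >> 10) - 1
+ * = 5 and index = (0x1900 >> 8) & 3 = 1. See queue_encode_qcode() and
+ * xlate_qcode() below.
+ */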
+
+/*
+ * Function exit status
+ */
+enum exitval {
+ NORMAL_EXIT = 0,
+ NOTIFY_NEXT, /* Notify the next waiting node CPU */
+ RELEASE_NODE /* Release current node directly */
+};
+
+/*
+ * The queue node structure
+ *
+ * This structure is essentially the same as the mcs_spinlock structure
+ * in mcs_spinlock.h file. It is retained for future extension where new
+ * fields may be added.
+ */
+struct qnode {
+ u32 qhead; /* Queue head flag */
+ struct qnode *next; /* Next queue node addr */
+};
+
+struct qnode_set {
+ struct qnode nodes[MAX_QNODES];
+ int node_idx; /* Current node to use */
+};
+
+/*
+ * Per-CPU queue node structures
+ */
+static DEFINE_PER_CPU_ALIGNED(struct qnode_set, qnset) = { { { 0 } }, 0 };
+
+/*
+ ************************************************************************
+ * Inline functions used by the queue_spin_lock_slowpath() function *
+ * that may get superseded by a more optimized version. *
+ ************************************************************************
+ */
+
+#ifndef __queue_spin_trylock
+/**
+ * __queue_spin_trylock - try to acquire the lock by setting the lock bit
+ * @lock: Pointer to queue spinlock structure
+ * Return: 1 if lock bit set successfully, 0 if failed
+ *
+ * This is an unfair version of the trylock which should only be called
+ * by a caller who is entitled to acquire the lock.
+ */
+static __always_inline int __queue_spin_trylock(struct qspinlock *lock)
+{
+ int qlcode = atomic_read(&lock->qlcode);
+
+ if (!(qlcode & _QLOCK_LOCKED) && (atomic_cmpxchg(&lock->qlcode,
+ qlcode, qlcode|_QLOCK_LOCKED) == qlcode))
+ return 1;
+ return 0;
+}
+#endif /* __queue_spin_trylock */
+
+#ifndef qsval_to_qcode
+/**
+ * qsval_to_qcode - Convert a queue spinlock value to a queue code
+ * @qsval : Queue spinlock value
+ * Return : The corresponding queue code value
+ */
+static inline u32
+qsval_to_qcode(int qsval)
+{
+ return (u32)(qsval & ~_QLOCK_LOCK_MASK);
+}
+#endif /* qsval_to_qcode */
+
+#ifndef queue_spin_trylock_and_clr_qcode
+/**
+ * queue_spin_trylock_and_clr_qcode - Try to lock & clear qcode simultaneously
+ * @lock : Pointer to queue spinlock structure
+ * @qcode: The supposedly current qcode value
+ * Return: true if successful, false otherwise
+ */
+static inline int
+queue_spin_trylock_and_clr_qcode(struct qspinlock *lock, u32 qcode)
+{
+ return atomic_cmpxchg(&lock->qlcode, qcode, _QLOCK_LOCKED) == qcode;
+}
+#endif /* queue_spin_trylock_and_clr_qcode */
+
+#ifndef queue_encode_qcode
+/**
+ * queue_encode_qcode - Encode the CPU number & node index into a qnode code
+ * @cpu_nr: CPU number
+ * @qn_idx: Queue node index
+ * Return : A qnode code that can be saved into the qspinlock structure
+ */
+static inline u32 queue_encode_qcode(u32 cpu_nr, u8 qn_idx)
+{
+ return ((cpu_nr + 1) << (_QCODE_VAL_OFFSET + 2)) |
+ (qn_idx << _QCODE_VAL_OFFSET);
+}
+#endif /* queue_encode_qcode */
+
+#ifndef queue_code_xchg
+/**
+ * queue_code_xchg - exchange a queue code value
+ * @lock : Pointer to queue spinlock structure
+ * @ocode: Old queue code in the lock [OUT]
+ * @ncode: New queue code to be exchanged
+ * Return: An enum exitval value
+ */
+static inline enum exitval
+queue_code_xchg(struct qspinlock *lock, u32 *ocode, u32 ncode)
+{
+ ncode |= _QLOCK_LOCKED; /* Set lock bit */
+
+ /*
+ * Exchange current copy of the queue node code
+ */
+ *ocode = atomic_xchg(&lock->qlcode, ncode);
+
+ if (likely(*ocode & _QLOCK_LOCKED)) {
+ *ocode &= ~_QLOCK_LOCKED; /* Clear the lock bit */
+ return NORMAL_EXIT;
+ }
+ /*
+ * It is possible that we may accidentally steal the lock during
+ * the unlock-lock transition. If this is the case, we need to either
+ * release it if we are not at the head of the queue, or keep the lock
+ * and be done with it.
+ */
+ if (*ocode == 0) {
+ u32 qcode;
+
+ /*
+ * Got the lock since it is at the head of the queue
+ * Now try to atomically clear the queue code.
+ */
+ qcode = atomic_cmpxchg(&lock->qlcode, ncode, _QLOCK_LOCKED);
+ /*
+ * The cmpxchg fails only if one or more tasks are added to
+ * the queue. In this case, NOTIFY_NEXT is returned instead
+ * of RELEASE_NODE.
+ */
+ return (qcode != ncode) ? NOTIFY_NEXT : RELEASE_NODE;
+ }
+ /*
+ * The lock was accidentally stolen; release it and let the queue
+ * head get it.
+ */
+ queue_spin_unlock(lock);
+ return NORMAL_EXIT;
+}
+#endif /* queue_code_xchg */
+
+/*
+ ************************************************************************
+ * Other inline functions needed by the queue_spin_lock_slowpath() *
+ * function. *
+ ************************************************************************
+ */
+
+/**
+ * xlate_qcode - translate the queue code into the queue node address
+ * @qcode: Queue code to be translated
+ * Return: The corresponding queue node address
+ */
+static inline struct qnode *xlate_qcode(u32 qcode)
+{
+ u32 cpu_nr = (qcode >> (_QCODE_VAL_OFFSET + 2)) - 1;
+ u8 qn_idx = (qcode >> _QCODE_VAL_OFFSET) & 3;
+
+ return per_cpu_ptr(&qnset.nodes[qn_idx], cpu_nr);
+}
+
+/**
+ * get_qnode - Get a queue node address as well as the queue code
+ * @cpu : CPU number
+ * @qcode : Pointer to queue code value [out]
+ * Return : queue node address & queue code in qcode
+ */
+static inline struct qnode *get_qnode(int cpu, u32 *qcode)
+{
+ struct qnode_set *qset = this_cpu_ptr(&qnset);
+ int qn_idx = qset->node_idx++;
+
+ /*
+ * It should never happen that all the queue nodes are being used.
+ */
+ BUG_ON(qn_idx >= MAX_QNODES);
+ *qcode = queue_encode_qcode(cpu, qn_idx);
+ return qset->nodes + qn_idx;
+}
+
+/**
+ * put_qnode - Return a queue node to the pool
+ */
+static inline void put_qnode(void)
+{
+ this_cpu_dec(qnset.node_idx);
+}
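+
+/*
+ * Illustrative nesting example: a task contending for a qspinlock in
+ * process context uses node index 0 of its CPU. If a softirq interrupts
+ * the spin and contends for another qspinlock, it gets node index 1, and
+ * a hardirq on top of that would get index 2. Each context calls
+ * put_qnode() when it is done with its node, releasing the indexes in
+ * the reverse order they were taken.
+ */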
+
+/**
+ * queue_spin_lock_slowpath - acquire the queue spinlock
+ * @lock : Pointer to queue spinlock structure
+ * @qsval: Current value of the queue spinlock 32-bit word
+ */
+void queue_spin_lock_slowpath(struct qspinlock *lock, int qsval)
+{
+ unsigned int cpu_nr;
+ struct qnode *node, *next;
+ u32 prev_qcode, my_qcode;
+ enum exitval exitval;
+
+ /*
+ * Get the queue node
+ */
+ cpu_nr = smp_processor_id();
+ node = get_qnode(cpu_nr, &my_qcode);
+
+ /*
+ * Initialize the queue node
+ */
+ node->qhead = false;
+ node->next = NULL;
+
+ /*
+ * The lock may be available at this point; try again if no task was
+ * waiting in the queue.
+ */
+ if (!(qsval >> _QCODE_OFFSET) && queue_spin_trylock(lock))
+ goto release_node;
+
+ /*
+ * Exchange current copy of the queue node code
+ */
+ exitval = queue_code_xchg(lock, &prev_qcode, my_qcode);
+ if (unlikely(exitval == NOTIFY_NEXT))
+ goto notify_next;
+ else if (unlikely(exitval == RELEASE_NODE))
+ goto release_node;
+
+ if (prev_qcode) {
+ /*
+ * Not at the queue head, get the address of the previous node
+ * and set up the "next" field of that node.
+ */
+ struct qnode *prev = xlate_qcode(prev_qcode);
+
+ ACCESS_ONCE(prev->next) = node;
+ /*
+ * Wait until the queue head flag is on
+ */
+ do {
+ arch_mutex_cpu_relax();
+ } while (!ACCESS_ONCE(node->qhead));
+ }
+
+ /*
+ * At the head of the wait queue now
+ */
+ for (;; arch_mutex_cpu_relax()) {
+ qsval = atomic_read(&lock->qlcode);
+ next = ACCESS_ONCE(node->next);
+ if (qsval & _QLOCK_LOCK_MASK)
+ continue; /* Lock not available yet */
+
+ if (likely(qsval_to_qcode(qsval) != my_qcode)) {
+ /*
+ * There are additional lock waiters in the queue.
+ */
+ if (unlikely(!__queue_spin_trylock(lock)))
+ continue; /* Trylock fails! */
+ if (likely(next))
+ goto set_qhead;
+ else
+ goto notify_next;
+ /*
+ * The queue head is the only lock waiter in the queue.
+ * Get the lock & clear the queue code simultaneously.
+ */
+ } else if (queue_spin_trylock_and_clr_qcode(lock, my_qcode)) {
+ goto release_node;
+ }
+ }
+
+notify_next:
+ /*
+ * Wait, if needed, until the next one in the queue has set up its next field
+ */
+ while (!(next = ACCESS_ONCE(node->next)))
+ arch_mutex_cpu_relax();
+set_qhead:
+ /*
+ * The next one in queue is now at the head
+ */
+ ACCESS_ONCE(next->qhead) = true;
+
+release_node:
+ put_qnode();
+}
+EXPORT_SYMBOL(queue_spin_lock_slowpath);