@@ -25,31 +25,65 @@
* The distributed and locked list is a distributed set of lists each of
* which is protected by its own spinlock, but acts like a single
* consolidated list to the callers. For scaling purpose, the number of
- * lists used is equal to the number of possible CPUs in the system to
- * minimize contention.
+ * lists used is equal to the number of possible cores in the system to
+ * minimize contention. All threads of the same CPU core will share the
+ * same list.
*
- * However, it is possible that individual CPU numbers may be equal to
- * or greater than the number of possible CPUs when there are holes in
- * the CPU number list. As a result, we need to map the CPU number to a
- * list index.
+ * We need to map each CPU number to a list index.
*/
static DEFINE_PER_CPU_READ_MOSTLY(int, cpu2idx);
+static int nr_dlock_lists __read_mostly;

/*
- * Initialize cpu2idx mapping table
+ * Initialize cpu2idx mapping table & nr_dlock_lists.
*
* It is possible that a dlock-list can be allocated before the cpu2idx is
* initialized. In this case, all the cpus are mapped to the first entry
* before initialization.
*
+ * All the CPUs in the same sibling group will map to the same dlock list.
+ * This reduces the number of dlock lists to be maintained while still
+ * keeping cacheline contention low, as sibling CPUs share their caches.
+ *
+ * As the sibling masks are set up in the core initcall phase, this
+ * function has to run in the postcore phase to see valid sibling data.
*/
static int __init cpu2idx_init(void)
{
int idx, cpu;
+ struct cpumask *sibling_mask;
+ static struct cpumask mask __initdata;

+ cpumask_clear(&mask);
idx = 0;
- for_each_possible_cpu(cpu)
- per_cpu(cpu2idx, cpu) = idx++;
+ for_each_possible_cpu(cpu) {
+ int scpu;
+
+ if (cpumask_test_cpu(cpu, &mask))
+ continue;
+ per_cpu(cpu2idx, cpu) = idx;
+ cpumask_set_cpu(cpu, &mask);
+
+ sibling_mask = topology_sibling_cpumask(cpu);
+ if (sibling_mask) {
+ for_each_cpu(scpu, sibling_mask) {
+ per_cpu(cpu2idx, scpu) = idx;
+ cpumask_set_cpu(scpu, &mask);
+ }
+ }
+ idx++;
+ }
+
+ /*
+ * The smp_mb() ensures that all the cpu2idx entries written above are
+ * visible before nr_dlock_lists is set to a non-zero value.
+ */
+ smp_mb();
+ nr_dlock_lists = idx;
+ WARN_ON(nr_dlock_lists > nr_cpu_ids);
+
+ pr_info("dlock-list: %d head entries per dlock list.\n",
+ nr_dlock_lists);
return 0;
}
postcore_initcall(cpu2idx_init);
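
To see what the sibling grouping does, consider a 4-CPU box with two
hyperthreads per core. The userspace sketch below replays the same walk;
the sibling[] table stands in for topology_sibling_cpumask(), and all
names in it are illustrative rather than kernel API. CPUs 0/2 and 1/3
collapse onto list indices 0 and 1, so nr_dlock_lists ends up at 2
instead of 4.

#include <stdio.h>

#define NR_CPUS 4

/*
 * Stand-in for topology_sibling_cpumask(): sibling[cpu] lists the
 * hardware threads sharing cpu's core, terminated by -1. CPUs 0/2
 * and 1/3 are hyperthread pairs in this example.
 */
static const int sibling[NR_CPUS][NR_CPUS + 1] = {
        { 0, 2, -1 }, { 1, 3, -1 }, { 2, 0, -1 }, { 3, 1, -1 },
};

static int cpu2idx[NR_CPUS];
static int seen[NR_CPUS];

int main(void)
{
        int idx = 0;

        for (int cpu = 0; cpu < NR_CPUS; cpu++) {
                if (seen[cpu])
                        continue;       /* already mapped via a sibling */
                for (int i = 0; sibling[cpu][i] >= 0; i++) {
                        cpu2idx[sibling[cpu][i]] = idx;
                        seen[sibling[cpu][i]] = 1;
                }
                idx++;
        }

        for (int cpu = 0; cpu < NR_CPUS; cpu++)
                printf("cpu %d -> list %d\n", cpu, cpu2idx[cpu]);
        printf("nr_dlock_lists = %d\n", idx);   /* prints 2 */
        return 0;
}
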
@@ -67,19 +101,23 @@ static int __init cpu2idx_init(void)
*
* Dynamically allocated locks need to have their own special lock class
* to avoid lockdep warning.
+ *
+ * Since nr_dlock_lists will always be <= nr_cpu_ids, having more lists
+ * allocated than necessary is not a problem other than some wasted memory.
+ * The extra lists will never be used, as all the cpu2idx entries will
+ * be 0 before initialization.
*/
int __alloc_dlock_list_heads(struct dlock_list_heads *dlist,
struct lock_class_key *key)
{
- int idx;
+ int idx, cnt = nr_dlock_lists ? nr_dlock_lists : nr_cpu_ids;

- dlist->heads = kcalloc(nr_cpu_ids, sizeof(struct dlock_list_head),
- GFP_KERNEL);
+ dlist->heads = kcalloc(cnt, sizeof(struct dlock_list_head), GFP_KERNEL);
if (!dlist->heads)
return -ENOMEM;

- for (idx = 0; idx < nr_cpu_ids; idx++) {
+ for (idx = 0; idx < cnt; idx++) {
struct dlock_list_head *head = &dlist->heads[idx];

INIT_LIST_HEAD(&head->list);
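
Why the pre-initialization over-allocation mentioned above is harmless is
easiest to see on the add side, which this diff does not touch. The sketch
below paraphrases the likely shape of dlock_lists_add() from the rest of
the series: heads[] is only ever indexed through cpu2idx, and every
cpu2idx entry reads 0 until cpu2idx_init() has run, so only heads[0] can
be touched before then.

/* Paraphrased sketch, not part of this diff. */
void dlock_lists_add(struct dlock_list_node *node,
                     struct dlock_list_heads *dlist)
{
        struct dlock_list_head *head = &dlist->heads[this_cpu_read(cpu2idx)];

        /*
         * Preemption between the cpu2idx read and the lock is harmless:
         * any sublist is correct, cpu2idx only steers the node toward a
         * cache-local one.
         */
        spin_lock(&head->lock);
        node->head = head;
        list_add(&node->list, &head->list);
        spin_unlock(&head->lock);
}
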
@@ -117,7 +155,10 @@ bool dlock_lists_empty(struct dlock_list_heads *dlist)
{
int idx;

- for (idx = 0; idx < nr_cpu_ids; idx++)
+ /* Shouldn't be called before nr_dlock_lists is initialized */
+ WARN_ON_ONCE(!nr_dlock_lists);
+
+ for (idx = 0; idx < nr_dlock_lists; idx++)
if (!list_empty(&dlist->heads[idx].list))
return false;
return true;
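
A short caller sketch for the allocation and emptiness check above. It
assumes the alloc_dlock_list_heads() wrapper that the series provides
around __alloc_dlock_list_heads() (it supplies a static lock_class_key);
sb_inodes and sb_inodes_init() are hypothetical names.

static struct dlock_list_heads sb_inodes;

static int sb_inodes_init(void)
{
        int err = alloc_dlock_list_heads(&sb_inodes);

        if (err)
                return err;     /* -ENOMEM from the kcalloc() above */

        /*
         * dlock_lists_empty() takes no locks: it is a racy snapshot
         * across the per-core sublists and is only meaningful if the
         * caller can exclude concurrent adds.
         */
        WARN_ON(!dlock_lists_empty(&sb_inodes));
        return 0;
}
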
@@ -199,6 +240,9 @@ struct dlock_list_node *__dlock_list_next_list(struct dlock_list_iter *iter)
struct dlock_list_node *next;
struct dlock_list_head *head;

+ /* Shouldn't be called before nr_dlock_lists is initialized */
+ WARN_ON_ONCE(!nr_dlock_lists);
+
restart:
if (iter->entry) {
spin_unlock(&iter->entry->lock);
@@ -209,7 +253,7 @@ struct dlock_list_node *__dlock_list_next_list(struct dlock_list_iter *iter)
/*
* Try next list
*/
- if (++iter->index >= nr_cpu_ids)
+ if (++iter->index >= nr_dlock_lists)
return NULL; /* All the entries iterated */

if (list_empty(&iter->head[iter->index].list))
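
For completeness, here is a sketch of how the iterator backed by
__dlock_list_next_list() is normally driven. DEFINE_DLOCK_LIST_ITER()
and dlist_for_each_entry() are assumed from the rest of the series;
struct foo and sum_foos() are hypothetical. With this patch the walk
covers nr_dlock_lists sublists rather than nr_cpu_ids, holding only the
spinlock of the sublist currently being traversed.

struct foo {
        int value;
        struct dlock_list_node node;    /* links foo into the dlock list */
};

static int sum_foos(struct dlock_list_heads *heads)
{
        DEFINE_DLOCK_LIST_ITER(iter, heads);
        struct foo *f;
        int sum = 0;

        /*
         * Other CPUs may still add to or remove from the sublists that
         * are not currently locked by the iterator.
         */
        dlist_for_each_entry(f, &iter, node)
                sum += f->value;
        return sum;
}
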