new file mode 100644
@@ -0,0 +1,231 @@
+/*
+ * Per-cpu list
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * (C) Copyright 2016 Hewlett-Packard Enterprise Development LP
+ *
+ * Authors: Waiman Long <waiman.long@hpe.com>
+ */
+#ifndef __LINUX_PERCPU_LIST_H
+#define __LINUX_PERCPU_LIST_H
+
+#include <linux/spinlock.h>
+#include <linux/list.h>
+#include <linux/percpu.h>
+
+/*
+ * include/linux/percpu-list.h
+ *
+ * A per-cpu list protected by a per-cpu spinlock.
+ *
+ * The pcpu_list_head structure contains the spinlock; each
+ * pcpu_list_node structure only contains a pointer to the spinlock of
+ * the pcpu_list_head it was added to.
+ */
+struct pcpu_list_head {
+ /* Head of the list of pcpu_list_node entries added on one CPU */
+ struct list_head list;
+ /* Taken around every insertion into / deletion from 'list' */
+ spinlock_t lock;
+};
+
+/*
+ * Static initializer for a single pcpu_list_head variable called 'name':
+ * an empty list (both links point back at the head itself) and an
+ * unlocked spinlock.
+ *
+ * The spinlock is a member of pcpu_list_head itself, not of the embedded
+ * list_head, so it has to be initialized through .lock.
+ */
+#define PCPU_LIST_HEAD_INIT(name) \
+	{ \
+		.list.prev = &(name).list, \
+		.list.next = &(name).list, \
+		.lock = __SPIN_LOCK_UNLOCKED(name.lock), \
+	}
+
+/*
+ * Per-cpu list iteration state
+ *
+ * Updated by pcpu_list_iterate() and pcpu_list_iterate_safe() as they
+ * walk the per-cpu lists one CPU at a time. The iteration helpers rely on
+ * the initial values set by PCPU_LIST_STATE_INIT() / init_pcpu_list_state().
+ */
+struct pcpu_list_state {
+ /* CPU whose list is being walked; -1 before the first list is picked */
+ int cpu;
+ /* Lock of the list being walked, acquired by the iterator; NULL initially */
+ spinlock_t *lock;
+ struct list_head *head; /* List head of current per-cpu list */
+ /* Entry handed out by the most recent iteration step */
+ struct pcpu_list_node *curr;
+ /* Entry following curr; only maintained by pcpu_list_iterate_safe() */
+ struct pcpu_list_node *next;
+};
+
+/*
+ * Initializer for an iteration state that has not visited any CPU yet.
+ *
+ * Only .cpu needs a non-zero value; the members that are not named in the
+ * designated initializer (lock, head, curr and next) start out as NULL.
+ */
+#define PCPU_LIST_STATE_INIT()	{ .cpu = -1, }
+
+/* Define a pcpu_list_state variable 's' that is ready to start an iteration */
+#define DEFINE_PCPU_LIST_STATE(s) \
+	struct pcpu_list_state s = PCPU_LIST_STATE_INIT()
+
+/*
+ * Run-time counterpart of PCPU_LIST_STATE_INIT(): put @state back into the
+ * "nothing visited yet" condition (cpu == -1, all pointers NULL).
+ */
+static inline void init_pcpu_list_state(struct pcpu_list_state *state)
+{
+	/* Members not named in the compound literal are set to NULL */
+	*state = (struct pcpu_list_state){ .cpu = -1 };
+}
+
+#ifdef CONFIG_DEBUG_SPINLOCK
+#define PERCPU_LIST_WARN_ON(x) WARN_ON(x)
+#else
+#define PERCPU_LIST_WARN_ON(x)
+#endif
+
+/*
+ * Next per-cpu list entry
+ *
+ * @pos:    pointer to the current entry (the structure containing @member)
+ * @member: name of the member of that structure which has a 'list' field,
+ *          presumably its embedded pcpu_list_node
+ *
+ * Thin wrapper around list_next_entry() that follows 'member.list'.
+ */
+#define pcpu_list_next_entry(pos, member) list_next_entry(pos, member.list)
+
+/*
+ * Per-cpu node data structure
+ */
+struct pcpu_list_node {
+ /* Links the node into one of the per-cpu lists */
+ struct list_head list;
+ /*
+  * Lock of the per-cpu list the node is on: set by pcpu_list_add(),
+  * reset to NULL by pcpu_list_del() and by init_pcpu_list_node().
+  */
+ spinlock_t *lockptr;
+};
+
+/*
+ * Static initializer for a pcpu_list_node variable called 'name': a
+ * self-linked (i.e. unlinked) list entry that is not associated with any
+ * per-cpu list lock yet.
+ *
+ * lockptr is a member of pcpu_list_node itself, not of the embedded
+ * list_head, so it has to be initialized through .lockptr.
+ */
+#define PCPU_LIST_NODE_INIT(name) \
+	{ \
+		.list.prev = &(name).list, \
+		.list.next = &(name).list, \
+		.lockptr = NULL, \
+	}
+
+/*
+ * Run-time initialization of a node: it is on no list (self-linked) and
+ * has no per-cpu list lock associated with it.
+ */
+static inline void init_pcpu_list_node(struct pcpu_list_node *node)
+{
+	node->lockptr = NULL;
+	INIT_LIST_HEAD(&node->list);
+}
+
+/*
+ * Release the per-cpu list heads referenced by *@ppcpu_head with
+ * free_percpu() and clear the caller's pointer so that it cannot be used
+ * afterwards.
+ */
+static inline void free_pcpu_list_head(struct pcpu_list_head **ppcpu_head)
+{
+	struct pcpu_list_head *pcpu_head = *ppcpu_head;
+
+	free_percpu(pcpu_head);
+	*ppcpu_head = NULL;
+}
+
+/*
+ * Check if all the per-cpu lists are empty
+ *
+ * Every possible CPU's list is inspected without taking its lock, so the
+ * answer is only a snapshot if entries are being added or removed
+ * concurrently.
+ *
+ * Return: true if no per-cpu list has an entry, false otherwise
+ */
+static inline bool pcpu_list_empty(struct pcpu_list_head *pcpu_head)
+{
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		struct pcpu_list_head *head = per_cpu_ptr(pcpu_head, cpu);
+
+		if (!list_empty(&head->list))
+			return false;
+	}
+	return true;
+}
+
+/*
+ * Helper function to find the first entry of the next per-cpu list
+ * It works somewhat like for_each_possible_cpu(cpu).
+ *
+ * @head:  the per-cpu list heads being iterated
+ * @state: iteration state; the search resumes after state->cpu
+ *
+ * The lock of the list that was being walked (if any) is released first.
+ * On success, the lock of the newly selected list is held and state->cpu,
+ * state->head, state->lock and state->curr describe its first entry.
+ *
+ * state->lock is cleared whenever the lock is dropped, and state->curr and
+ * state->next are cleared once all lists are exhausted. A finished state
+ * therefore never refers to a lock that is no longer held or to an entry
+ * that is no longer protected, and calling an iteration helper again on it
+ * cannot unlock a lock it does not own.
+ *
+ * Return: true if the entry is found, false if all the lists exhausted
+ */
+static __always_inline bool
+__pcpu_list_next_cpu(struct pcpu_list_head *head, struct pcpu_list_state *state)
+{
+	if (state->lock) {
+		spin_unlock(state->lock);
+		state->lock = NULL;
+	}
+next_cpu:
+	/*
+	 * for_each_possible_cpu(cpu)
+	 */
+	state->cpu = cpumask_next(state->cpu, cpu_possible_mask);
+	if (state->cpu >= nr_cpu_ids) {
+		/* All the per-cpu lists iterated */
+		state->curr = NULL;
+		state->next = NULL;
+		return false;
+	}
+
+	state->head = &per_cpu_ptr(head, state->cpu)->list;
+	/* Unlocked peek: the lock of an empty list is not taken at all */
+	if (list_empty(state->head))
+		goto next_cpu;
+
+	state->lock = &per_cpu_ptr(head, state->cpu)->lock;
+	spin_lock(state->lock);
+	/*
+	 * There is a slight chance that the list may become empty just
+	 * before the lock is acquired. So an additional check is
+	 * needed to make sure that state->curr points to a valid entry.
+	 */
+	if (list_empty(state->head)) {
+		spin_unlock(state->lock);
+		state->lock = NULL;
+		goto next_cpu;
+	}
+	state->curr = list_entry(state->head->next,
+				 struct pcpu_list_node, list);
+	return true;
+}
+
+/*
+ * Iterate to the next entry of the group of per-cpu lists
+ *
+ * @head:  the per-cpu list heads to walk
+ * @state: iteration state, updated in place
+ *
+ * When true is returned, state->curr is the entry to process and the lock
+ * of the list it is on (state->lock) has been acquired by the iterator.
+ * NOTE(review): a caller that stops before false is returned presumably
+ * has to drop state->lock itself - confirm against the users.
+ *
+ * Return: true if the next entry is found, false if all the entries iterated
+ */
+static inline bool pcpu_list_iterate(struct pcpu_list_head *head,
+				     struct pcpu_list_state *state)
+{
+	bool list_done;
+
+	/* Step past the entry handed out by the previous call, if any */
+	if (state->curr)
+		state->curr = list_next_entry(state->curr, list);
+
+	/*
+	 * Either no list has been entered yet, or the walk wrapped around
+	 * to the head of the current per-cpu list: move on to the next one.
+	 */
+	list_done = !state->curr || (&state->curr->list == state->head);
+	if (list_done && !__pcpu_list_next_cpu(head, state))
+		return false;
+
+	PERCPU_LIST_WARN_ON(state->curr->lockptr != state->lock);
+	return true;	/* Continue the iteration */
+}
+
+/*
+ * Iterate to the next entry of the group of per-cpu lists and safe
+ * against removal of list_entry
+ *
+ * @head:  the per-cpu list heads to walk
+ * @state: iteration state, updated in place
+ *
+ * The successor of the returned entry is remembered in state->next before
+ * the entry is handed out, so the walk can continue even if the caller
+ * unlinks state->curr.
+ *
+ * NOTE(review): the lock of the current list is held while the caller
+ * processes state->curr, and pcpu_list_del() acquires node->lockptr, which
+ * is that same lock - confirm how callers are expected to remove the entry.
+ *
+ * Return: true if the next entry is found, false if all the entries iterated
+ */
+static inline bool pcpu_list_iterate_safe(struct pcpu_list_head *head,
+					  struct pcpu_list_state *state)
+{
+	struct pcpu_list_node *cur = state->curr;
+
+	/* Advance to the successor saved by the previous call */
+	if (cur) {
+		cur = state->next;
+		state->curr = cur;
+		state->next = list_next_entry(cur, list);
+	}
+
+	if (!cur || (&cur->list == state->head)) {
+		/*
+		 * Nothing left on the current per-cpu list (or no list
+		 * entered yet): switch to the next non-empty one.
+		 */
+		if (!__pcpu_list_next_cpu(head, state))
+			return false;
+		state->next = list_next_entry(state->curr, list);
+	}
+
+	PERCPU_LIST_WARN_ON(state->curr->lockptr != state->lock);
+	return true;	/* Continue the iteration */
+}
+
+/*
+ * Out-of-line part of the API:
+ *
+ * pcpu_list_add()       - add @node to the list of the CPU the caller is
+ *                         running on
+ * pcpu_list_del()       - remove @node from the per-cpu list it is on
+ * init_pcpu_list_head() - allocate and initialize the per-cpu list heads;
+ *                         returns 0 on success or -ENOMEM
+ */
+extern void pcpu_list_add(struct pcpu_list_node *node,
+ struct pcpu_list_head *head);
+extern void pcpu_list_del(struct pcpu_list_node *node);
+extern int init_pcpu_list_head(struct pcpu_list_head **ppcpu_head);
+
+#endif /* __LINUX_PERCPU_LIST_H */
@@ -40,7 +40,7 @@ obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \
gcd.o lcm.o list_sort.o uuid.o flex_array.o iov_iter.o clz_ctz.o \
bsearch.o find_bit.o llist.o memweight.o kfifo.o \
percpu-refcount.o percpu_ida.o rhashtable.o reciprocal_div.o \
- once.o
+ once.o percpu-list.o
obj-y += string_helpers.o
obj-$(CONFIG_TEST_STRING_HELPERS) += test-string_helpers.o
obj-y += hexdump.o
new file mode 100644
@@ -0,0 +1,100 @@
+/*
+ * Per-cpu list
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * (C) Copyright 2016 Hewlett-Packard Enterprise Development LP
+ *
+ * Authors: Waiman Long <waiman.long@hpe.com>
+ */
+#include <linux/percpu-list.h>
+#include <linux/lockdep.h>
+
+/*
+ * The per-cpu list locks need their own lock class to avoid warnings and
+ * stack traces when lockdep is enabled. The class is assigned to every
+ * per-cpu lock in init_pcpu_list_head().
+ */
+static struct lock_class_key percpu_list_key;
+
+/*
+ * Initialize the per-cpu list head
+ *
+ * Allocates one pcpu_list_head per possible CPU, makes each of them an
+ * empty list with an unlocked spinlock of the percpu_list_key lock class,
+ * and stores the per-cpu pointer in *@ppcpu_head.
+ *
+ * Return: 0 on success, -ENOMEM if the per-cpu allocation fails (in which
+ *	   case *@ppcpu_head is left untouched)
+ */
+int init_pcpu_list_head(struct pcpu_list_head **ppcpu_head)
+{
+	struct pcpu_list_head *heads;
+	int cpu;
+
+	heads = alloc_percpu(struct pcpu_list_head);
+	if (!heads)
+		return -ENOMEM;
+
+	for_each_possible_cpu(cpu) {
+		struct pcpu_list_head *head = per_cpu_ptr(heads, cpu);
+
+		INIT_LIST_HEAD(&head->list);
+		head->lock = __SPIN_LOCK_UNLOCKED(&head->lock);
+		lockdep_set_class(&head->lock, &percpu_list_key);
+	}
+
+	*ppcpu_head = heads;
+	return 0;
+}
+
+/*
+ * Add a node to the per-cpu list of the current CPU
+ *
+ * List selection is based on the CPU being used when the pcpu_list_add()
+ * function is called. However, deletion may be done by a different CPU.
+ * So we still need to use a lock to protect the content of the list.
+ */
+void pcpu_list_add(struct pcpu_list_node *node, struct pcpu_list_head *head)
+{
+	struct pcpu_list_head *local;
+
+	/*
+	 * get_cpu_ptr() disables preemption, so the CPU cannot change
+	 * between picking the local list and linking the node into it.
+	 */
+	local = get_cpu_ptr(head);
+	spin_lock(&local->lock);
+	/* Remember which lock protects the node so that deletion can find it */
+	node->lockptr = &local->lock;
+	list_add(&node->list, &local->list);
+	spin_unlock(&local->lock);
+	put_cpu_ptr(head);
+}
+
+/*
+ * Delete a node from a percpu list
+ *
+ * The lock protecting the node is found through node->lockptr. The pointer
+ * is sampled once without the lock and checked again after the lock has
+ * been taken, to guard against a concurrent delete of the same node. A
+ * NULL pointer or a pointer that changed in between is not expected; both
+ * cases trigger a warning and leave the node untouched.
+ */
+void pcpu_list_del(struct pcpu_list_node *node)
+{
+	spinlock_t *lock = READ_ONCE(node->lockptr);
+
+	if (WARN(!lock, "pcpu_list_del: node 0x%lx has no associated lock\n",
+		 (unsigned long)node))
+		return;
+
+	spin_lock(lock);
+	/* A mismatch here should never happen, hence the warning */
+	if (!WARN_ON(lock != node->lockptr)) {
+		list_del_init(&node->list);
+		node->lockptr = NULL;
+	}
+	spin_unlock(lock);
+}
Linked lists are used everywhere in the Linux kernel. However, if many threads are trying to add entries to or delete entries from the same linked list, it can create a performance bottleneck. This patch introduces a new per-cpu list subsystem with associated per-cpu locks for protecting each of the lists individually. This allows list entry insertion and deletion operations to happen in parallel instead of being serialized with a global list and lock. List entry insertion is strictly per cpu. List deletion, however, can happen on a cpu other than the one that did the insertion. So we still need a lock to protect the list. Because of that, there may still be a small amount of contention when deletion is being done. A new header file include/linux/percpu-list.h will be added with the associated pcpu_list_head and pcpu_list_node structures. The following functions are provided to manage the per-cpu list: 1. int init_pcpu_list_head(struct pcpu_list_head **ppcpu_head) 2. void pcpu_list_add(struct pcpu_list_node *node, struct pcpu_list_head *head) 3. void pcpu_list_del(struct pcpu_list_node *node) Iteration of all the list entries within a group of per-cpu lists is done by calling either the pcpu_list_iterate() or pcpu_list_iterate_safe() functions in a while loop. They correspond to the list_for_each_entry() and list_for_each_entry_safe() macros respectively. The iteration states are kept in a pcpu_list_state structure that is passed to the iteration functions. Signed-off-by: Waiman Long <Waiman.Long@hpe.com> --- include/linux/percpu-list.h | 231 +++++++++++++++++++++++++++++++++++++++++++ lib/Makefile | 2 +- lib/percpu-list.c | 100 +++++++++++++++++++ 3 files changed, 332 insertions(+), 1 deletions(-) create mode 100644 include/linux/percpu-list.h create mode 100644 lib/percpu-list.c