[CI,1/8] async: Add support for explicit fine-grained barriers

Message ID 1463565047-32149-1-git-send-email-chris@chris-wilson.co.uk
State New, archived

Commit Message

Chris Wilson May 18, 2016, 9:50 a.m. UTC
The current async-domain model supports running a multitude of
independent tasks with a coarse synchronisation point. This is
sufficient for its original purpose of allowing independent drivers to
run concurrently during various phases (booting, early resume, late
resume, etc). However, to exploit the framework further, drivers
themselves want to schedule multiple tasks within a phase (or between
phases) and control the order of execution of those tasks relative to
each other. To enable this, we extend the synchronisation scheme with
explicit barriers, called fences, which act as semaphores. A fence can
be placed into the async-domain with many dependencies and is only
passed once all of those dependencies have been met. This allows us to
build N:M barriers to precisely control execution between many
interoperating tasks. Each task is itself a fence, which is useful for
coordinating sequential execution and for supporting the existing
coarse-grained barrier.
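
As an illustration, here is a minimal sketch of a 2:1 barrier built
with the API added below. Error handling is elided, and the
produce_foo(), produce_bar() and consume() callbacks (and their data)
are placeholders:

	struct async_work *a, *b, *c;
	struct async_fence *barrier;

	a = async_work_create(produce_foo, &foo, GFP_KERNEL);
	b = async_work_create(produce_bar, &bar, GFP_KERNEL);
	c = async_work_create(consume, &result, GFP_KERNEL);

	/* The barrier is created holding one pending count of its own */
	barrier = async_fence_create(GFP_KERNEL);
	async_fence_add(barrier, &a->fence, GFP_KERNEL);
	async_fence_add(barrier, &b->fence, GFP_KERNEL);

	/* c may only execute once both a and b have completed */
	async_work_after(c, barrier);
	async_fence_signal(barrier); /* drop the initial pending count */

	schedule_async_work(a);
	schedule_async_work(b);
	schedule_async_work(c);

	async_work_wait(c);
	async_work_put(c);
	async_work_put(b);
	async_work_put(a);
	async_fence_put(barrier);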

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Ignore-Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Ignore-Cc: Lukas Wunner <lukas@wunner.de>
Ignore-Cc: Tejun Heo <tj@kernel.org>
Ignore-Cc: Dan Williams <dan.j.williams@intel.com>
Ignore-Cc: Andrew Morton <akpm@linux-foundation.org>
Ignore-Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
Ignore-Cc: Andrey Ryabinin <aryabinin@virtuozzo.com>
Ignore-Cc: Oleg Nesterov <oleg@redhat.com>
Ignore-Cc: Johannes Berg <johannes.berg@intel.com>
Ignore-Cc: Robert Jarzmik <robert.jarzmik@free.fr>
Ignore-Cc: Alexander Potapenko <glider@google.com>
Ignore-Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Ignore-Cc: "David S. Miller" <davem@davemloft.net>
Ignore-Cc: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Ignore-Cc: Peter Zijlstra <peterz@infradead.org>
Ignore-Cc: Kees Cook <keescook@chromium.org>
Ignore-Cc: Valentin Rothberg <valentinrothberg@gmail.com>
Ignore-Cc: Jason Baron <jbaron@akamai.com>
Ignore-Cc: Dmitry Vyukov <dvyukov@google.com>
Ignore-Cc: Ingo Molnar <mingo@kernel.org>
Ignore-Cc: Hannes Frederic Sowa <hannes@stressinduktion.org>
Ignore-Cc: Chris Metcalf <cmetcalf@ezchip.com>
Ignore-Cc: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Ignore-Cc: David Decotigny <decot@googlers.com>
Ignore-Cc: linux-kernel@vger.kernel.org
Ignore-Cc: linux-kselftest@vger.kernel.org
---
 include/linux/async.h                       |  96 ++++++-
 kernel/async.c                              | 386 ++++++++++++++++++-------
 lib/Kconfig                                 |   7 +
 lib/Makefile                                |   1 +
 lib/test-async-domain.c                     | 429 ++++++++++++++++++++++++++++
 tools/testing/selftests/lib/Makefile        |   2 +-
 tools/testing/selftests/lib/async-domain.sh |  10 +
 7 files changed, 823 insertions(+), 108 deletions(-)
 create mode 100644 lib/test-async-domain.c
 create mode 100755 tools/testing/selftests/lib/async-domain.sh

Patch

diff --git a/include/linux/async.h b/include/linux/async.h
index 6b0226bdaadc..55ed6aa98344 100644
--- a/include/linux/async.h
+++ b/include/linux/async.h
@@ -13,38 +13,122 @@ 
 #define __ASYNC_H__
 
 #include <linux/types.h>
+#include <linux/completion.h>
+#include <linux/gfp.h>
+#include <linux/kref.h>
 #include <linux/list.h>
+#include <linux/workqueue.h>
 
 typedef u64 async_cookie_t;
 typedef void (*async_func_t) (void *data, async_cookie_t cookie);
+
+struct async_fence {
+	struct completion completion;
+	unsigned long flags;
+	struct kref kref;
+	atomic_t pending;
+};
+
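+/*
+ * fence->flags bits: ASYNC_WORK_BIT marks a fence embedded in an
+ * async_work (execution queues the work rather than completing the
+ * fence directly), ASYNC_QUEUED_BIT guards against queueing the same
+ * work twice.
+ */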
+#define ASYNC_WORK_BIT 1
+#define ASYNC_QUEUED_BIT 2
+
+#define ASYNC_FENCE_INIT(_name) { \
+	.completion = COMPLETION_INITIALIZER(_name.completion), \
+	.kref = { { 1 } } \
+}
+
+struct async_work {
+	struct async_fence fence;
+};
+
 struct async_domain {
-	struct list_head pending;
+	struct async_fence fence;
 	unsigned registered:1;
 };
 
+#define ASYNC_DOMAIN_INIT(_name, r) { \
+	.fence = ASYNC_FENCE_INIT(_name.fence), \
+	.registered = r \
+}
+
 /*
  * domain participates in global async_synchronize_full
  */
 #define ASYNC_DOMAIN(_name) \
-	struct async_domain _name = { .pending = LIST_HEAD_INIT(_name.pending),	\
-				      .registered = 1 }
+	struct async_domain _name = ASYNC_DOMAIN_INIT(_name, 1)
 
 /*
  * domain is free to go out of scope as soon as all pending work is
  * complete, this domain does not participate in async_synchronize_full
  */
 #define ASYNC_DOMAIN_EXCLUSIVE(_name) \
-	struct async_domain _name = { .pending = LIST_HEAD_INIT(_name.pending), \
-				      .registered = 0 }
+	struct async_domain _name = ASYNC_DOMAIN_INIT(_name, 0)
+
+void async_register_domain(struct async_domain *domain, bool registered);
+void async_unregister_domain(struct async_domain *domain);
 
 extern async_cookie_t async_schedule(async_func_t func, void *data);
 extern async_cookie_t async_schedule_domain(async_func_t func, void *data,
 					    struct async_domain *domain);
-void async_unregister_domain(struct async_domain *domain);
+
 extern void async_synchronize_full(void);
 extern void async_synchronize_full_domain(struct async_domain *domain);
 extern void async_synchronize_cookie(async_cookie_t cookie);
 extern void async_synchronize_cookie_domain(async_cookie_t cookie,
 					    struct async_domain *domain);
 extern bool current_is_async(void);
+
+
+struct async_fence *async_fence_create(gfp_t gfp);
+struct async_fence *async_fence_get(struct async_fence *fence);
+async_cookie_t async_fence_add(struct async_fence *fence,
+			       struct async_fence *after,
+			       gfp_t gfp);
+void async_fence_pending(struct async_fence *fence);
+void async_fence_signal(struct async_fence *fence);
+static inline void async_fence_wait(struct async_fence *fence)
+{
+	wait_for_completion(&fence->completion);
+}
+static inline bool async_fence_complete(struct async_fence *fence)
+{
+	return completion_done(&fence->completion);
+}
+void async_fence_put(struct async_fence *fence);
+
+struct async_work *async_work_create(async_func_t func, void *data, gfp_t gfp);
+
+static inline struct async_work *async_work_get(struct async_work *work)
+{
+	async_fence_get(&work->fence);
+	return work;
+}
+
+static inline async_cookie_t
+async_work_after(struct async_work *work, struct async_fence *fence)
+{
+	return async_fence_add(&work->fence, fence, GFP_KERNEL);
+}
+
+static inline async_cookie_t
+async_work_before(struct async_work *work, struct async_fence *fence)
+{
+	return async_fence_add(fence, &work->fence, GFP_KERNEL);
+}
+
+static inline void async_work_wait(struct async_work *work)
+{
+	async_fence_wait(&work->fence);
+}
+
+static inline void async_work_put(struct async_work *work)
+{
+	async_fence_put(&work->fence);
+}
+
+async_cookie_t queue_async_work(struct async_domain *domain,
+				struct async_work *work,
+				gfp_t gfp);
+async_cookie_t schedule_async_work(struct async_work *work);
+
 #endif
diff --git a/kernel/async.c b/kernel/async.c
index d2edd6efec56..58f198091087 100644
--- a/kernel/async.c
+++ b/kernel/async.c
@@ -2,6 +2,7 @@ 
  * async.c: Asynchronous function calls for boot performance
  *
  * (C) Copyright 2009 Intel Corporation
+ * (C) Copyright 2016 Intel Corporation
  * Author: Arjan van de Ven <arjan@linux.intel.com>
  *
  * This program is free software; you can redistribute it and/or
@@ -59,141 +60,256 @@  asynchronous and synchronous parts of the kernel.
 
 #include "workqueue_internal.h"
 
-static async_cookie_t next_cookie = 1;
-
-#define MAX_WORK		32768
-#define ASYNC_COOKIE_MAX	ULLONG_MAX	/* infinity cookie */
-
-static LIST_HEAD(async_global_pending);	/* pending from all registered doms */
-static ASYNC_DOMAIN(async_dfl_domain);
-static DEFINE_SPINLOCK(async_lock);
+#define MAX_WORK 32768
 
 struct async_entry {
-	struct list_head	domain_list;
-	struct list_head	global_list;
-	struct work_struct	work;
-	async_cookie_t		cookie;
-	async_func_t		func;
-	void			*data;
-	struct async_domain	*domain;
+	struct async_work base;
+	struct work_struct work;
+
+	async_cookie_t cookie;
+	async_func_t func;
+	void *data;
 };
 
-static DECLARE_WAIT_QUEUE_HEAD(async_done);
+static ASYNC_DOMAIN_EXCLUSIVE(async_dfl_domain);
 
-static atomic_t entry_count;
+static async_cookie_t assign_cookie(void)
+{
+	static atomic64_t next_cookie;
+	async_cookie_t cookie = atomic64_inc_return(&next_cookie);
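+	/* 0 is reserved to report that no asynchronous work was queued */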
+	if (cookie == 0)
+		cookie = atomic64_inc_return(&next_cookie);
+	return cookie;
+}
 
-static async_cookie_t lowest_in_progress(struct async_domain *domain)
+static void async_fence_free(struct kref *kref)
 {
-	struct list_head *pending;
-	async_cookie_t ret = ASYNC_COOKIE_MAX;
-	unsigned long flags;
+	struct async_fence *fence = container_of(kref, typeof(*fence), kref);
 
-	spin_lock_irqsave(&async_lock, flags);
+	WARN_ON(atomic_read(&fence->pending));
 
-	if (domain)
-		pending = &domain->pending;
-	else
-		pending = &async_global_pending;
+	kfree(fence);
+}
 
-	if (!list_empty(pending))
-		ret = list_first_entry(pending, struct async_entry,
-				       domain_list)->cookie;
+void async_fence_put(struct async_fence *fence)
+{
+	if (fence)
+		kref_put(&fence->kref, async_fence_free);
+}
+EXPORT_SYMBOL_GPL(async_fence_put);
 
-	spin_unlock_irqrestore(&async_lock, flags);
-	return ret;
+struct async_fence *async_fence_get(struct async_fence *fence)
+{
+	if (fence)
+		kref_get(&fence->kref);
+	return fence;
 }
+EXPORT_SYMBOL_GPL(async_fence_get);
+
+static void async_fence_execute(struct async_fence *fence)
+{
+	if (test_bit(ASYNC_WORK_BIT, &fence->flags)) {
+		struct async_entry *entry =
+			container_of(fence, typeof(*entry), base.fence);
+		queue_work(system_unbound_wq, &entry->work);
+		return;
+	}
+
+	complete_all(&fence->completion);
+}
+
+void async_fence_pending(struct async_fence *fence)
+{
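+	/* Each pending count is one outstanding dependency to be signaled */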
+	reinit_completion(&fence->completion);
+	smp_mb__before_atomic();
+	atomic_inc(&fence->pending);
+}
+EXPORT_SYMBOL_GPL(async_fence_pending);
+
+void async_fence_signal(struct async_fence *fence)
+{
+	if (atomic_dec_and_test(&fence->pending))
+		async_fence_execute(fence);
+}
+EXPORT_SYMBOL_GPL(async_fence_signal);
+
+static void async_fence_init(struct async_fence *fence)
+{
+	kref_init(&fence->kref);
+	init_completion(&fence->completion);
+	/* an unattached fence is complete */
+	fence->completion.done = UINT_MAX/2;
+
+	atomic_set(&fence->pending, 0);
+	fence->flags = 0;
+}
+
+struct async_fence *async_fence_create(gfp_t gfp)
+{
+	struct async_fence *fence;
+
+	fence = kmalloc(sizeof(*fence), gfp);
+	if (!fence)
+		return NULL;
+
+	async_fence_init(fence);
+	async_fence_pending(fence);
+
+	return fence;
+}
+EXPORT_SYMBOL_GPL(async_fence_create);
+
+static int async_fence_wake(wait_queue_t *wq,
+			    unsigned mode, int flags, void *key)
+{
+	list_del(&wq->task_list);
+	async_fence_signal(wq->private);
+	async_fence_put(wq->private);
+	kfree(wq);
+	return 0;
+}
+
+static bool async_check_not_after(struct async_fence *fence,
+				  struct async_fence *after)
+{
+	wait_queue_t *wq;
+
+	if (fence == NULL)
+		return false;
+
+	if (fence == after)
+		return true;
+
+	list_for_each_entry(wq, &after->completion.wait.task_list, task_list) {
+		if (wq->func != async_fence_wake)
+			continue;
+
+		if (async_check_not_after(fence, wq->private))
+			return true;
+	}
+
+	return false;
+}
+
+async_cookie_t async_fence_add(struct async_fence *fence,
+			       struct async_fence *after,
+			       gfp_t gfp)
+{
+	wait_queue_t *wq;
+	unsigned long irqflags;
+	async_cookie_t cookie;
+
+	/* The dependency graph must be acyclic */
+	if (WARN_ON(async_check_not_after(after, fence)))
+		return 0;
+
+	if (!after || async_fence_complete(after))
+		return 0;
+
+	wq = kzalloc(sizeof(*wq), gfp);
+	if (!wq) {
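+		/* Cannot track the dependency: wait for it now if we may */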
+		if (!WARN_ON(!gfpflags_allow_blocking(gfp)))
+			async_fence_wait(after);
+		return 0;
+	}
+
+	INIT_LIST_HEAD(&wq->task_list);
+	wq->func = async_fence_wake;
+	wq->private = async_fence_get(fence);
+
+	async_fence_pending(fence);
+
+	cookie = 0;
+	spin_lock_irqsave(&after->completion.wait.lock, irqflags);
+	if (!READ_ONCE(after->completion.done)) {
+		__add_wait_queue_tail(&after->completion.wait, wq);
+		cookie = assign_cookie();
+	} else
+		async_fence_wake(wq, 0, 0, NULL);
+	spin_unlock_irqrestore(&after->completion.wait.lock, irqflags);
+
+	return cookie;
+}
+EXPORT_SYMBOL_GPL(async_fence_add);
 
-/*
- * pick the first pending entry and run it
- */
 static void async_run_entry_fn(struct work_struct *work)
 {
-	struct async_entry *entry =
-		container_of(work, struct async_entry, work);
-	unsigned long flags;
-	ktime_t uninitialized_var(calltime), delta, rettime;
+	struct async_entry *entry = container_of(work, typeof(*entry), work);
+	ktime_t uninitialized_var(calltime);
 
-	/* 1) run (and print duration) */
 	if (initcall_debug && system_state == SYSTEM_BOOTING) {
 		pr_debug("calling  %lli_%pF @ %i\n",
 			(long long)entry->cookie,
 			entry->func, task_pid_nr(current));
 		calltime = ktime_get();
 	}
+
 	entry->func(entry->data, entry->cookie);
+
 	if (initcall_debug && system_state == SYSTEM_BOOTING) {
-		rettime = ktime_get();
-		delta = ktime_sub(rettime, calltime);
+		ktime_t delta = ktime_sub(ktime_get(), calltime);
 		pr_debug("initcall %lli_%pF returned 0 after %lld usecs\n",
 			(long long)entry->cookie,
 			entry->func,
 			(long long)ktime_to_ns(delta) >> 10);
 	}
 
-	/* 2) remove self from the pending queues */
-	spin_lock_irqsave(&async_lock, flags);
-	list_del_init(&entry->domain_list);
-	list_del_init(&entry->global_list);
-
-	/* 3) free the entry */
-	kfree(entry);
-	atomic_dec(&entry_count);
-
-	spin_unlock_irqrestore(&async_lock, flags);
-
-	/* 4) wake up any waiters */
-	wake_up(&async_done);
+	complete_all(&entry->base.fence.completion);
+	async_fence_put(&entry->base.fence);
 }
 
-static async_cookie_t __async_schedule(async_func_t func, void *data, struct async_domain *domain)
+struct async_work *async_work_create(async_func_t func, void *data, gfp_t gfp)
 {
 	struct async_entry *entry;
-	unsigned long flags;
-	async_cookie_t newcookie;
-
-	/* allow irq-off callers */
-	entry = kzalloc(sizeof(struct async_entry), GFP_ATOMIC);
-
-	/*
-	 * If we're out of memory or if there's too much work
-	 * pending already, we execute synchronously.
-	 */
-	if (!entry || atomic_read(&entry_count) > MAX_WORK) {
-		kfree(entry);
-		spin_lock_irqsave(&async_lock, flags);
-		newcookie = next_cookie++;
-		spin_unlock_irqrestore(&async_lock, flags);
-
-		/* low on memory.. run synchronously */
-		func(data, newcookie);
-		return newcookie;
-	}
-	INIT_LIST_HEAD(&entry->domain_list);
-	INIT_LIST_HEAD(&entry->global_list);
+
+	entry = kmalloc(sizeof(*entry), gfp);
+	if (!entry)
+		return NULL;
+
+	async_fence_init(&entry->base.fence);
+
 	INIT_WORK(&entry->work, async_run_entry_fn);
+	entry->cookie = 0;
 	entry->func = func;
 	entry->data = data;
-	entry->domain = domain;
 
-	spin_lock_irqsave(&async_lock, flags);
+	set_bit(ASYNC_WORK_BIT, &entry->base.fence.flags);
+	async_fence_pending(&entry->base.fence);
 
-	/* allocate cookie and queue */
-	newcookie = entry->cookie = next_cookie++;
+	return &entry->base;
+}
+EXPORT_SYMBOL_GPL(async_work_create);
 
-	list_add_tail(&entry->domain_list, &domain->pending);
-	if (domain->registered)
-		list_add_tail(&entry->global_list, &async_global_pending);
+async_cookie_t queue_async_work(struct async_domain *domain,
+				struct async_work *work,
+				gfp_t gfp)
+{
+	struct async_entry *entry = container_of(work, typeof(*entry), base);
+	async_cookie_t cookie;
+
+	if (WARN_ON(test_and_set_bit(ASYNC_QUEUED_BIT,
+				     &entry->base.fence.flags)))
+		return 0;
 
-	atomic_inc(&entry_count);
-	spin_unlock_irqrestore(&async_lock, flags);
+	cookie = async_fence_add(&domain->fence, &entry->base.fence, gfp);
+	if (domain->registered)
+		cookie = async_fence_add(&async_dfl_domain.fence,
+					 &entry->base.fence,
+					 gfp);
+	entry->cookie = cookie;
 
 	/* mark that this task has queued an async job, used by module init */
 	current->flags |= PF_USED_ASYNC;
 
-	/* schedule for execution */
-	queue_work(system_unbound_wq, &entry->work);
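+	/*
+	 * Drop the initial pending count to allow execution; the extra
+	 * reference taken here is released by async_run_entry_fn() after
+	 * the work has run.
+	 */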
+	async_fence_signal(async_fence_get(&entry->base.fence));
 
-	return newcookie;
+	return entry->cookie;
+}
+EXPORT_SYMBOL_GPL(queue_async_work);
+
+async_cookie_t schedule_async_work(struct async_work *work)
+{
+	return queue_async_work(&async_dfl_domain, work, GFP_KERNEL);
 }
 
 /**
@@ -206,7 +322,7 @@  static async_cookie_t __async_schedule(async_func_t func, void *data, struct asy
  */
 async_cookie_t async_schedule(async_func_t func, void *data)
 {
-	return __async_schedule(func, data, &async_dfl_domain);
+	return async_schedule_domain(func, data, &async_dfl_domain);
 }
 EXPORT_SYMBOL_GPL(async_schedule);
 
@@ -225,7 +341,23 @@  EXPORT_SYMBOL_GPL(async_schedule);
 async_cookie_t async_schedule_domain(async_func_t func, void *data,
 				     struct async_domain *domain)
 {
-	return __async_schedule(func, data, domain);
+	struct async_work *work;
+	async_cookie_t cookie = 0;
+
+	work = NULL;
+	if (atomic_read(&domain->fence.pending) < MAX_WORK)
+		work = async_work_create(func, data, GFP_ATOMIC);
+	if (work) {
+		cookie = queue_async_work(domain, work, GFP_ATOMIC);
+		async_work_put(work);
+	}
+
+	if (!cookie) {
+		cookie = assign_cookie();
+		func(data, cookie);
+	}
+
+	return cookie;
 }
 EXPORT_SYMBOL_GPL(async_schedule_domain);
 
@@ -236,10 +368,17 @@  EXPORT_SYMBOL_GPL(async_schedule_domain);
  */
 void async_synchronize_full(void)
 {
-	async_synchronize_full_domain(NULL);
+	async_synchronize_full_domain(&async_dfl_domain);
 }
 EXPORT_SYMBOL_GPL(async_synchronize_full);
 
+void async_register_domain(struct async_domain *domain, bool registered)
+{
+	async_fence_init(&domain->fence);
+	domain->registered = registered;
+}
+EXPORT_SYMBOL_GPL(async_register_domain);
+
 /**
  * async_unregister_domain - ensure no more anonymous waiters on this domain
  * @domain: idle domain to flush out of any async_synchronize_full instances
@@ -251,10 +390,8 @@  EXPORT_SYMBOL_GPL(async_synchronize_full);
  */
 void async_unregister_domain(struct async_domain *domain)
 {
-	spin_lock_irq(&async_lock);
-	WARN_ON(!domain->registered || !list_empty(&domain->pending));
+	WARN_ON(atomic_read(&domain->fence.pending));
 	domain->registered = 0;
-	spin_unlock_irq(&async_lock);
 }
 EXPORT_SYMBOL_GPL(async_unregister_domain);
 
@@ -267,10 +404,29 @@  EXPORT_SYMBOL_GPL(async_unregister_domain);
  */
 void async_synchronize_full_domain(struct async_domain *domain)
 {
-	async_synchronize_cookie_domain(ASYNC_COOKIE_MAX, domain);
+	ktime_t uninitialized_var(starttime);
+
+	if (initcall_debug && system_state == SYSTEM_BOOTING) {
+		pr_debug("async_waiting @ %i\n", task_pid_nr(current));
+		starttime = ktime_get();
+	}
+
+	async_fence_wait(&domain->fence);
+
+	if (initcall_debug && system_state == SYSTEM_BOOTING) {
+		ktime_t delta = ktime_sub(ktime_get(), starttime);
+		pr_debug("async_continuing @ %i after %lli usec\n",
+			task_pid_nr(current),
+			(long long)ktime_to_ns(delta) >> 10);
+	}
 }
 EXPORT_SYMBOL_GPL(async_synchronize_full_domain);
 
+static async_cookie_t to_cookie(struct async_fence *entry)
+{
+	return container_of(entry, struct async_entry, base.fence)->cookie;
+}
+
 /**
  * async_synchronize_cookie_domain - synchronize asynchronous function calls within a certain domain with cookie checkpointing
  * @cookie: async_cookie_t to use as checkpoint
@@ -282,19 +438,47 @@  EXPORT_SYMBOL_GPL(async_synchronize_full_domain);
  */
 void async_synchronize_cookie_domain(async_cookie_t cookie, struct async_domain *domain)
 {
-	ktime_t uninitialized_var(starttime), delta, endtime;
+	ktime_t uninitialized_var(starttime);
+	struct completion *x = &domain->fence.completion;
 
 	if (initcall_debug && system_state == SYSTEM_BOOTING) {
 		pr_debug("async_waiting @ %i\n", task_pid_nr(current));
 		starttime = ktime_get();
 	}
 
-	wait_event(async_done, lowest_in_progress(domain) >= cookie);
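+	/*
+	 * Wait upon the most recently queued work not after the target
+	 * cookie, and repeat until no earlier work remains pending.
+	 */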
+	do {
+		struct async_fence *fence = NULL;
+		unsigned long irqflags;
+		wait_queue_t *wq;
 
-	if (initcall_debug && system_state == SYSTEM_BOOTING) {
-		endtime = ktime_get();
-		delta = ktime_sub(endtime, starttime);
+		spin_lock_irqsave(&x->wait.lock, irqflags);
+		list_for_each_entry(wq, &x->wait.task_list, task_list) {
+			struct async_fence *entry;
+
+			if (wq->func != async_fence_wake)
+				continue;
 
+			entry = wq->private;
+			if (!test_bit(ASYNC_WORK_BIT, &entry->flags))
+				continue;
+
+			if ((s64)(cookie - to_cookie(entry)) < 0)
+				break;
+
+			fence = entry;
+		}
+		async_fence_get(fence);
+		spin_unlock_irqrestore(&x->wait.lock, irqflags);
+
+		if (!fence)
+			break;
+
+		async_fence_wait(fence);
+		async_fence_put(fence);
+	} while (1);
+
+	if (initcall_debug && system_state == SYSTEM_BOOTING) {
+		ktime_t delta = ktime_sub(ktime_get(), starttime);
 		pr_debug("async_continuing @ %i after %lli usec\n",
 			task_pid_nr(current),
 			(long long)ktime_to_ns(delta) >> 10);
diff --git a/lib/Kconfig b/lib/Kconfig
index 3cca1222578e..2477b91a0edb 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -24,6 +24,13 @@  config HAVE_ARCH_BITREVERSE
 config RATIONAL
 	bool
 
+config ASYNC_DOMAIN_SELFTEST
+	tristate "Perform asynchronous domain self tests on init"
+	default n
+	help
+	  This option enables the asynchronous domain library functions
+	  to perform a self test on initialization.
+
 config GENERIC_STRNCPY_FROM_USER
 	bool
 
diff --git a/lib/Makefile b/lib/Makefile
index 7bd6fd436c97..a2f60c03324f 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -28,6 +28,7 @@  lib-y := ctype.o string.o vsprintf.o cmdline.o \
 	 earlycpio.o seq_buf.o nmi_backtrace.o
 
 obj-$(CONFIG_ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS) += usercopy.o
+obj-$(CONFIG_ASYNC_DOMAIN_SELFTEST) += test-async-domain.o
 lib-$(CONFIG_MMU) += ioremap.o
 lib-$(CONFIG_SMP) += cpumask.o
 lib-$(CONFIG_HAS_DMA) += dma-noop.o
diff --git a/lib/test-async-domain.c b/lib/test-async-domain.c
new file mode 100644
index 000000000000..757ea4ae42a0
--- /dev/null
+++ b/lib/test-async-domain.c
@@ -0,0 +1,429 @@ 
+/*
+ * Test cases for async-domain facility.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/async.h>
+#include <linux/module.h>
+#include <linux/delay.h>
+
+static void task_A(void *data, async_cookie_t cookie)
+{
+	long *result = data;
+	smp_store_mb(*result, 'A');
+}
+
+static void task_B(void *data, async_cookie_t cookie)
+{
+	long *result = data;
+	usleep_range(100, 200);
+	smp_store_mb(*result, 'B');
+}
+
+static int __init test_x(const char *name,
+			 struct async_domain *domain,
+			 async_func_t func,
+			 const long expected)
+{
+	struct async_work *A;
+	long result = 0;
+
+	A = async_work_create(func, &result, GFP_KERNEL);
+	if (!A)
+		return -ENOMEM;
+
+	queue_async_work(domain, A, GFP_KERNEL);
+	async_work_wait(A);
+	async_work_put(A);
+
+	if (READ_ONCE(result) != expected) {
+		pr_warn("%s expected %c [%ld], got %ld\n",
+			name, (char)expected, expected, result);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int __init test_A(struct async_domain *domain)
+{
+	return test_x(__func__, domain, task_A, 'A');
+}
+
+static int __init test_B(struct async_domain *domain)
+{
+	return test_x(__func__, domain, task_B, 'B');
+}
+
+static int __init test_x_fence(const char *name,
+			       struct async_domain *domain,
+			       async_func_t func,
+			       const long expected)
+{
+	struct async_work *A;
+	struct async_fence *fence;
+	long result = 0;
+
+	A = async_work_create(func, &result, GFP_KERNEL);
+	if (!A)
+		return -ENOMEM;
+
+	fence = async_fence_create(GFP_KERNEL);
+	if (!fence)
+		return -ENOMEM;
+
+	queue_async_work(domain, A, GFP_KERNEL);
+	async_fence_add(fence, &A->fence, GFP_KERNEL);
+	async_fence_signal(fence);
+
+	async_fence_wait(fence);
+
+	async_work_put(A);
+	async_fence_put(fence);
+
+	if (READ_ONCE(result) != expected) {
+		pr_warn("%s expected %c [%ld], got %ld\n",
+			name, (char)expected, expected, result);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int __init test_A_fence(struct async_domain *domain)
+{
+	return test_x_fence(__func__, domain, task_A, 'A');
+}
+
+static int __init test_B_fence(struct async_domain *domain)
+{
+	return test_x_fence(__func__, domain, task_B, 'B');
+}
+
+static int __init test_x_fence_y(const char *name,
+				 struct async_domain *domain,
+				 async_func_t x,
+				 async_func_t y,
+				 const long expected)
+{
+	struct async_work *A, *B;
+	struct async_fence *fence;
+	long result = 0;
+
+	A = async_work_create(x, &result, GFP_KERNEL);
+	if (!A)
+		return -ENOMEM;
+
+	B = async_work_create(y, &result, GFP_KERNEL);
+	if (!B)
+		return -ENOMEM;
+
+	fence = async_fence_create(GFP_KERNEL);
+	if (!fence)
+		return -ENOMEM;
+
+	async_fence_add(fence, &A->fence, GFP_KERNEL);
+	queue_async_work(domain, A, GFP_KERNEL);
+	async_work_put(A);
+
+	async_work_after(B, fence);
+	queue_async_work(domain, B, GFP_KERNEL);
+
+	async_fence_signal(fence);
+	async_fence_put(fence);
+
+	async_work_wait(B);
+	async_work_put(B);
+
+	if (READ_ONCE(result) != expected) {
+		pr_warn("%s expected %c [%ld], got %ld\n",
+			name, (char)expected, expected, result);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int __init test_A_fence_B(struct async_domain *domain)
+{
+	return test_x_fence_y(__func__, domain, task_A, task_B, 'B');
+}
+
+static int __init test_B_fence_A(struct async_domain *domain)
+{
+	return test_x_fence_y(__func__, domain, task_B, task_A, 'A');
+}
+
+struct long_context {
+	struct async_fence *barrier;
+	long *src;
+	long result;
+};
+
+static void task_wait(void *data, async_cookie_t cookie)
+{
+	struct long_context *ctx = data;
+
+	async_fence_wait(ctx->barrier);
+	smp_store_mb(ctx->result, READ_ONCE(*ctx->src));
+}
+
+static int __init test_pause(struct async_domain *domain)
+{
+	struct long_context ctx;
+	struct async_work *A, *B;
+	const long expected = 'B';
+	long out_B = 'A';
+
+	ctx.result = 0;
+	ctx.src = &out_B;
+
+	A = async_work_create(task_wait, &ctx, GFP_KERNEL);
+	if (!A)
+		return -ENOMEM;
+
+	B = async_work_create(task_B, &out_B, GFP_KERNEL);
+	if (!B)
+		return -ENOMEM;
+
+	ctx.barrier = &B->fence;
+
+	queue_async_work(domain, A, GFP_KERNEL);
+	queue_async_work(domain, B, GFP_KERNEL);
+	async_work_put(B);
+
+	async_work_wait(A);
+	async_work_put(A);
+
+	if (READ_ONCE(ctx.result) != expected) {
+		pr_warn("%s expected %c [%ld], got %ld\n",
+			__func__, (char)expected, expected, ctx.result);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static void task_signal(void *data, async_cookie_t cookie)
+{
+	struct long_context *ctx = data;
+
+	async_fence_signal(ctx->barrier);
+}
+
+static int __init test_manual(struct async_domain *domain)
+{
+	struct long_context ctx;
+	struct async_work *A, *B, *C;
+	const long expected = 'B';
+	long out_B = 'A';
+
+	ctx.result = 0;
+	ctx.src = &out_B;
+	ctx.barrier = async_fence_create(GFP_KERNEL);
+	if (!ctx.barrier)
+		return -ENOMEM;
+
+	A = async_work_create(task_wait, &ctx, GFP_KERNEL);
+	if (!A)
+		return -ENOMEM;
+
+	B = async_work_create(task_B, &out_B, GFP_KERNEL);
+	if (!B)
+		return -ENOMEM;
+
+	C = async_work_create(task_signal, &ctx, GFP_KERNEL);
+	if (!C)
+		return -ENOMEM;
+
+	async_work_after(C, &B->fence);
+
+	queue_async_work(domain, A, GFP_KERNEL);
+	queue_async_work(domain, B, GFP_KERNEL);
+	queue_async_work(domain, C, GFP_KERNEL);
+
+	async_work_wait(A);
+
+	async_work_put(C);
+	async_work_put(B);
+	async_work_put(A);
+	async_fence_put(ctx.barrier);
+
+	if (READ_ONCE(ctx.result) != expected) {
+		pr_warn("%s expected %c [%ld], got %ld\n",
+			__func__, (char)expected, expected, ctx.result);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int __init test_sync(struct async_domain *domain)
+{
+	struct async_work *B;
+	const long expected = 'B';
+	long result = 0;
+
+	B = async_work_create(task_B, &result, GFP_KERNEL);
+	if (!B)
+		return -ENOMEM;
+
+	queue_async_work(domain, B, GFP_KERNEL);
+	async_work_put(B);
+
+	async_synchronize_full_domain(domain);
+
+	if (READ_ONCE(result) != expected) {
+		pr_warn("%s expected %c [%ld], got %ld\n",
+			__func__, (char)expected, expected, result);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int __init test_implicit(struct async_domain *domain)
+{
+	const long expected = 'B';
+	long result = 0;
+
+	if (!async_schedule_domain(task_B, &result, domain))
+		return -ENOMEM;
+
+	async_synchronize_full_domain(domain);
+
+	if (READ_ONCE(result) != expected) {
+		pr_warn("%s expected %c [%ld], got %ld\n",
+			__func__, (char)expected, expected, result);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int __init test_registered(struct async_domain *domain)
+{
+	const long expected = 'B';
+	long result = 0;
+
+	if (!async_schedule_domain(task_B, &result, domain))
+		return -ENOMEM;
+
+	async_synchronize_full();
+
+	if (READ_ONCE(result) != expected) {
+		pr_warn("%s expected %c [%ld], got %ld\n",
+			__func__, (char)expected, expected, result);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static void task_nop(void *data, async_cookie_t cookie)
+{
+	async_cookie_t *result = data;
+	smp_store_mb(*result, cookie);
+}
+
+static int __init perf_nop(int batch, long timeout_us)
+{
+	ktime_t start;
+	async_cookie_t nop, last;
+	long count, delay;
+
+	count = 0;
+	nop = last = 0;
+	start = ktime_get();
+	do {
+		ktime_t delta;
+		int n;
+
+		for (n = 0; n < batch; n++)
+			last = async_schedule(task_nop, &nop);
+		async_synchronize_full();
+		delta = ktime_sub(ktime_get(), start);
+		delay = ktime_to_ns(delta) >> 10;
+		count += batch;
+	} while (delay < timeout_us);
+
+	pr_info("%ld nop tasks (batches of %d) completed in %ldus; last queued %lld, saw %lld\n",
+		count, batch, delay,
+		(long long)last, (long long)READ_ONCE(nop));
+	return 0;
+}
+
+static int __init test_async_domain_init(void)
+{
+	struct async_domain domain;
+	int ret;
+
+	pr_info("Testing async-domains\n");
+
+	async_register_domain(&domain, true);
+
+	ret = test_A(&domain);
+	if (ret)
+		return ret;
+
+	ret = test_A_fence(&domain);
+	if (ret)
+		return ret;
+
+	ret = test_A_fence_B(&domain);
+	if (ret)
+		return ret;
+
+	ret = test_B(&domain);
+	if (ret)
+		return ret;
+
+	ret = test_B_fence(&domain);
+	if (ret)
+		return ret;
+
+	ret = test_B_fence_A(&domain);
+	if (ret)
+		return ret;
+
+	ret = test_pause(&domain);
+	if (ret)
+		return ret;
+
+	ret = test_manual(&domain);
+	if (ret)
+		return ret;
+
+	ret = test_sync(&domain);
+	if (ret)
+		return ret;
+
+	ret = test_implicit(&domain);
+	if (ret)
+		return ret;
+
+	ret = test_registered(&domain);
+	if (ret)
+		return ret;
+
+	ret = perf_nop(1, 100);
+	if (ret)
+		return ret;
+
+	ret = perf_nop(128, 1000);
+	if (ret)
+		return ret;
+
+	async_unregister_domain(&domain);
+	return 0;
+}
+
+static void __exit test_async_domain_cleanup(void)
+{
+	async_synchronize_full();
+}
+
+module_init(test_async_domain_init);
+module_exit(test_async_domain_cleanup);
+
+MODULE_AUTHOR("Intel Corporation");
+MODULE_LICENSE("GPL");
diff --git a/tools/testing/selftests/lib/Makefile b/tools/testing/selftests/lib/Makefile
index 08360060ab14..46a77ac5b4c6 100644
--- a/tools/testing/selftests/lib/Makefile
+++ b/tools/testing/selftests/lib/Makefile
@@ -3,6 +3,6 @@ 
 # No binaries, but make sure arg-less "make" doesn't trigger "run_tests"
 all:
 
-TEST_PROGS := printf.sh bitmap.sh
+TEST_PROGS := printf.sh bitmap.sh async-domain.sh
 
 include ../lib.mk
diff --git a/tools/testing/selftests/lib/async-domain.sh b/tools/testing/selftests/lib/async-domain.sh
new file mode 100755
index 000000000000..cd10127d56fa
--- /dev/null
+++ b/tools/testing/selftests/lib/async-domain.sh
@@ -0,0 +1,10 @@ 
+#!/bin/sh
+# Runs infrastructure tests using the test-async-domain kernel module
+
+if /sbin/modprobe -q test-async-domain; then
+	/sbin/modprobe -q -r test-async-domain
+	echo "async-domain: ok"
+else
+	echo "async-domain: [FAIL]"
+	exit 1
+fi