
[01/13] drm/i915: Add a sw fence for collecting up dma fences

Message ID 20160825090839.9952-2-chris@chris-wilson.co.uk (mailing list archive)
State New, archived

Commit Message

Chris Wilson Aug. 25, 2016, 9:08 a.m. UTC
This is really a core kernel struct in disguise until we can finally
place it in kernel/. There is an immediate need for a fence collection
mechanism that is more flexible than fence-array, in particular being
able to easily drive request submission via events (and not just
interrupt driven). The same mechanism would be useful for handling
nonblocking and asynchronous atomic modesets, parallel execution and
more, but for the time being just create a local sw fence for execbuf.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/Makefile        |   1 +
 drivers/gpu/drm/i915/i915_sw_fence.c | 329 +++++++++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_sw_fence.h |  56 ++++++
 3 files changed, 386 insertions(+)
 create mode 100644 drivers/gpu/drm/i915/i915_sw_fence.c
 create mode 100644 drivers/gpu/drm/i915/i915_sw_fence.h
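
As context for the review below, a minimal usage sketch (not taken from the
execbuf patch later in the series; the notify callback and caller are invented
for illustration) of collecting a dma fence into an i915_sw_fence and getting a
callback once everything it waits on has signaled:

#include <linux/fence.h>
#include "i915_sw_fence.h"

/* Hypothetical notify callback: runs once all awaited fences have signaled. */
static int __i915_sw_fence_call example_notify(struct i915_sw_fence *fence)
{
	/* Kick off the deferred work (e.g. request submission) here. */
	return NOTIFY_DONE;
}

static int example_collect(struct i915_sw_fence *wait, struct fence *dma)
{
	int ret;

	i915_sw_fence_init(wait, example_notify);

	/* Returns 1 if a callback was installed, 0 if dma had already signaled. */
	ret = i915_sw_fence_await_dma_fence(wait, dma, GFP_KERNEL);
	if (ret < 0)
		return ret;

	/* Drop the initial pending count; the notify callback fires once
	 * every awaited fence has completed. */
	i915_sw_fence_commit(wait);
	return 0;
}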

Comments

John Harrison Aug. 25, 2016, 10:42 a.m. UTC | #1
Writing up the IRC discussion...

I think the naming of _complete() vs _done() is unnecessarily confusing 
- the first being an action and the second a query. Chris says that the 
idea is to match the existing naming convention of 'struct completion'. 
If the plan is to eventually move this code out of i915 and into the 
kernel proper then I guess following like a lemming is probably the way 
to go :(.

I also don't like the idea of packing the flags into the lower bits of a 
function pointer. Or, going by the naming, packing a function pointer 
into the upper bits of a flags word. There was a comment about atomic 
updates of both flags and pointer but that is never actually used in the 
code. Chris says it is only for better structure packing. I'm not 
convinced the benefit is worth the confusion and maintenance risk. E.g. 
is unsigned long guaranteed to always and ever be sufficient for a pointer?
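
For comparison, the unpacked layout being argued for here would look
something like this (illustrative sketch only, not a counter-proposal from
the patch):

/* Unpacked alternative: one extra word per fence, but no pointer/flag
 * aliasing and no reliance on function-pointer alignment. */
struct i915_sw_fence {
	wait_queue_head_t wait;
	i915_sw_fence_notify_t notify;	/* plain function pointer */
	unsigned long flags;		/* plain flag bits */
	struct kref kref;
	atomic_t pending;
};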

John.

On 25/08/2016 10:08, Chris Wilson wrote:
> This is really a core kernel struct in disguise until we can finally
> place it in kernel/. There is an immediate need for a fence collection
> mechanism that is more flexible than fence-array, in particular being
> able to easily drive request submission via events (and not just
> interrupt driven). The same mechanism would be useful for handling
> nonblocking and asynchronous atomic modesets, parallel execution and
> more, but for the time being just create a local sw fence for execbuf.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
<SNIP>
Chris Wilson Aug. 25, 2016, 10:51 a.m. UTC | #2
On Thu, Aug 25, 2016 at 11:42:41AM +0100, John Harrison wrote:
> I'm not convinced the benefit is worth the
> confusion and maintenance risk. E.g. is unsigned long guaranteed to
> always and ever be sufficient for a pointer?

Yes, unsigned long will always be sufficient for a kernel pointer. I'm
sure there is a classic rant from Linus about this somewhere...
Quite a few core structs will pass around unsigned long instead of
pointers, which can be quite annoying to work with!
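
For what it's worth, the assumption could be made explicit with a
compile-time check along these lines (illustrative sketch, the helper name
is invented):

#include <linux/bug.h>

/* Fail the build if a kernel pointer ever stops fitting in unsigned long. */
static inline void i915_sw_fence_assert_pointer_size(void)
{
	BUILD_BUG_ON(sizeof(void *) > sizeof(unsigned long));
}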
-Chris
Joonas Lahtinen Aug. 25, 2016, 11:49 a.m. UTC | #3
On to, 2016-08-25 at 10:08 +0100, Chris Wilson wrote:
> +++ b/drivers/gpu/drm/i915/i915_sw_fence.c
> @@ -0,0 +1,329 @@
> +/*
> + * (C) Copyright 2016 Intel Corporation
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License
> + * as published by the Free Software Foundation; version 2
> + * of the License.
> + */

Copyright notice is rather brief, copy from i915_gem_execbuffer.c

> +static int __i915_sw_fence_notify(struct i915_sw_fence *fence)
> +{
> +	i915_sw_fence_notify_t fn;
> +
> +	fn = (i915_sw_fence_notify_t)(fence->flags & I915_SW_FENCE_MASK);

I'd define an accessor function in scatterlist.h fashion.
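
Something along these lines, perhaps (accessor names invented here, not
part of the patch):

/* Hypothetical scatterlist.h-style accessors for the packed field: */
static inline i915_sw_fence_notify_t
__i915_sw_fence_fn(const struct i915_sw_fence *fence)
{
	return (i915_sw_fence_notify_t)(fence->flags & I915_SW_FENCE_MASK);
}

static inline bool
__i915_sw_fence_flag(const struct i915_sw_fence *fence, unsigned int bit)
{
	return test_bit(bit, &fence->flags);
}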

Although I'm not convinced of packing the flags with the function
pointer. How many fences do we expect to be allocated simultaneously in
a real usage scenario?

> +static void __i915_sw_fence_wake_up_all(struct i915_sw_fence *fence,
> +				 struct list_head *continuation)
> +{
> +	wait_queue_head_t *x = &fence->wait;
> +	unsigned long flags;
> +
> +	atomic_dec(&fence->pending);
> +
> +	/*
> +	 * To prevent unbounded recursion as we traverse the graph of i915_sw_fences,

Long line due to s/kfence/.../

> +	 * we move the task_list from this, the next ready fence, to the tail of
> +	 * the original fence's task_list (and so added to the list to be
> +	 * woken).
> +	 */
> +	smp_mb__before_spinlock();
> +	if (!list_empty_careful(&x->task_list)) {

if (list_empty_careful(&x->task_list))
	return;

> +static bool __i915_sw_fence_check_if_after(struct i915_sw_fence *fence,
> +				    const struct i915_sw_fence * const signaler)
> +{
> +	wait_queue_t *wq;
> +
> +	if (__test_and_set_bit(I915_SW_FENCE_CHECKED_BIT, &fence->flags))
> +		return false;
> +
> +	if (fence == signaler)
> +		return true;

Not sure if I would expect _if_after to return true on an equal
object...

> +int i915_sw_fence_await_reservation(struct i915_sw_fence *fence,
> +				    struct reservation_object *resv,
> +				    const struct fence_ops *exclude,
> +				    bool write,
> +				    gfp_t gfp)
> +{
> +	struct fence *excl, **shared;
> +	unsigned int count, i;
> +	int ret;
> +
> +	ret = reservation_object_get_fences_rcu(resv, &excl, &count, &shared);
> +	if (ret)
> +		return ret;
> +
> +	if (write) {
> +		for (i = 0; i < count; i++) {
> +			if (shared[i]->ops == exclude)
> +				continue;
> +
> +			ret |= i915_sw_fence_await_dma_fence(fence, shared[i], gfp);

Do we really want a bitwise OR here?

> +			if (ret < 0)
> +				goto out;
> +		}
> +	}
> +
> +	if (excl && excl->ops != exclude)
> +		ret |= i915_sw_fence_await_dma_fence(fence, excl, gfp);

Ditto.

> +
> +out:
> +	fence_put(excl);
> +	for (i = 0; i < count; i++)
> +		fence_put(shared[i]);
> +	kfree(shared);
> +
> +	return ret;
> +}

<SNIP>

> +struct i915_sw_fence {
> +	wait_queue_head_t wait;
> +	unsigned long flags;

The name is rather deceiving for a field that holds a callback function.

> +#define __i915_sw_fence_call __aligned(4)

Not packing would get rid of this too...

> +
> +void i915_sw_fence_init(struct i915_sw_fence *fence, i915_sw_fence_notify_t fn);
> +void i915_sw_fence_commit(struct i915_sw_fence *fence);

Not _signal?

> +static inline bool i915_sw_fence_done(const struct i915_sw_fence *fence)

_is_done() or _is_completed()

Regards, Joonas
Chris Wilson Aug. 25, 2016, 12:25 p.m. UTC | #4
On Thu, Aug 25, 2016 at 02:49:59PM +0300, Joonas Lahtinen wrote:
> On to, 2016-08-25 at 10:08 +0100, Chris Wilson wrote:
> > +++ b/drivers/gpu/drm/i915/i915_sw_fence.c
> > @@ -0,0 +1,329 @@
> > +/*
> > + * (C) Copyright 2016 Intel Corporation
> > + *
> > + * This program is free software; you can redistribute it and/or
> > + * modify it under the terms of the GNU General Public License
> > + * as published by the Free Software Foundation; version 2
> > + * of the License.
> > + */
> 
> Copyright notice is rather brief, copy from i915_gem_execbuffer.c
> 
> > +static int __i915_sw_fence_notify(struct i915_sw_fence *fence)
> > +{
> > +	i915_sw_fence_notify_t fn;
> > +
> > +	fn = (i915_sw_fence_notify_t)(fence->flags & I915_SW_FENCE_MASK);
> 
> I'd define an accessor function in scatterlist.h fashion.

Hmm, this was meant to be the accessor function - nothing else is
supposed to pull the fn out of the flags. But that can be split out
again.
 
> Although I'm not convinced of packing the flags with the function
> pointer. How many fences do we expect to be allocated simultaneously in
> a real usage scenario?

Several thousand for async init. Tens of thousands per second for i915,
though we only expect a few hundred in flight at any time. Ditto for
kms. Ditto for async work. And places I haven't looked at yet.

That's more than there are mutex_init() calls!

> > +static bool __i915_sw_fence_check_if_after(struct i915_sw_fence *fence,
> > +				    const struct i915_sw_fence * const signaler)
> > +{
> > +	wait_queue_t *wq;
> > +
> > +	if (__test_and_set_bit(I915_SW_FENCE_CHECKED_BIT, &fence->flags))
> > +		return false;
> > +
> > +	if (fence == signaler)
> > +		return true;
> 
> Not sure if I would expect _if_after to return true on an equal
> object...

True being the error case.

kfence_err_if_after?
kfence_abort_if_after?

> > +int i915_sw_fence_await_reservation(struct i915_sw_fence *fence,
> > +				    struct reservation_object *resv,
> > +				    const struct fence_ops *exclude,
> > +				    bool write,
> > +				    gfp_t gfp)
> > +{
> > +	struct fence *excl, **shared;
> > +	unsigned int count, i;
> > +	int ret;
> > +
> > +	ret = reservation_object_get_fences_rcu(resv, &excl, &count, &shared);
> > +	if (ret)
> > +		return ret;
> > +
> > +	if (write) {
> > +		for (i = 0; i < count; i++) {
> > +			if (shared[i]->ops == exclude)
> > +				continue;
> > +
> > +			ret |= i915_sw_fence_await_dma_fence(fence, shared[i], gfp);
> 
> Do we really want a bitwise OR here?

Correctly it should be

int pending = await_dma_fence();
if (pending < 0) {
	ret = pending;
	goto out;
}
ret |= pending;

I've been using the ternary result as a shortcut (if the fence has
already passed, we can dispose of it, for example). The code would become
much simpler if we didn't pass that information back - but I think it is
worth it.
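
Applied to the shared-fence loop in i915_sw_fence_await_reservation,
that correction would look roughly like this (sketch):

for (i = 0; i < count; i++) {
	int pending;

	if (shared[i]->ops == exclude)
		continue;

	pending = i915_sw_fence_await_dma_fence(fence, shared[i], gfp);
	if (pending < 0) {
		ret = pending;
		goto out;
	}

	ret |= pending;
}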

> > +void i915_sw_fence_init(struct i915_sw_fence *fence, i915_sw_fence_notify_t fn);
> > +void i915_sw_fence_commit(struct i915_sw_fence *fence);
> 
> Not _signal?

signal is fence_complete() (corresponding to complete(struct
completion))

commit is a convenience function for complete + kref_put. I do like it
as it gives a clear finish to a
	kfence_init(); .. setup ...; kfence_commit()
sequence. But the name is only so-so.

> > +static inline bool i915_sw_fence_done(const struct i915_sw_fence *fence)
> 
> _is_done() or _is_completed()

As above, the instruction was to use _done so that it matches struct
completion as closely as possible. They have already chosen their
interface; we have to try and fit in as closely as possible.

I do agree, I much prefer having _is_ in my boolean functions - but
matching struct completion is a must for an upstream proposal (imo).
-Chris
Chris Wilson Aug. 25, 2016, 4:52 p.m. UTC | #5
On Thu, Aug 25, 2016 at 02:49:59PM +0300, Joonas Lahtinen wrote:
> > +	 * we move the task_list from this, the next ready fence, to the tail of
> > +	 * the original fence's task_list (and so added to the list to be
> > +	 * woken).
> > +	 */
> > +	smp_mb__before_spinlock();
> > +	if (!list_empty_careful(&x->task_list)) {
> 
> if (list_empty_careful(&x->task_list))
> 	return;

It's just broken. I added it recently after reading

void finish_wait(wait_queue_head_t *q, wait_queue_t *wait)
{
        unsigned long flags;

        __set_current_state(TASK_RUNNING);
        /*
         * We can check for list emptiness outside the lock
         * IFF:
         *  - we use the "careful" check that verifies both
         *    the next and prev pointers, so that there cannot
         *    be any half-pending updates in progress on other
         *    CPU's that we haven't seen yet (and that might
         *    still change the stack area.
         * and
         *  - all other users take the lock (ie we can only
         *    have _one_ other CPU that looks at or modifies
         *    the list).
         */
        if (!list_empty_careful(&wait->task_list)) {
                spin_lock_irqsave(&q->lock, flags);
                list_del_init(&wait->task_list);
                spin_unlock_irqrestore(&q->lock, flags);
        }
}

and convinced myself that it was also safe to apply here.
Turns out that spinlock is very hard to avoid.
-Chris

Patch

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index a7da24640e88..a998c2bce70a 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -16,6 +16,7 @@  i915-y := i915_drv.o \
 	  i915_params.o \
 	  i915_pci.o \
           i915_suspend.o \
+	  i915_sw_fence.o \
 	  i915_sysfs.o \
 	  intel_csr.o \
 	  intel_device_info.o \
diff --git a/drivers/gpu/drm/i915/i915_sw_fence.c b/drivers/gpu/drm/i915/i915_sw_fence.c
new file mode 100644
index 000000000000..44a02d836a94
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_sw_fence.c
@@ -0,0 +1,329 @@ 
+/*
+ * (C) Copyright 2016 Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+
+#include <linux/slab.h>
+#include <linux/fence.h>
+#include <linux/reservation.h>
+
+#include "i915_sw_fence.h"
+
+static DEFINE_SPINLOCK(i915_sw_fence_lock);
+
+static int __i915_sw_fence_notify(struct i915_sw_fence *fence)
+{
+	i915_sw_fence_notify_t fn;
+
+	fn = (i915_sw_fence_notify_t)(fence->flags & I915_SW_FENCE_MASK);
+	return fn(fence);
+}
+
+static void i915_sw_fence_free(struct kref *kref)
+{
+	struct i915_sw_fence *fence = container_of(kref, typeof(*fence), kref);
+
+	WARN_ON(atomic_read(&fence->pending) > 0);
+
+	if (fence->flags & I915_SW_FENCE_MASK)
+		WARN_ON(__i915_sw_fence_notify(fence) != NOTIFY_DONE);
+	else
+		kfree(fence);
+}
+
+static void i915_sw_fence_put(struct i915_sw_fence *fence)
+{
+	kref_put(&fence->kref, i915_sw_fence_free);
+}
+
+static struct i915_sw_fence *i915_sw_fence_get(struct i915_sw_fence *fence)
+{
+	kref_get(&fence->kref);
+	return fence;
+}
+
+static void __i915_sw_fence_wake_up_all(struct i915_sw_fence *fence,
+				 struct list_head *continuation)
+{
+	wait_queue_head_t *x = &fence->wait;
+	unsigned long flags;
+
+	atomic_dec(&fence->pending);
+
+	/*
+	 * To prevent unbounded recursion as we traverse the graph of i915_sw_fences,
+	 * we move the task_list from this, the next ready fence, to the tail of
+	 * the original fence's task_list (and so added to the list to be
+	 * woken).
+	 */
+	smp_mb__before_spinlock();
+	if (!list_empty_careful(&x->task_list)) {
+		spin_lock_irqsave_nested(&x->lock, flags, 1 + !!continuation);
+		if (continuation) {
+			list_splice_tail_init(&x->task_list, continuation);
+		} else {
+			LIST_HEAD(extra);
+
+			do {
+				__wake_up_locked_key(x, TASK_NORMAL, &extra);
+
+				if (list_empty(&extra))
+					break;
+
+				list_splice_tail_init(&extra, &x->task_list);
+			} while (1);
+		}
+		spin_unlock_irqrestore(&x->lock, flags);
+	}
+}
+
+static void __i915_sw_fence_complete(struct i915_sw_fence *fence,
+				     struct list_head *continuation)
+{
+	if (!atomic_dec_and_test(&fence->pending))
+		return;
+
+	if (fence->flags & I915_SW_FENCE_MASK &&
+	    __i915_sw_fence_notify(fence) != NOTIFY_DONE)
+		return;
+
+	__i915_sw_fence_wake_up_all(fence, continuation);
+}
+
+static void i915_sw_fence_complete(struct i915_sw_fence *fence)
+{
+	if (WARN_ON(i915_sw_fence_done(fence)))
+		return;
+
+	__i915_sw_fence_complete(fence, NULL);
+}
+
+static void i915_sw_fence_await(struct i915_sw_fence *fence)
+{
+	WARN_ON(atomic_inc_return(&fence->pending) <= 1);
+}
+
+static void i915_sw_fence_wait(struct i915_sw_fence *fence)
+{
+	wait_event(fence->wait, i915_sw_fence_done(fence));
+}
+
+void i915_sw_fence_init(struct i915_sw_fence *fence, i915_sw_fence_notify_t fn)
+{
+	BUG_ON((unsigned long)fn & ~I915_SW_FENCE_MASK);
+
+	init_waitqueue_head(&fence->wait);
+	kref_init(&fence->kref);
+	atomic_set(&fence->pending, 1);
+	fence->flags = (unsigned long)fn;
+}
+
+void i915_sw_fence_commit(struct i915_sw_fence *fence)
+{
+	i915_sw_fence_complete(fence);
+	i915_sw_fence_put(fence);
+}
+
+static int i915_sw_fence_wake(wait_queue_t *wq, unsigned mode, int flags, void *key)
+{
+	list_del(&wq->task_list);
+	__i915_sw_fence_complete(wq->private, key);
+	i915_sw_fence_put(wq->private);
+	kfree(wq);
+	return 0;
+}
+
+static bool __i915_sw_fence_check_if_after(struct i915_sw_fence *fence,
+				    const struct i915_sw_fence * const signaler)
+{
+	wait_queue_t *wq;
+
+	if (__test_and_set_bit(I915_SW_FENCE_CHECKED_BIT, &fence->flags))
+		return false;
+
+	if (fence == signaler)
+		return true;
+
+	list_for_each_entry(wq, &fence->wait.task_list, task_list) {
+		if (wq->func != i915_sw_fence_wake)
+			continue;
+
+		if (__i915_sw_fence_check_if_after(wq->private, signaler))
+			return true;
+	}
+
+	return false;
+}
+
+static void __i915_sw_fence_clear_checked_bit(struct i915_sw_fence *fence)
+{
+	wait_queue_t *wq;
+
+	if (!__test_and_clear_bit(I915_SW_FENCE_CHECKED_BIT, &fence->flags))
+		return;
+
+	list_for_each_entry(wq, &fence->wait.task_list, task_list) {
+		if (wq->func != i915_sw_fence_wake)
+			continue;
+
+		__i915_sw_fence_clear_checked_bit(wq->private);
+	}
+}
+
+static bool i915_sw_fence_check_if_after(struct i915_sw_fence *fence,
+				  const struct i915_sw_fence * const signaler)
+{
+	unsigned long flags;
+	bool err;
+
+	if (!IS_ENABLED(CONFIG_I915_SW_FENCE_CHECK_DAG))
+		return false;
+
+	spin_lock_irqsave(&i915_sw_fence_lock, flags);
+	err = __i915_sw_fence_check_if_after(fence, signaler);
+	__i915_sw_fence_clear_checked_bit(fence);
+	spin_unlock_irqrestore(&i915_sw_fence_lock, flags);
+
+	return err;
+}
+
+static wait_queue_t *__i915_sw_fence_create_wq(struct i915_sw_fence *fence, gfp_t gfp)
+{
+	wait_queue_t *wq;
+
+	wq = kmalloc(sizeof(*wq), gfp);
+	if (unlikely(!wq))
+		return NULL;
+
+	INIT_LIST_HEAD(&wq->task_list);
+	wq->flags = 0;
+	wq->func = i915_sw_fence_wake;
+	wq->private = i915_sw_fence_get(fence);
+
+	i915_sw_fence_await(fence);
+
+	return wq;
+}
+
+int i915_sw_fence_await_sw_fence(struct i915_sw_fence *fence,
+				 struct i915_sw_fence *signaler,
+				 gfp_t gfp)
+{
+	wait_queue_t *wq;
+	unsigned long flags;
+	int pending;
+
+	if (i915_sw_fence_done(signaler))
+		return 0;
+
+	/* The dependency graph must be acyclic. */
+	if (unlikely(i915_sw_fence_check_if_after(fence, signaler)))
+		return -EINVAL;
+
+	wq = __i915_sw_fence_create_wq(fence, gfp);
+	if (unlikely(!wq)) {
+		if (!gfpflags_allow_blocking(gfp))
+			return -ENOMEM;
+
+		i915_sw_fence_wait(signaler);
+		return 0;
+	}
+
+	spin_lock_irqsave(&signaler->wait.lock, flags);
+	if (likely(!i915_sw_fence_done(signaler))) {
+		__add_wait_queue_tail(&signaler->wait, wq);
+		pending = 1;
+	} else {
+		i915_sw_fence_wake(wq, 0, 0, NULL);
+		pending = 0;
+	}
+	spin_unlock_irqrestore(&signaler->wait.lock, flags);
+
+	return pending;
+}
+
+struct dma_fence_cb {
+	struct fence_cb base;
+	struct i915_sw_fence *fence;
+};
+
+static void dma_i915_sw_fence_wake(struct fence *dma, struct fence_cb *data)
+{
+	struct dma_fence_cb *cb = container_of(data, typeof(*cb), base);
+
+	i915_sw_fence_commit(cb->fence);
+	kfree(cb);
+}
+
+int i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence,
+				  struct fence *dma, gfp_t gfp)
+{
+	struct dma_fence_cb *cb;
+	int ret;
+
+	if (fence_is_signaled(dma))
+		return 0;
+
+	cb = kmalloc(sizeof(*cb), gfp);
+	if (!cb) {
+		if (!gfpflags_allow_blocking(gfp))
+			return -ENOMEM;
+
+		return fence_wait(dma, false);
+	}
+
+	cb->fence = i915_sw_fence_get(fence);
+	i915_sw_fence_await(fence);
+
+	ret = fence_add_callback(dma, &cb->base, dma_i915_sw_fence_wake);
+	if (ret == 0) {
+		ret = 1;
+	} else {
+		dma_i915_sw_fence_wake(dma, &cb->base);
+		if (ret == -ENOENT) /* fence already signaled */
+			ret = 0;
+	}
+
+	return ret;
+}
+
+int i915_sw_fence_await_reservation(struct i915_sw_fence *fence,
+				    struct reservation_object *resv,
+				    const struct fence_ops *exclude,
+				    bool write,
+				    gfp_t gfp)
+{
+	struct fence *excl, **shared;
+	unsigned int count, i;
+	int ret;
+
+	ret = reservation_object_get_fences_rcu(resv, &excl, &count, &shared);
+	if (ret)
+		return ret;
+
+	if (write) {
+		for (i = 0; i < count; i++) {
+			if (shared[i]->ops == exclude)
+				continue;
+
+			ret |= i915_sw_fence_await_dma_fence(fence, shared[i], gfp);
+			if (ret < 0)
+				goto out;
+		}
+	}
+
+	if (excl && excl->ops != exclude)
+		ret |= i915_sw_fence_await_dma_fence(fence, excl, gfp);
+
+out:
+	fence_put(excl);
+	for (i = 0; i < count; i++)
+		fence_put(shared[i]);
+	kfree(shared);
+
+	return ret;
+}
diff --git a/drivers/gpu/drm/i915/i915_sw_fence.h b/drivers/gpu/drm/i915/i915_sw_fence.h
new file mode 100644
index 000000000000..ac99c7965436
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_sw_fence.h
@@ -0,0 +1,56 @@ 
+/*
+ * i915_sw_fence.h - library routines for N:M synchronisation points
+ *
+ * Copyright (C) 2016 Intel Corporation
+ *
+ * This file is released under the GPLv2.
+ *
+ */
+
+#ifndef _I915_SW_FENCE_H_
+#define _I915_SW_FENCE_H_
+
+#include <linux/gfp.h>
+#include <linux/kref.h>
+#include <linux/notifier.h> /* for NOTIFY_DONE */
+#include <linux/wait.h>
+
+struct completion;
+struct fence;
+struct fence_ops;
+struct reservation_object;
+
+struct i915_sw_fence {
+	wait_queue_head_t wait;
+	unsigned long flags;
+	struct kref kref;
+	atomic_t pending;
+};
+
+#define I915_SW_FENCE_CHECKED_BIT	0 /* used internally for DAG checking */
+#define I915_SW_FENCE_PRIVATE_BIT	1 /* available for use by owner */
+#define I915_SW_FENCE_MASK		(~3)
+
+typedef int (*i915_sw_fence_notify_t)(struct i915_sw_fence *);
+#define __i915_sw_fence_call __aligned(4)
+
+void i915_sw_fence_init(struct i915_sw_fence *fence, i915_sw_fence_notify_t fn);
+void i915_sw_fence_commit(struct i915_sw_fence *fence);
+
+int i915_sw_fence_await_sw_fence(struct i915_sw_fence *fence,
+				 struct i915_sw_fence *after,
+				 gfp_t gfp);
+int i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence,
+				  struct fence *dma, gfp_t gfp);
+int i915_sw_fence_await_reservation(struct i915_sw_fence *fence,
+			     struct reservation_object *resv,
+			     const struct fence_ops *exclude,
+			     bool write,
+			     gfp_t gfp);
+
+static inline bool i915_sw_fence_done(const struct i915_sw_fence *fence)
+{
+	return atomic_read(&fence->pending) < 0;
+}
+
+#endif /* _I915_SW_FENCE_H_ */