diff mbox series

[6/6] dma-fence: Store the timestamp in the same union as the cb_list

Message ID 20190817144736.7826-6-chris@chris-wilson.co.uk (mailing list archive)
State New, archived
Headers show
Series [1/6] dma-buf: Introduce selftesting framework | expand

Commit Message

Chris Wilson Aug. 17, 2019, 2:47 p.m. UTC
The timestamp and the cb_list are mutually exclusive, the cb_list can
only be added to prior to being signaled (and once signaled we drain),
while the timestamp is only valid upon being signaled. Both the
timestamp and the cb_list are only valid while the fence is alive, and
as soon as no references are held can be replaced by the rcu_head.

By reusing the union for the timestamp, we squeeze the base dma_fence
struct to 64 bytes on x86-64.

Suggested-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Christian König <christian.koenig@amd.com>
---
 drivers/dma-buf/dma-fence.c                 | 16 +++++++++-------
 drivers/gpu/drm/i915/gt/intel_breadcrumbs.c | 13 +++++++------
 drivers/gpu/drm/vmwgfx/vmwgfx_fence.c       |  3 +++
 include/linux/dma-fence.h                   | 17 +++++++++++++----
 4 files changed, 32 insertions(+), 17 deletions(-)

Comments

Christian König Aug. 17, 2019, 3:20 p.m. UTC | #1
Am 17.08.19 um 16:47 schrieb Chris Wilson:
> The timestamp and the cb_list are mutually exclusive, the cb_list can
> only be added to prior to being signaled (and once signaled we drain),
> while the timestamp is only valid upon being signaled. Both the
> timestamp and the cb_list are only valid while the fence is alive, and
> as soon as no references are held can be replaced by the rcu_head.
>
> By reusing the union for the timestamp, we squeeze the base dma_fence
> struct to 64 bytes on x86-64.
>
> Suggested-by: Christian König <christian.koenig@amd.com>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Christian König <christian.koenig@amd.com>
> ---
>   drivers/dma-buf/dma-fence.c                 | 16 +++++++++-------
>   drivers/gpu/drm/i915/gt/intel_breadcrumbs.c | 13 +++++++------
>   drivers/gpu/drm/vmwgfx/vmwgfx_fence.c       |  3 +++
>   include/linux/dma-fence.h                   | 17 +++++++++++++----
>   4 files changed, 32 insertions(+), 17 deletions(-)
>
> diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c
> index 89d96e3e6df6..2c21115b1a37 100644
> --- a/drivers/dma-buf/dma-fence.c
> +++ b/drivers/dma-buf/dma-fence.c
> @@ -129,6 +129,7 @@ EXPORT_SYMBOL(dma_fence_context_alloc);
>   int dma_fence_signal_locked(struct dma_fence *fence)
>   {
>   	struct dma_fence_cb *cur, *tmp;
> +	struct list_head cb_list;
>   
>   	lockdep_assert_held(fence->lock);
>   
> @@ -136,16 +137,16 @@ int dma_fence_signal_locked(struct dma_fence *fence)
>   				      &fence->flags)))
>   		return -EINVAL;
>   
> +	/* Stash the cb_list before replacing it with the timestamp */
> +	list_replace(&fence->cb_list, &cb_list);

Stashing the timestamp instead is probably less bytes to modify.

> +
>   	fence->timestamp = ktime_get();
>   	set_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &fence->flags);
>   	trace_dma_fence_signaled(fence);
>   
> -	if (!list_empty(&fence->cb_list)) {
> -		list_for_each_entry_safe(cur, tmp, &fence->cb_list, node) {
> -			INIT_LIST_HEAD(&cur->node);
> -			cur->func(fence, cur);
> -		}
> -		INIT_LIST_HEAD(&fence->cb_list);
> +	list_for_each_entry_safe(cur, tmp, &cb_list, node) {
> +		INIT_LIST_HEAD(&cur->node);
> +		cur->func(fence, cur);
>   	}
>   
>   	return 0;
> @@ -234,7 +235,8 @@ void dma_fence_release(struct kref *kref)
>   
>   	trace_dma_fence_destroy(fence);
>   
> -	if (WARN(!list_empty(&fence->cb_list),
> +	if (WARN(!list_empty(&fence->cb_list) &&
> +		 !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags),
>   		 "Fence %s:%s:%llx:%llx released with pending signals!\n",
>   		 fence->ops->get_driver_name(fence),
>   		 fence->ops->get_timeline_name(fence),
> diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
> index 2bc9c460e78d..09c68dda2098 100644
> --- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
> +++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
> @@ -114,18 +114,18 @@ __dma_fence_signal__timestamp(struct dma_fence *fence, ktime_t timestamp)
>   }
>   
>   static void
> -__dma_fence_signal__notify(struct dma_fence *fence)
> +__dma_fence_signal__notify(struct dma_fence *fence,
> +			   const struct list_head *list)
>   {
>   	struct dma_fence_cb *cur, *tmp;
>   
>   	lockdep_assert_held(fence->lock);
>   	lockdep_assert_irqs_disabled();
>   
> -	list_for_each_entry_safe(cur, tmp, &fence->cb_list, node) {
> +	list_for_each_entry_safe(cur, tmp, list, node) {
>   		INIT_LIST_HEAD(&cur->node);
>   		cur->func(fence, cur);
>   	}
> -	INIT_LIST_HEAD(&fence->cb_list);
>   }
>   
>   void intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine)
> @@ -187,11 +187,12 @@ void intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine)
>   	list_for_each_safe(pos, next, &signal) {
>   		struct i915_request *rq =
>   			list_entry(pos, typeof(*rq), signal_link);
> -
> -		__dma_fence_signal__timestamp(&rq->fence, timestamp);
> +		struct list_head cb_list;
>   
>   		spin_lock(&rq->lock);
> -		__dma_fence_signal__notify(&rq->fence);
> +		list_replace(&rq->fence.cb_list, &cb_list);
> +		__dma_fence_signal__timestamp(&rq->fence, timestamp);
> +		__dma_fence_signal__notify(&rq->fence, &cb_list);
>   		spin_unlock(&rq->lock);
>   
>   		i915_request_put(rq);
> diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
> index 434dfadb0e52..178a6cd1a06f 100644
> --- a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
> +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
> @@ -185,6 +185,9 @@ static long vmw_fence_wait(struct dma_fence *f, bool intr, signed long timeout)
>   
>   	spin_lock(f->lock);
>   
> +	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &f->flags))
> +		goto out;
> +
>   	if (intr && signal_pending(current)) {
>   		ret = -ERESTARTSYS;
>   		goto out;
> diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h
> index 2ce4d877d33e..6c36502a0562 100644
> --- a/include/linux/dma-fence.h
> +++ b/include/linux/dma-fence.h
> @@ -65,17 +65,26 @@ struct dma_fence_cb;
>   struct dma_fence {
>   	spinlock_t *lock;
>   	const struct dma_fence_ops *ops;
> -	/* We clear the callback list on kref_put so that by the time we
> -	 * release the fence it is unused. No one should be adding to the cb_list
> -	 * that they don't themselves hold a reference for.
> +	/*
> +	 * We clear the callback list on kref_put so that by the time we
> +	 * release the fence it is unused. No one should be adding to the
> +	 * cb_list that they don't themselves hold a reference for.
> +	 *
> +	 * The lifetime of the timestamp is similarly tied to both the
> +	 * rcu freelist and the cb_list. The timestamp is only set upon
> +	 * signaling while simultaneously notifying the cb_list. Ergo, we
> +	 * only use either the cb_list of timestamp. Upon destruction,
> +	 * neither are accessible, and so we can use the rcu. This means
> +	 * that the cb_list is *only* valid until the signal bit is set,
> +	 * and to read either you *must* hold a reference to the fence.
>   	 */
>   	union {
>   		struct rcu_head rcu;
>   		struct list_head cb_list;
> +		ktime_t timestamp;

Maybe sort this cb_list, timestamp, rcu to better note in which order it 
is used in the lifetime of a fence.

Apart from that looks good to me,
Christian.

>   	};
>   	u64 context;
>   	u64 seqno;
> -	ktime_t timestamp;
>   	unsigned long flags;
>   	struct kref refcount;
>   	int error;
Chris Wilson Aug. 17, 2019, 3:27 p.m. UTC | #2
Quoting Koenig, Christian (2019-08-17 16:20:12)
> Am 17.08.19 um 16:47 schrieb Chris Wilson:
> > diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c
> > index 89d96e3e6df6..2c21115b1a37 100644
> > --- a/drivers/dma-buf/dma-fence.c
> > +++ b/drivers/dma-buf/dma-fence.c
> > @@ -129,6 +129,7 @@ EXPORT_SYMBOL(dma_fence_context_alloc);
> >   int dma_fence_signal_locked(struct dma_fence *fence)
> >   {
> >       struct dma_fence_cb *cur, *tmp;
> > +     struct list_head cb_list;
> >   
> >       lockdep_assert_held(fence->lock);
> >   
> > @@ -136,16 +137,16 @@ int dma_fence_signal_locked(struct dma_fence *fence)
> >                                     &fence->flags)))
> >               return -EINVAL;
> >   
> > +     /* Stash the cb_list before replacing it with the timestamp */
> > +     list_replace(&fence->cb_list, &cb_list);
> 
> Stashing the timestamp instead is probably less bytes to modify.

My thinking was to pass the timestamp to the notify callbacks, we need
to stash the list and set the timestamp first.

Nothing that I'm aware of uses the timestamp (just the sync file debug
which weston was considering using at one point)... So I guess we don't
care? But I would say we should do that as a separate step in case
someone does.
-Chris
Christian König Aug. 17, 2019, 3:31 p.m. UTC | #3
Am 17.08.19 um 17:27 schrieb Chris Wilson:
> Quoting Koenig, Christian (2019-08-17 16:20:12)
>> Am 17.08.19 um 16:47 schrieb Chris Wilson:
>>> diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c
>>> index 89d96e3e6df6..2c21115b1a37 100644
>>> --- a/drivers/dma-buf/dma-fence.c
>>> +++ b/drivers/dma-buf/dma-fence.c
>>> @@ -129,6 +129,7 @@ EXPORT_SYMBOL(dma_fence_context_alloc);
>>>    int dma_fence_signal_locked(struct dma_fence *fence)
>>>    {
>>>        struct dma_fence_cb *cur, *tmp;
>>> +     struct list_head cb_list;
>>>    
>>>        lockdep_assert_held(fence->lock);
>>>    
>>> @@ -136,16 +137,16 @@ int dma_fence_signal_locked(struct dma_fence *fence)
>>>                                      &fence->flags)))
>>>                return -EINVAL;
>>>    
>>> +     /* Stash the cb_list before replacing it with the timestamp */
>>> +     list_replace(&fence->cb_list, &cb_list);
>> Stashing the timestamp instead is probably less bytes to modify.
> My thinking was to pass the timestamp to the notify callbacks, we need
> to stash the list and set the timestamp first.

I don't see much of a reason for callbacks to use the timestamp, they 
could just call ktime_get() and would most likely get the same or at 
least a very close by value.

> Nothing that I'm aware of uses the timestamp (just the sync file debug
> which weston was considering using at one point)... So I guess we don't
> care? But I would say we should do that as a separate step in case
> someone does.

Yeah, agree.

Christian.

> -Chris
diff mbox series

Patch

diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c
index 89d96e3e6df6..2c21115b1a37 100644
--- a/drivers/dma-buf/dma-fence.c
+++ b/drivers/dma-buf/dma-fence.c
@@ -129,6 +129,7 @@  EXPORT_SYMBOL(dma_fence_context_alloc);
 int dma_fence_signal_locked(struct dma_fence *fence)
 {
 	struct dma_fence_cb *cur, *tmp;
+	struct list_head cb_list;
 
 	lockdep_assert_held(fence->lock);
 
@@ -136,16 +137,16 @@  int dma_fence_signal_locked(struct dma_fence *fence)
 				      &fence->flags)))
 		return -EINVAL;
 
+	/* Stash the cb_list before replacing it with the timestamp */
+	list_replace(&fence->cb_list, &cb_list);
+
 	fence->timestamp = ktime_get();
 	set_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &fence->flags);
 	trace_dma_fence_signaled(fence);
 
-	if (!list_empty(&fence->cb_list)) {
-		list_for_each_entry_safe(cur, tmp, &fence->cb_list, node) {
-			INIT_LIST_HEAD(&cur->node);
-			cur->func(fence, cur);
-		}
-		INIT_LIST_HEAD(&fence->cb_list);
+	list_for_each_entry_safe(cur, tmp, &cb_list, node) {
+		INIT_LIST_HEAD(&cur->node);
+		cur->func(fence, cur);
 	}
 
 	return 0;
@@ -234,7 +235,8 @@  void dma_fence_release(struct kref *kref)
 
 	trace_dma_fence_destroy(fence);
 
-	if (WARN(!list_empty(&fence->cb_list),
+	if (WARN(!list_empty(&fence->cb_list) &&
+		 !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags),
 		 "Fence %s:%s:%llx:%llx released with pending signals!\n",
 		 fence->ops->get_driver_name(fence),
 		 fence->ops->get_timeline_name(fence),
diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
index 2bc9c460e78d..09c68dda2098 100644
--- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
@@ -114,18 +114,18 @@  __dma_fence_signal__timestamp(struct dma_fence *fence, ktime_t timestamp)
 }
 
 static void
-__dma_fence_signal__notify(struct dma_fence *fence)
+__dma_fence_signal__notify(struct dma_fence *fence,
+			   const struct list_head *list)
 {
 	struct dma_fence_cb *cur, *tmp;
 
 	lockdep_assert_held(fence->lock);
 	lockdep_assert_irqs_disabled();
 
-	list_for_each_entry_safe(cur, tmp, &fence->cb_list, node) {
+	list_for_each_entry_safe(cur, tmp, list, node) {
 		INIT_LIST_HEAD(&cur->node);
 		cur->func(fence, cur);
 	}
-	INIT_LIST_HEAD(&fence->cb_list);
 }
 
 void intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine)
@@ -187,11 +187,12 @@  void intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine)
 	list_for_each_safe(pos, next, &signal) {
 		struct i915_request *rq =
 			list_entry(pos, typeof(*rq), signal_link);
-
-		__dma_fence_signal__timestamp(&rq->fence, timestamp);
+		struct list_head cb_list;
 
 		spin_lock(&rq->lock);
-		__dma_fence_signal__notify(&rq->fence);
+		list_replace(&rq->fence.cb_list, &cb_list);
+		__dma_fence_signal__timestamp(&rq->fence, timestamp);
+		__dma_fence_signal__notify(&rq->fence, &cb_list);
 		spin_unlock(&rq->lock);
 
 		i915_request_put(rq);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
index 434dfadb0e52..178a6cd1a06f 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
@@ -185,6 +185,9 @@  static long vmw_fence_wait(struct dma_fence *f, bool intr, signed long timeout)
 
 	spin_lock(f->lock);
 
+	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &f->flags))
+		goto out;
+
 	if (intr && signal_pending(current)) {
 		ret = -ERESTARTSYS;
 		goto out;
diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h
index 2ce4d877d33e..6c36502a0562 100644
--- a/include/linux/dma-fence.h
+++ b/include/linux/dma-fence.h
@@ -65,17 +65,26 @@  struct dma_fence_cb;
 struct dma_fence {
 	spinlock_t *lock;
 	const struct dma_fence_ops *ops;
-	/* We clear the callback list on kref_put so that by the time we
-	 * release the fence it is unused. No one should be adding to the cb_list
-	 * that they don't themselves hold a reference for.
+	/*
+	 * We clear the callback list on kref_put so that by the time we
+	 * release the fence it is unused. No one should be adding to the
+	 * cb_list that they don't themselves hold a reference for.
+	 *
+	 * The lifetime of the timestamp is similarly tied to both the
+	 * rcu freelist and the cb_list. The timestamp is only set upon
+	 * signaling while simultaneously notifying the cb_list. Ergo, we
+	 * only use either the cb_list of timestamp. Upon destruction,
+	 * neither are accessible, and so we can use the rcu. This means
+	 * that the cb_list is *only* valid until the signal bit is set,
+	 * and to read either you *must* hold a reference to the fence.
 	 */
 	union {
 		struct rcu_head rcu;
 		struct list_head cb_list;
+		ktime_t timestamp;
 	};
 	u64 context;
 	u64 seqno;
-	ktime_t timestamp;
 	unsigned long flags;
 	struct kref refcount;
 	int error;