diff mbox

[RFC] drm: rework flip-work helpers to avoid calling func when the FIFO is full

Message ID 1405091821-31839-1-git-send-email-boris.brezillon@free-electrons.com (mailing list archive)
State New, archived
Headers show

Commit Message

Boris BREZILLON July 11, 2014, 3:17 p.m. UTC
Make use of lists instead of kfifo in order to dynamically allocate a
task entry when someone requires some delayed work, thus preventing
drm_flip_work_queue from directly calling func instead of queuing this
call.
This allows drm_flip_work_queue to be safely called even from within irq
handlers.

Add new helper functions to allocate a flip work task and queue it when
needed. This prevents allocating data within irq context (which might
impact the time spent in the irq handler).

Signed-off-by: Boris BREZILLON <boris.brezillon@free-electrons.com>
---
Hi Rob,

This is a proposal for what you suggested (dynamically growing the drm
flip work queue in order to avoid direct call of work->func when calling
drm_flip_work_queue).

I'm not sure this is exactly what you expected, because I'm now using
lists instead of kfifo (and thus lose the lockless part), but at least
we can now safely call drm_flip_work_queue or drm_flip_work_queue_task
from irq handlers :-).

You were also worried about queueing the same framebuffer multiple times
and with this implementation you shouldn't have any problem (at least with
drm_flip_work_queue, what people do with drm_flip_work_queue_task is their
own responsibility, but they should allocate one task for each operation
even if they are manipulating the same framebuffer).

This is just a suggestion, so don't hesitate to tell me that it doesn't
match your expectations.

Best Regards,

Boris

 drivers/gpu/drm/drm_flip_work.c | 95 ++++++++++++++++++++++++++++++-----------
 include/drm/drm_flip_work.h     | 29 +++++++++----
 2 files changed, 92 insertions(+), 32 deletions(-)

Comments

Rob Clark July 11, 2014, 3:41 p.m. UTC | #1
On Fri, Jul 11, 2014 at 11:17 AM, Boris BREZILLON
<boris.brezillon@free-electrons.com> wrote:
> Make use of lists instead of kfifo in order to dynamically allocate
> task entry when someone require some delayed work, and thus preventing
> drm_flip_work_queue from directly calling func instead of queuing this
> call.
> This allow drm_flip_work_queue to be safely called even within irq
> handlers.
>
> Add new helper functions to allocate a flip work task and queue it when
> needed. This prevents allocating data within irq context (which might
> impact the time spent in the irq handler).
>
> Signed-off-by: Boris BREZILLON <boris.brezillon@free-electrons.com>
> ---
> Hi Rob,
>
> This is a proposal for what you suggested (dynamically growing the drm
> flip work queue in order to avoid direct call of work->func when calling
> drm_flip_work_queue).
>
> I'm not sure this is exactly what you expected, because I'm now using
> lists instead of kfifo (and thus lose the lockless part), but at least
> we can now safely call drm_flip_work_queue or drm_flip_work_queue_task
> from irq handlers :-).
>
> You were also worried about queueing the same framebuffer multiple times
> and with this implementation you shouldn't have any problem (at least with
> drm_flip_work_queue, what people do with drm_flip_work_queue_task is their
> own responsability, but they should allocate one task for each operation
> even if they are manipulating the same framebuffer).

yeah, if we are dynamically allocating the list nodes, that solves the
queuing-up-multiple-times issue..

I wonder if drm_flip_work_allocate_task() should use GFP_ATOMIC when
allocating?  I guess maybe it is possible to pre-allocate the task
from non-irq context, and then queue it from irq context.. it makes
the API a bit more complex, but there are only a couple users
currently, so I suppose this should be doable.

BR,
-R

> This is just a suggestion, so don't hesitate to tell me that it doesn't
> match your expectations.
>
> Best Regards,
>
> Boris
>
>  drivers/gpu/drm/drm_flip_work.c | 95 ++++++++++++++++++++++++++++++-----------
>  include/drm/drm_flip_work.h     | 29 +++++++++----
>  2 files changed, 92 insertions(+), 32 deletions(-)
>
> diff --git a/drivers/gpu/drm/drm_flip_work.c b/drivers/gpu/drm/drm_flip_work.c
> index f9c7fa3..21d5715 100644
> --- a/drivers/gpu/drm/drm_flip_work.c
> +++ b/drivers/gpu/drm/drm_flip_work.c
> @@ -25,6 +25,43 @@
>  #include "drm_flip_work.h"
>
>  /**
> + * drm_flip_work_allocate_task - allocate a flip-work task
> + * @data: data associated to the task
> + *
> + * Allocate a drm_flip_task object and attach private data to it.
> + */
> +struct drm_flip_task *drm_flip_work_allocate_task(void *data)
> +{
> +       struct drm_flip_task *task;
> +
> +       task = kzalloc(sizeof(*task), GFP_KERNEL);
> +       if (task)
> +               task->data = data;
> +
> +       return task;
> +}
> +EXPORT_SYMBOL(drm_flip_work_allocate_task);
> +
> +/**
> + * drm_flip_work_queue_task - queue a specific task
> + * @work: the flip-work
> + * @task: the task to handle
> + *
> + * Queues task, that will later be run (passed back to drm_flip_func_t
> + * func) on a work queue after drm_flip_work_commit() is called.
> + */
> +void drm_flip_work_queue_task(struct drm_flip_work *work,
> +                             struct drm_flip_task *task)
> +{
> +       unsigned long flags;
> +
> +       spin_lock_irqsave(&work->lock, flags);
> +       list_add_tail(&task->node, &work->queued);
> +       spin_unlock_irqrestore(&work->lock, flags);
> +}
> +EXPORT_SYMBOL(drm_flip_work_queue_task);
> +
> +/**
>   * drm_flip_work_queue - queue work
>   * @work: the flip-work
>   * @val: the value to queue
> @@ -34,10 +71,14 @@
>   */
>  void drm_flip_work_queue(struct drm_flip_work *work, void *val)
>  {
> -       if (kfifo_put(&work->fifo, val)) {
> -               atomic_inc(&work->pending);
> +       struct drm_flip_task *task;
> +
> +       task = kzalloc(sizeof(*task), GFP_KERNEL);
> +       if (task) {
> +               task->data = val;
> +               drm_flip_work_queue_task(work, task);
>         } else {
> -               DRM_ERROR("%s fifo full!\n", work->name);
> +               DRM_ERROR("%s could not allocate task!\n", work->name);
>                 work->func(work, val);
>         }
>  }
> @@ -56,9 +97,12 @@ EXPORT_SYMBOL(drm_flip_work_queue);
>  void drm_flip_work_commit(struct drm_flip_work *work,
>                 struct workqueue_struct *wq)
>  {
> -       uint32_t pending = atomic_read(&work->pending);
> -       atomic_add(pending, &work->count);
> -       atomic_sub(pending, &work->pending);
> +       unsigned long flags;
> +
> +       spin_lock_irqsave(&work->lock, flags);
> +       list_splice_tail(&work->queued, &work->commited);
> +       INIT_LIST_HEAD(&work->queued);
> +       spin_unlock_irqrestore(&work->lock, flags);
>         queue_work(wq, &work->worker);
>  }
>  EXPORT_SYMBOL(drm_flip_work_commit);
> @@ -66,14 +110,26 @@ EXPORT_SYMBOL(drm_flip_work_commit);
>  static void flip_worker(struct work_struct *w)
>  {
>         struct drm_flip_work *work = container_of(w, struct drm_flip_work, worker);
> -       uint32_t count = atomic_read(&work->count);
> -       void *val = NULL;
> +       struct list_head tasks;
> +       unsigned long flags;
>
> -       atomic_sub(count, &work->count);
> +       while (1) {
> +               struct drm_flip_task *task, *tmp;
>
> -       while(count--)
> -               if (!WARN_ON(!kfifo_get(&work->fifo, &val)))
> -                       work->func(work, val);
> +               INIT_LIST_HEAD(&tasks);
> +               spin_lock_irqsave(&work->lock, flags);
> +               list_splice_tail(&work->commited, &tasks);
> +               INIT_LIST_HEAD(&work->commited);
> +               spin_unlock_irqrestore(&work->lock, flags);
> +
> +               if (list_empty(&tasks))
> +                       break;
> +
> +               list_for_each_entry_safe(task, tmp, &tasks, node) {
> +                       work->func(work, task->data);
> +                       kfree(task);
> +               }
> +       }
>  }
>
>  /**
> @@ -91,19 +147,11 @@ static void flip_worker(struct work_struct *w)
>  int drm_flip_work_init(struct drm_flip_work *work, int size,
>                 const char *name, drm_flip_func_t func)
>  {
> -       int ret;
> -
>         work->name = name;
> -       atomic_set(&work->count, 0);
> -       atomic_set(&work->pending, 0);
> +       INIT_LIST_HEAD(&work->queued);
> +       INIT_LIST_HEAD(&work->commited);
>         work->func = func;
>
> -       ret = kfifo_alloc(&work->fifo, size, GFP_KERNEL);
> -       if (ret) {
> -               DRM_ERROR("could not allocate %s fifo\n", name);
> -               return ret;
> -       }
> -
>         INIT_WORK(&work->worker, flip_worker);
>
>         return 0;
> @@ -118,7 +166,6 @@ EXPORT_SYMBOL(drm_flip_work_init);
>   */
>  void drm_flip_work_cleanup(struct drm_flip_work *work)
>  {
> -       WARN_ON(!kfifo_is_empty(&work->fifo));
> -       kfifo_free(&work->fifo);
> +       WARN_ON(!list_empty(&work->queued) || !list_empty(&work->commited));
>  }
>  EXPORT_SYMBOL(drm_flip_work_cleanup);
> diff --git a/include/drm/drm_flip_work.h b/include/drm/drm_flip_work.h
> index 9eed34d..549981f 100644
> --- a/include/drm/drm_flip_work.h
> +++ b/include/drm/drm_flip_work.h
> @@ -25,6 +25,7 @@
>  #define DRM_FLIP_WORK_H
>
>  #include <linux/kfifo.h>
> +#include <linux/spinlock.h>
>  #include <linux/workqueue.h>
>
>  /**
> @@ -32,9 +33,7 @@
>   *
>   * Util to queue up work to run from work-queue context after flip/vblank.
>   * Typically this can be used to defer unref of framebuffer's, cursor
> - * bo's, etc until after vblank.  The APIs are all safe (and lockless)
> - * for up to one producer and once consumer at a time.  The single-consumer
> - * aspect is ensured by committing the queued work to a single work-queue.
> + * bo's, etc until after vblank.  The APIs are all safe.
>   */
>
>  struct drm_flip_work;
> @@ -51,22 +50,36 @@ struct drm_flip_work;
>  typedef void (*drm_flip_func_t)(struct drm_flip_work *work, void *val);
>
>  /**
> + * struct drm_flip_task - flip work task
> + * @node: list entry element
> + * @data: data to pass to work->func
> + */
> +struct drm_flip_task {
> +       struct list_head node;
> +       void *data;
> +};
> +
> +/**
>   * struct drm_flip_work - flip work queue
>   * @name: debug name
> - * @pending: number of queued but not committed items
> - * @count: number of committed items
>   * @func: callback fxn called for each committed item
>   * @worker: worker which calls @func
> - * @fifo: queue of committed items
> + * @queued: queued tasks
> + * @commited: commited tasks
> + * @lock: lock to access queued and commited lists
>   */
>  struct drm_flip_work {
>         const char *name;
> -       atomic_t pending, count;
>         drm_flip_func_t func;
>         struct work_struct worker;
> -       DECLARE_KFIFO_PTR(fifo, void *);
> +       struct list_head queued;
> +       struct list_head commited;
> +       spinlock_t lock;
>  };
>
> +struct drm_flip_task *drm_flip_work_allocate_task(void *data);
> +void drm_flip_work_queue_task(struct drm_flip_work *work,
> +                             struct drm_flip_task *task);
>  void drm_flip_work_queue(struct drm_flip_work *work, void *val);
>  void drm_flip_work_commit(struct drm_flip_work *work,
>                 struct workqueue_struct *wq);
> --
> 1.8.3.2
>
Boris BREZILLON July 11, 2014, 3:47 p.m. UTC | #2
On Fri, 11 Jul 2014 11:41:12 -0400
Rob Clark <robdclark@gmail.com> wrote:

> On Fri, Jul 11, 2014 at 11:17 AM, Boris BREZILLON
> <boris.brezillon@free-electrons.com> wrote:
> > Make use of lists instead of kfifo in order to dynamically allocate
> > task entry when someone require some delayed work, and thus preventing
> > drm_flip_work_queue from directly calling func instead of queuing this
> > call.
> > This allow drm_flip_work_queue to be safely called even within irq
> > handlers.
> >
> > Add new helper functions to allocate a flip work task and queue it when
> > needed. This prevents allocating data within irq context (which might
> > impact the time spent in the irq handler).
> >
> > Signed-off-by: Boris BREZILLON <boris.brezillon@free-electrons.com>
> > ---
> > Hi Rob,
> >
> > This is a proposal for what you suggested (dynamically growing the drm
> > flip work queue in order to avoid direct call of work->func when calling
> > drm_flip_work_queue).
> >
> > I'm not sure this is exactly what you expected, because I'm now using
> > lists instead of kfifo (and thus lose the lockless part), but at least
> > we can now safely call drm_flip_work_queue or drm_flip_work_queue_task
> > from irq handlers :-).
> >
> > You were also worried about queueing the same framebuffer multiple times
> > and with this implementation you shouldn't have any problem (at least with
> > drm_flip_work_queue, what people do with drm_flip_work_queue_task is their
> > own responsability, but they should allocate one task for each operation
> > even if they are manipulating the same framebuffer).
> 
> yeah, if we are dynamically allocating the list nodes, that solves the
> queuing-up-multiple-times issue..
> 
> I wonder if drm_flip_work_allocate_task() should use GPF_ATOMIC when
> allocating?

That's funny, I was actually modifying the API to pass gfp_t flags to
this function ;-)

> I guess maybe it is possible to pre-allocate the task
> from non-irq context, and then queue it from irq context.. it makes
> the API a bit more complex, but there are only a couple users
> currently, so I suppose this should be doable.

I tried to keep the existing API so that existing users won't see the
difference (I guess none of them are calling drm_flip_work_queue).

I just added the drm_flip_work_allocate_task and
drm_flip_work_queue_task for those who want more control on the
queuing process.

Best Regards,

Boris
Boris BREZILLON July 11, 2014, 3:57 p.m. UTC | #3
On Fri, 11 Jul 2014 17:47:05 +0200
Boris BREZILLON <boris.brezillon@free-electrons.com> wrote:

> On Fri, 11 Jul 2014 11:41:12 -0400
> Rob Clark <robdclark@gmail.com> wrote:
> 
> > On Fri, Jul 11, 2014 at 11:17 AM, Boris BREZILLON
> > <boris.brezillon@free-electrons.com> wrote:
> > > Make use of lists instead of kfifo in order to dynamically allocate
> > > task entry when someone require some delayed work, and thus preventing
> > > drm_flip_work_queue from directly calling func instead of queuing this
> > > call.
> > > This allow drm_flip_work_queue to be safely called even within irq
> > > handlers.
> > >
> > > Add new helper functions to allocate a flip work task and queue it when
> > > needed. This prevents allocating data within irq context (which might
> > > impact the time spent in the irq handler).
> > >
> > > Signed-off-by: Boris BREZILLON <boris.brezillon@free-electrons.com>
> > > ---
> > > Hi Rob,
> > >
> > > This is a proposal for what you suggested (dynamically growing the drm
> > > flip work queue in order to avoid direct call of work->func when calling
> > > drm_flip_work_queue).
> > >
> > > I'm not sure this is exactly what you expected, because I'm now using
> > > lists instead of kfifo (and thus lose the lockless part), but at least
> > > we can now safely call drm_flip_work_queue or drm_flip_work_queue_task
> > > from irq handlers :-).
> > >
> > > You were also worried about queueing the same framebuffer multiple times
> > > and with this implementation you shouldn't have any problem (at least with
> > > drm_flip_work_queue, what people do with drm_flip_work_queue_task is their
> > > own responsability, but they should allocate one task for each operation
> > > even if they are manipulating the same framebuffer).
> > 
> > yeah, if we are dynamically allocating the list nodes, that solves the
> > queuing-up-multiple-times issue..
> > 
> > I wonder if drm_flip_work_allocate_task() should use GPF_ATOMIC when
> > allocating?
> 
> That's funny, I was actually modifying the API to pass gfp_t flags to
> this function ;-)
> 
> > I guess maybe it is possible to pre-allocate the task
> > from non-irq context, and then queue it from irq context.. it makes
> > the API a bit more complex, but there are only a couple users
> > currently, so I suppose this should be doable.
> 
> I tried to keep the existing API so that existing users won't see the
> difference (I guess none of them are calling drm_flip_work_queue).

Some words are missing :-):

(I guess none of them are calling drm_flip_work_queue from irq
handlers).

> 
> I just added the drm_flip_work_allocate_task and
> drm_flip_work_queue_task for those who want more control on the
> queuing process.
> 
> Best Regards,
> 
> Boris
> 
> 
> 
> 
>
Rob Clark July 11, 2014, 4:05 p.m. UTC | #4
On Fri, Jul 11, 2014 at 11:47 AM, Boris BREZILLON
<boris.brezillon@free-electrons.com> wrote:
> On Fri, 11 Jul 2014 11:41:12 -0400
> Rob Clark <robdclark@gmail.com> wrote:
>
>> On Fri, Jul 11, 2014 at 11:17 AM, Boris BREZILLON
>> <boris.brezillon@free-electrons.com> wrote:
>> > Make use of lists instead of kfifo in order to dynamically allocate
>> > task entry when someone require some delayed work, and thus preventing
>> > drm_flip_work_queue from directly calling func instead of queuing this
>> > call.
>> > This allow drm_flip_work_queue to be safely called even within irq
>> > handlers.
>> >
>> > Add new helper functions to allocate a flip work task and queue it when
>> > needed. This prevents allocating data within irq context (which might
>> > impact the time spent in the irq handler).
>> >
>> > Signed-off-by: Boris BREZILLON <boris.brezillon@free-electrons.com>
>> > ---
>> > Hi Rob,
>> >
>> > This is a proposal for what you suggested (dynamically growing the drm
>> > flip work queue in order to avoid direct call of work->func when calling
>> > drm_flip_work_queue).
>> >
>> > I'm not sure this is exactly what you expected, because I'm now using
>> > lists instead of kfifo (and thus lose the lockless part), but at least
>> > we can now safely call drm_flip_work_queue or drm_flip_work_queue_task
>> > from irq handlers :-).
>> >
>> > You were also worried about queueing the same framebuffer multiple times
>> > and with this implementation you shouldn't have any problem (at least with
>> > drm_flip_work_queue, what people do with drm_flip_work_queue_task is their
>> > own responsability, but they should allocate one task for each operation
>> > even if they are manipulating the same framebuffer).
>>
>> yeah, if we are dynamically allocating the list nodes, that solves the
>> queuing-up-multiple-times issue..
>>
>> I wonder if drm_flip_work_allocate_task() should use GPF_ATOMIC when
>> allocating?
>
> That's funny, I was actually modifying the API to pass gfp_t flags to
> this function ;-)

yeah, I think passing gfp flags is the better idea

>> I guess maybe it is possible to pre-allocate the task
>> from non-irq context, and then queue it from irq context.. it makes
>> the API a bit more complex, but there are only a couple users
>> currently, so I suppose this should be doable.
>
> I tried to keep the existing API so that existing users won't see the
> difference (I guess none of them are calling drm_flip_work_queue).

we do have existing users that call drm_flip_work_queue() from irq..
but I suppose adding gfp flags arg to drm_flip_work_queue() seems like
a reasonable solution.

BR,
-R

> I just added the drm_flip_work_allocate_task and
> drm_flip_work_queue_task for those who want more control on the
> queuing process.
>
> Best Regards,
>
> Boris
>
>
>
>
>
> --
> Boris Brezillon, Free Electrons
> Embedded Linux and Kernel engineering
> http://free-electrons.com
diff mbox

Patch

diff --git a/drivers/gpu/drm/drm_flip_work.c b/drivers/gpu/drm/drm_flip_work.c
index f9c7fa3..21d5715 100644
--- a/drivers/gpu/drm/drm_flip_work.c
+++ b/drivers/gpu/drm/drm_flip_work.c
@@ -25,6 +25,43 @@ 
 #include "drm_flip_work.h"
 
 /**
+ * drm_flip_work_allocate_task - allocate a flip-work task
+ * @data: data associated to the task
+ *
+ * Allocate a drm_flip_task object and attach private data to it.
+ */
+struct drm_flip_task *drm_flip_work_allocate_task(void *data)
+{
+	struct drm_flip_task *task;
+
+	task = kzalloc(sizeof(*task), GFP_KERNEL);
+	if (task)
+		task->data = data;
+
+	return task;
+}
+EXPORT_SYMBOL(drm_flip_work_allocate_task);
+
+/**
+ * drm_flip_work_queue_task - queue a specific task
+ * @work: the flip-work
+ * @task: the task to handle
+ *
+ * Queues task, that will later be run (passed back to drm_flip_func_t
+ * func) on a work queue after drm_flip_work_commit() is called.
+ */
+void drm_flip_work_queue_task(struct drm_flip_work *work,
+			      struct drm_flip_task *task)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&work->lock, flags);
+	list_add_tail(&task->node, &work->queued);
+	spin_unlock_irqrestore(&work->lock, flags);
+}
+EXPORT_SYMBOL(drm_flip_work_queue_task);
+
+/**
  * drm_flip_work_queue - queue work
  * @work: the flip-work
  * @val: the value to queue
@@ -34,10 +71,14 @@ 
  */
 void drm_flip_work_queue(struct drm_flip_work *work, void *val)
 {
-	if (kfifo_put(&work->fifo, val)) {
-		atomic_inc(&work->pending);
+	struct drm_flip_task *task;
+
+	task = kzalloc(sizeof(*task), GFP_KERNEL);
+	if (task) {
+		task->data = val;
+		drm_flip_work_queue_task(work, task);
 	} else {
-		DRM_ERROR("%s fifo full!\n", work->name);
+		DRM_ERROR("%s could not allocate task!\n", work->name);
 		work->func(work, val);
 	}
 }
@@ -56,9 +97,12 @@  EXPORT_SYMBOL(drm_flip_work_queue);
 void drm_flip_work_commit(struct drm_flip_work *work,
 		struct workqueue_struct *wq)
 {
-	uint32_t pending = atomic_read(&work->pending);
-	atomic_add(pending, &work->count);
-	atomic_sub(pending, &work->pending);
+	unsigned long flags;
+
+	spin_lock_irqsave(&work->lock, flags);
+	list_splice_tail(&work->queued, &work->commited);
+	INIT_LIST_HEAD(&work->queued);
+	spin_unlock_irqrestore(&work->lock, flags);
 	queue_work(wq, &work->worker);
 }
 EXPORT_SYMBOL(drm_flip_work_commit);
@@ -66,14 +110,26 @@  EXPORT_SYMBOL(drm_flip_work_commit);
 static void flip_worker(struct work_struct *w)
 {
 	struct drm_flip_work *work = container_of(w, struct drm_flip_work, worker);
-	uint32_t count = atomic_read(&work->count);
-	void *val = NULL;
+	struct list_head tasks;
+	unsigned long flags;
 
-	atomic_sub(count, &work->count);
+	while (1) {
+		struct drm_flip_task *task, *tmp;
 
-	while(count--)
-		if (!WARN_ON(!kfifo_get(&work->fifo, &val)))
-			work->func(work, val);
+		INIT_LIST_HEAD(&tasks);
+		spin_lock_irqsave(&work->lock, flags);
+		list_splice_tail(&work->commited, &tasks);
+		INIT_LIST_HEAD(&work->commited);
+		spin_unlock_irqrestore(&work->lock, flags);
+
+		if (list_empty(&tasks))
+			break;
+
+		list_for_each_entry_safe(task, tmp, &tasks, node) {
+			work->func(work, task->data);
+			kfree(task);
+		}
+	}
 }
 
 /**
@@ -91,19 +147,11 @@  static void flip_worker(struct work_struct *w)
 int drm_flip_work_init(struct drm_flip_work *work, int size,
 		const char *name, drm_flip_func_t func)
 {
-	int ret;
-
 	work->name = name;
-	atomic_set(&work->count, 0);
-	atomic_set(&work->pending, 0);
+	INIT_LIST_HEAD(&work->queued);
+	INIT_LIST_HEAD(&work->commited);
 	work->func = func;
 
-	ret = kfifo_alloc(&work->fifo, size, GFP_KERNEL);
-	if (ret) {
-		DRM_ERROR("could not allocate %s fifo\n", name);
-		return ret;
-	}
-
 	INIT_WORK(&work->worker, flip_worker);
 
 	return 0;
@@ -118,7 +166,6 @@  EXPORT_SYMBOL(drm_flip_work_init);
  */
 void drm_flip_work_cleanup(struct drm_flip_work *work)
 {
-	WARN_ON(!kfifo_is_empty(&work->fifo));
-	kfifo_free(&work->fifo);
+	WARN_ON(!list_empty(&work->queued) || !list_empty(&work->commited));
 }
 EXPORT_SYMBOL(drm_flip_work_cleanup);
diff --git a/include/drm/drm_flip_work.h b/include/drm/drm_flip_work.h
index 9eed34d..549981f 100644
--- a/include/drm/drm_flip_work.h
+++ b/include/drm/drm_flip_work.h
@@ -25,6 +25,7 @@ 
 #define DRM_FLIP_WORK_H
 
 #include <linux/kfifo.h>
+#include <linux/spinlock.h>
 #include <linux/workqueue.h>
 
 /**
@@ -32,9 +33,7 @@ 
  *
  * Util to queue up work to run from work-queue context after flip/vblank.
  * Typically this can be used to defer unref of framebuffer's, cursor
- * bo's, etc until after vblank.  The APIs are all safe (and lockless)
- * for up to one producer and once consumer at a time.  The single-consumer
- * aspect is ensured by committing the queued work to a single work-queue.
+ * bo's, etc until after vblank.  The APIs are all safe.
  */
 
 struct drm_flip_work;
@@ -51,22 +50,36 @@  struct drm_flip_work;
 typedef void (*drm_flip_func_t)(struct drm_flip_work *work, void *val);
 
 /**
+ * struct drm_flip_task - flip work task
+ * @node: list entry element
+ * @data: data to pass to work->func
+ */
+struct drm_flip_task {
+	struct list_head node;
+	void *data;
+};
+
+/**
  * struct drm_flip_work - flip work queue
  * @name: debug name
- * @pending: number of queued but not committed items
- * @count: number of committed items
  * @func: callback fxn called for each committed item
  * @worker: worker which calls @func
- * @fifo: queue of committed items
+ * @queued: queued tasks
+ * @commited: commited tasks
+ * @lock: lock to access queued and commited lists
  */
 struct drm_flip_work {
 	const char *name;
-	atomic_t pending, count;
 	drm_flip_func_t func;
 	struct work_struct worker;
-	DECLARE_KFIFO_PTR(fifo, void *);
+	struct list_head queued;
+	struct list_head commited;
+	spinlock_t lock;
 };
 
+struct drm_flip_task *drm_flip_work_allocate_task(void *data);
+void drm_flip_work_queue_task(struct drm_flip_work *work,
+			      struct drm_flip_task *task);
 void drm_flip_work_queue(struct drm_flip_work *work, void *val);
 void drm_flip_work_commit(struct drm_flip_work *work,
 		struct workqueue_struct *wq);