[v6,08/18] drm/virtio: rework virtio_gpu_execbuffer_ioctl fencing

Message ID 20190702141903.1131-9-kraxel@redhat.com (mailing list archive)
State New, archived
Series drm/virtio: switch from ttm to gem shmem helpers.

Commit Message

Gerd Hoffmann July 2, 2019, 2:18 p.m. UTC
Rework fencing workflow, starting with virtio_gpu_execbuffer_ioctl.
Stop using ttm helpers, use the virtio_gpu_array_* helpers (which work
on the reservation objects directly) instead.

New workflow:

 (1) All gem objects needed by a command are added to a
     virtio_gpu_object_array.
 (2) All reservation objects will be locked (virtio_gpu_array_lock_resv).
 (3) virtio_gpu_fence_emit() completes fence initialization.
 (4) fence gets added to the objects, reservation objects are unlocked
     (virtio_gpu_array_add_fence, virtio_gpu_array_unlock_resv).
 (5) virtio command is submitted to the host.
 (6) The completion callback (virtio_gpu_dequeue_ctrl_func)
     will drop object references and free virtio_gpu_object_array.
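
A condensed sketch of that flow in C (illustrative only; error handling
and the actual command encoding are omitted, helper names as used by
this series):

    /* (1) collect the gem objects referenced by the command */
    buflist = virtio_gpu_array_from_handles(drm_file, bo_handles,
                                            exbuf->num_bo_handles);

    /* (2) lock all reservation objects */
    ret = virtio_gpu_array_lock_resv(buflist);

    /* (3) + (4): done under vgdev->ctrlq.qlock in
     * virtio_gpu_queue_fenced_ctrl_buffer() */
    virtio_gpu_fence_emit(vgdev, hdr, fence);
    virtio_gpu_array_add_fence(buflist, &fence->f);
    virtio_gpu_array_unlock_resv(buflist);

    /* (5) submit; vbuf->objs keeps the array alive ... */
    rc = virtio_gpu_queue_ctrl_buffer_locked(vgdev, vbuf);

    /* (6) ... until the completion callback drops it */
    virtio_gpu_array_put_free(entry->objs);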

v6: rewrite most of the patch.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
---
 drivers/gpu/drm/virtio/virtgpu_drv.h   |  6 ++-
 drivers/gpu/drm/virtio/virtgpu_ioctl.c | 56 +++++++++-----------------
 drivers/gpu/drm/virtio/virtgpu_vq.c    | 21 +++++++---
 3 files changed, 38 insertions(+), 45 deletions(-)

Comments

Chia-I Wu July 3, 2019, 6:49 p.m. UTC | #1
On Tue, Jul 2, 2019 at 7:19 AM Gerd Hoffmann <kraxel@redhat.com> wrote:
>
> Rework fencing workflow, starting with virtio_gpu_execbuffer_ioctl.
> Stop using ttm helpers, use the virtio_gpu_array_* helpers (which work
> on the reservation objects directly) instead.
>
> New workflow:
>
>  (1) All gem objects needed by a command are added to a
>      virtio_gpu_object_array.
>  (2) All reservation objects will be locked (virtio_gpu_array_lock_resv).
>  (3) virtio_gpu_fence_emit() completes fence initialization.
>  (4) fence gets added to the objects, reservation objects are unlocked
>      (virtio_gpu_array_add_fence, virtio_gpu_array_unlock_resv).
>  (5) virtio command is submitted to the host.
>  (6) The completion callback (virtio_gpu_dequeue_ctrl_func)
>      will drop object references and free virtio_gpu_object_array.
>
> v6: rewrite most of the patch.
>
> Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
> ---
>  drivers/gpu/drm/virtio/virtgpu_drv.h   |  6 ++-
>  drivers/gpu/drm/virtio/virtgpu_ioctl.c | 56 +++++++++-----------------
>  drivers/gpu/drm/virtio/virtgpu_vq.c    | 21 +++++++---
>  3 files changed, 38 insertions(+), 45 deletions(-)
>
> diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.h b/drivers/gpu/drm/virtio/virtgpu_drv.h
> index abb078a5dedf..98511d1dfff2 100644
> --- a/drivers/gpu/drm/virtio/virtgpu_drv.h
> +++ b/drivers/gpu/drm/virtio/virtgpu_drv.h
> @@ -121,9 +121,9 @@ struct virtio_gpu_vbuffer {
>
>         char *resp_buf;
>         int resp_size;
> -
>         virtio_gpu_resp_cb resp_cb;
>
> +       struct virtio_gpu_object_array *objs;
>         struct list_head list;
>  };
>
> @@ -318,7 +318,9 @@ void virtio_gpu_cmd_context_detach_resource(struct virtio_gpu_device *vgdev,
>                                             uint32_t resource_id);
>  void virtio_gpu_cmd_submit(struct virtio_gpu_device *vgdev,
>                            void *data, uint32_t data_size,
> -                          uint32_t ctx_id, struct virtio_gpu_fence *fence);
> +                          uint32_t ctx_id,
> +                          struct virtio_gpu_object_array *objs,
> +                          struct virtio_gpu_fence *fence);
>  void virtio_gpu_cmd_transfer_from_host_3d(struct virtio_gpu_device *vgdev,
>                                           uint32_t resource_id, uint32_t ctx_id,
>                                           uint64_t offset, uint32_t level,
> diff --git a/drivers/gpu/drm/virtio/virtgpu_ioctl.c b/drivers/gpu/drm/virtio/virtgpu_ioctl.c
> index 0caff3fa623e..9735d7e5899b 100644
> --- a/drivers/gpu/drm/virtio/virtgpu_ioctl.c
> +++ b/drivers/gpu/drm/virtio/virtgpu_ioctl.c
> @@ -105,16 +105,11 @@ static int virtio_gpu_execbuffer_ioctl(struct drm_device *dev, void *data,
>         struct drm_virtgpu_execbuffer *exbuf = data;
>         struct virtio_gpu_device *vgdev = dev->dev_private;
>         struct virtio_gpu_fpriv *vfpriv = drm_file->driver_priv;
> -       struct drm_gem_object *gobj;
>         struct virtio_gpu_fence *out_fence;
> -       struct virtio_gpu_object *qobj;
>         int ret;
>         uint32_t *bo_handles = NULL;
>         void __user *user_bo_handles = NULL;
> -       struct list_head validate_list;
> -       struct ttm_validate_buffer *buflist = NULL;
> -       int i;
> -       struct ww_acquire_ctx ticket;
> +       struct virtio_gpu_object_array *buflist = NULL;
>         struct sync_file *sync_file;
>         int in_fence_fd = exbuf->fence_fd;
>         int out_fence_fd = -1;
> @@ -155,15 +150,10 @@ static int virtio_gpu_execbuffer_ioctl(struct drm_device *dev, void *data,
>                         return out_fence_fd;
>         }
>
> -       INIT_LIST_HEAD(&validate_list);
>         if (exbuf->num_bo_handles) {
> -
>                 bo_handles = kvmalloc_array(exbuf->num_bo_handles,
> -                                          sizeof(uint32_t), GFP_KERNEL);
> -               buflist = kvmalloc_array(exbuf->num_bo_handles,
> -                                          sizeof(struct ttm_validate_buffer),
> -                                          GFP_KERNEL | __GFP_ZERO);
> -               if (!bo_handles || !buflist) {
> +                                           sizeof(uint32_t), GFP_KERNEL);
> +               if (!bo_handles) {
>                         ret = -ENOMEM;
>                         goto out_unused_fd;
>                 }
> @@ -175,25 +165,21 @@ static int virtio_gpu_execbuffer_ioctl(struct drm_device *dev, void *data,
>                         goto out_unused_fd;
>                 }
>
> -               for (i = 0; i < exbuf->num_bo_handles; i++) {
> -                       gobj = drm_gem_object_lookup(drm_file, bo_handles[i]);
> -                       if (!gobj) {
> -                               ret = -ENOENT;
> -                               goto out_unused_fd;
> -                       }
> -
> -                       qobj = gem_to_virtio_gpu_obj(gobj);
> -                       buflist[i].bo = &qobj->tbo;
> -
> -                       list_add(&buflist[i].head, &validate_list);
> +               buflist = virtio_gpu_array_from_handles(drm_file, bo_handles,
> +                                                       exbuf->num_bo_handles);
> +               if (!buflist) {
> +                       ret = -ENOENT;
> +                       goto out_unused_fd;
>                 }
>                 kvfree(bo_handles);
>                 bo_handles = NULL;
>         }
>
> -       ret = virtio_gpu_object_list_validate(&ticket, &validate_list);
> -       if (ret)
> -               goto out_free;
> +       if (buflist) {
> +               ret = virtio_gpu_array_lock_resv(buflist);
> +               if (ret)
> +                       goto out_unused_fd;
> +       }
>
>         buf = memdup_user(u64_to_user_ptr(exbuf->command), exbuf->size);
>         if (IS_ERR(buf)) {
> @@ -220,24 +206,18 @@ static int virtio_gpu_execbuffer_ioctl(struct drm_device *dev, void *data,
>         }
>
>         virtio_gpu_cmd_submit(vgdev, buf, exbuf->size,
> -                             vfpriv->ctx_id, out_fence);
> -
> -       ttm_eu_fence_buffer_objects(&ticket, &validate_list, &out_fence->f);
> -
> -       /* fence the command bo */
> -       virtio_gpu_unref_list(&validate_list);
> -       kvfree(buflist);
> +                             vfpriv->ctx_id, buflist, out_fence);
>         return 0;
>
>  out_memdup:
>         kfree(buf);
>  out_unresv:
> -       ttm_eu_backoff_reservation(&ticket, &validate_list);
> -out_free:
> -       virtio_gpu_unref_list(&validate_list);
> +       if (buflist)
> +               virtio_gpu_array_unlock_resv(buflist);
>  out_unused_fd:
>         kvfree(bo_handles);
> -       kvfree(buflist);
> +       if (buflist)
> +               virtio_gpu_array_put_free(buflist);
>
>         if (out_fence_fd >= 0)
>                 put_unused_fd(out_fence_fd);
> diff --git a/drivers/gpu/drm/virtio/virtgpu_vq.c b/drivers/gpu/drm/virtio/virtgpu_vq.c
> index 6c1a90717535..dbe329801e84 100644
> --- a/drivers/gpu/drm/virtio/virtgpu_vq.c
> +++ b/drivers/gpu/drm/virtio/virtgpu_vq.c
> @@ -191,7 +191,7 @@ void virtio_gpu_dequeue_ctrl_func(struct work_struct *work)
>         } while (!virtqueue_enable_cb(vgdev->ctrlq.vq));
>         spin_unlock(&vgdev->ctrlq.qlock);
>
> -       list_for_each_entry_safe(entry, tmp, &reclaim_list, list) {
> +       list_for_each_entry(entry, &reclaim_list, list) {
>                 resp = (struct virtio_gpu_ctrl_hdr *)entry->resp_buf;
>
>                 trace_virtio_gpu_cmd_response(vgdev->ctrlq.vq, resp);
> @@ -218,14 +218,18 @@ void virtio_gpu_dequeue_ctrl_func(struct work_struct *work)
>                 }
>                 if (entry->resp_cb)
>                         entry->resp_cb(vgdev, entry);
> -
> -               list_del(&entry->list);
> -               free_vbuf(vgdev, entry);
>         }
>         wake_up(&vgdev->ctrlq.ack_queue);
>
>         if (fence_id)
>                 virtio_gpu_fence_event_process(vgdev, fence_id);
> +
> +       list_for_each_entry_safe(entry, tmp, &reclaim_list, list) {
> +               if (entry->objs)
> +                       virtio_gpu_array_put_free(entry->objs);
> +               list_del(&entry->list);
> +               free_vbuf(vgdev, entry);
> +       }
>  }
>
>  void virtio_gpu_dequeue_cursor_func(struct work_struct *work)
> @@ -337,6 +341,10 @@ static int virtio_gpu_queue_fenced_ctrl_buffer(struct virtio_gpu_device *vgdev,
>
>         if (fence)
>                 virtio_gpu_fence_emit(vgdev, hdr, fence);
> +       if (vbuf->objs) {
> +               virtio_gpu_array_add_fence(vbuf->objs, &fence->f);
> +               virtio_gpu_array_unlock_resv(vbuf->objs);
> +       }
This is with the spinlock held.  Maybe we should move the
virtio_gpu_array_unlock_resv call out of the critical section.

I am actually more concerned about virtio_gpu_array_add_fence, but it
is also harder to move.  Should we add a kref to the object array?

This bothers me because I recently ran into a CPU-bound game with very
bad lock contention here.
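
For reference, the whole window sits under vgdev->ctrlq.qlock; a
stripped-down view of virtio_gpu_queue_fenced_ctrl_buffer() as of this
patch (context reconstructed around the hunk above, not verbatim):

    spin_lock(&vgdev->ctrlq.qlock);
    if (fence)
            virtio_gpu_fence_emit(vgdev, hdr, fence);
    if (vbuf->objs) {
            /* per-object dma_resv updates plus ww_mutex unlocks,
             * all while holding the queue spinlock */
            virtio_gpu_array_add_fence(vbuf->objs, &fence->f);
            virtio_gpu_array_unlock_resv(vbuf->objs);
    }
    rc = virtio_gpu_queue_ctrl_buffer_locked(vgdev, vbuf);
    spin_unlock(&vgdev->ctrlq.qlock);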

>         rc = virtio_gpu_queue_ctrl_buffer_locked(vgdev, vbuf);
>         spin_unlock(&vgdev->ctrlq.qlock);
>         return rc;
> @@ -939,7 +947,9 @@ void virtio_gpu_cmd_transfer_from_host_3d(struct virtio_gpu_device *vgdev,
>
>  void virtio_gpu_cmd_submit(struct virtio_gpu_device *vgdev,
>                            void *data, uint32_t data_size,
> -                          uint32_t ctx_id, struct virtio_gpu_fence *fence)
> +                          uint32_t ctx_id,
> +                          struct virtio_gpu_object_array *objs,
> +                          struct virtio_gpu_fence *fence)
>  {
>         struct virtio_gpu_cmd_submit *cmd_p;
>         struct virtio_gpu_vbuffer *vbuf;
> @@ -949,6 +959,7 @@ void virtio_gpu_cmd_submit(struct virtio_gpu_device *vgdev,
>
>         vbuf->data_buf = data;
>         vbuf->data_size = data_size;
> +       vbuf->objs = objs;
>
>         cmd_p->hdr.type = cpu_to_le32(VIRTIO_GPU_CMD_SUBMIT_3D);
>         cmd_p->hdr.ctx_id = cpu_to_le32(ctx_id);
> --
> 2.18.1
>
Gerd Hoffmann July 4, 2019, 11:25 a.m. UTC | #2
Hi,

> >         if (fence)
> >                 virtio_gpu_fence_emit(vgdev, hdr, fence);
> > +       if (vbuf->objs) {
> > +               virtio_gpu_array_add_fence(vbuf->objs, &fence->f);
> > +               virtio_gpu_array_unlock_resv(vbuf->objs);
> > +       }
> This is with the spinlock held.  Maybe we should move the
> virtio_gpu_array_unlock_resv call out of the critical section.

That would bring back the race ...

> I am actually more concerned about virtio_gpu_array_add_fence, but it
> is also harder to move.  Should we add a kref to the object array?

Yep, refcounting would be the other way to fix the race.
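
Something along these lines, purely hypothetical (the object array in
this series has no refcount, and the names below are made up):

    struct virtio_gpu_object_array {
            struct kref ref;        /* hypothetical addition */
            /* ... existing fields ... */
    };

    static void virtio_gpu_array_release(struct kref *ref)
    {
            struct virtio_gpu_object_array *objs =
                    container_of(ref, struct virtio_gpu_object_array, ref);

            virtio_gpu_array_put_free(objs);
    }

With the submit path and the completion callback each holding a
reference, touching the array after dropping ctrlq.qlock would no
longer race against the free in virtio_gpu_dequeue_ctrl_func().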

> This bothers me because I recently ran into a CPU-bound game with very
> bad lock contention here.

Hmm.  Any clue where this comes from?  Multiple threads competing for
virtio buffers I guess?  Maybe we should have larger virtqueues?

cheers,
  Gerd
Chia-I Wu July 4, 2019, 6:46 p.m. UTC | #3
On Thu, Jul 4, 2019 at 4:25 AM Gerd Hoffmann <kraxel@redhat.com> wrote:
>
>   Hi,
>
> > >         if (fence)
> > >                 virtio_gpu_fence_emit(vgdev, hdr, fence);
> > > +       if (vbuf->objs) {
> > > +               virtio_gpu_array_add_fence(vbuf->objs, &fence->f);
> > > +               virtio_gpu_array_unlock_resv(vbuf->objs);
> > > +       }
> > This is with the spinlock held.  Maybe we should move the
> > virtio_gpu_array_unlock_resv call out of the critical section.
>
> That would bring back the race ...
Right...
>
> > I am actually more concerned about virtio_gpu_array_add_fence, but it
> > is also harder to move.  Should we add a kref to the object array?
>
> Yep, refcounting would be the other way to fix the race.
>
> > This bothers me because I recently ran into a CPU-bound game with very
> > bad lock contention here.
>
> Hmm.  Any clue where this comes from?  Multiple threads competing for
> virtio buffers I guess?  Maybe we should have larger virtqueues?
The game was single-threaded.  I guess it was the game and Xorg
competing for virtio buffers.  That was also on an older kernel
without explicit fences.  The userspace had to create dummy resources
frequently to do DRM_IOCTL_VIRTGPU_WAIT.

I think this is fine for now as far as I am concerned.  I can look
into this more closely after this series lands.


>
> cheers,
>   Gerd
>
Chia-I Wu July 11, 2019, 2:35 a.m. UTC | #4
On Thu, Jul 4, 2019 at 11:46 AM Chia-I Wu <olvaffe@gmail.com> wrote:
>
> On Thu, Jul 4, 2019 at 4:25 AM Gerd Hoffmann <kraxel@redhat.com> wrote:
> >
> >   Hi,
> >
> > > >         if (fence)
> > > >                 virtio_gpu_fence_emit(vgdev, hdr, fence);
> > > > +       if (vbuf->objs) {
> > > > +               virtio_gpu_array_add_fence(vbuf->objs, &fence->f);
> > > > +               virtio_gpu_array_unlock_resv(vbuf->objs);
> > > > +       }
> > > This is with the spinlock held.  Maybe we should move the
> > > virtio_gpu_array_unlock_resv call out of the critical section.
> >
> > That would bring back the race ...
> Right...
> >
> > > I am actually more concerned about virtio_gpu_array_add_fence, but it
> > > is also harder to move.  Should we add a kref to the object array?
> >
> > Yep, refcounting would be the other way to fix the race.
> >
> > > This bothers me because I recently ran into a CPU-bound game with very
> > > bad lock contention here.
> >
> > Hmm.  Any clue where this comes from?  Multiple threads competing for
> > virtio buffers I guess?  Maybe we should have larger virtqueues?
> The game was single-threaded.  I guess it was the game and Xorg
> competing for virtio buffers.  That was also on an older kernel
> without explicit fences.  The userspace had to create dummy resources
> frequently to do DRM_IOCTL_VIRTGPU_WAIT.
>
> I think this is fine for now as far as I am concerned.  I can look
> into this more closely after this series lands.
It was virtio_gpu_dequeue_ctrl_func that wanted to grab the lock to
handle the responses.  I sent a patch for it

  https://patchwork.freedesktop.org/series/63529/

>
>
> >
> > cheers,
> >   Gerd
> >
Patch

diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.h b/drivers/gpu/drm/virtio/virtgpu_drv.h
index abb078a5dedf..98511d1dfff2 100644
--- a/drivers/gpu/drm/virtio/virtgpu_drv.h
+++ b/drivers/gpu/drm/virtio/virtgpu_drv.h
@@ -121,9 +121,9 @@  struct virtio_gpu_vbuffer {
 
 	char *resp_buf;
 	int resp_size;
-
 	virtio_gpu_resp_cb resp_cb;
 
+	struct virtio_gpu_object_array *objs;
 	struct list_head list;
 };
 
@@ -318,7 +318,9 @@  void virtio_gpu_cmd_context_detach_resource(struct virtio_gpu_device *vgdev,
 					    uint32_t resource_id);
 void virtio_gpu_cmd_submit(struct virtio_gpu_device *vgdev,
 			   void *data, uint32_t data_size,
-			   uint32_t ctx_id, struct virtio_gpu_fence *fence);
+			   uint32_t ctx_id,
+			   struct virtio_gpu_object_array *objs,
+			   struct virtio_gpu_fence *fence);
 void virtio_gpu_cmd_transfer_from_host_3d(struct virtio_gpu_device *vgdev,
 					  uint32_t resource_id, uint32_t ctx_id,
 					  uint64_t offset, uint32_t level,
diff --git a/drivers/gpu/drm/virtio/virtgpu_ioctl.c b/drivers/gpu/drm/virtio/virtgpu_ioctl.c
index 0caff3fa623e..9735d7e5899b 100644
--- a/drivers/gpu/drm/virtio/virtgpu_ioctl.c
+++ b/drivers/gpu/drm/virtio/virtgpu_ioctl.c
@@ -105,16 +105,11 @@  static int virtio_gpu_execbuffer_ioctl(struct drm_device *dev, void *data,
 	struct drm_virtgpu_execbuffer *exbuf = data;
 	struct virtio_gpu_device *vgdev = dev->dev_private;
 	struct virtio_gpu_fpriv *vfpriv = drm_file->driver_priv;
-	struct drm_gem_object *gobj;
 	struct virtio_gpu_fence *out_fence;
-	struct virtio_gpu_object *qobj;
 	int ret;
 	uint32_t *bo_handles = NULL;
 	void __user *user_bo_handles = NULL;
-	struct list_head validate_list;
-	struct ttm_validate_buffer *buflist = NULL;
-	int i;
-	struct ww_acquire_ctx ticket;
+	struct virtio_gpu_object_array *buflist = NULL;
 	struct sync_file *sync_file;
 	int in_fence_fd = exbuf->fence_fd;
 	int out_fence_fd = -1;
@@ -155,15 +150,10 @@  static int virtio_gpu_execbuffer_ioctl(struct drm_device *dev, void *data,
 			return out_fence_fd;
 	}
 
-	INIT_LIST_HEAD(&validate_list);
 	if (exbuf->num_bo_handles) {
-
 		bo_handles = kvmalloc_array(exbuf->num_bo_handles,
-					   sizeof(uint32_t), GFP_KERNEL);
-		buflist = kvmalloc_array(exbuf->num_bo_handles,
-					   sizeof(struct ttm_validate_buffer),
-					   GFP_KERNEL | __GFP_ZERO);
-		if (!bo_handles || !buflist) {
+					    sizeof(uint32_t), GFP_KERNEL);
+		if (!bo_handles) {
 			ret = -ENOMEM;
 			goto out_unused_fd;
 		}
@@ -175,25 +165,21 @@  static int virtio_gpu_execbuffer_ioctl(struct drm_device *dev, void *data,
 			goto out_unused_fd;
 		}
 
-		for (i = 0; i < exbuf->num_bo_handles; i++) {
-			gobj = drm_gem_object_lookup(drm_file, bo_handles[i]);
-			if (!gobj) {
-				ret = -ENOENT;
-				goto out_unused_fd;
-			}
-
-			qobj = gem_to_virtio_gpu_obj(gobj);
-			buflist[i].bo = &qobj->tbo;
-
-			list_add(&buflist[i].head, &validate_list);
+		buflist = virtio_gpu_array_from_handles(drm_file, bo_handles,
+							exbuf->num_bo_handles);
+		if (!buflist) {
+			ret = -ENOENT;
+			goto out_unused_fd;
 		}
 		kvfree(bo_handles);
 		bo_handles = NULL;
 	}
 
-	ret = virtio_gpu_object_list_validate(&ticket, &validate_list);
-	if (ret)
-		goto out_free;
+	if (buflist) {
+		ret = virtio_gpu_array_lock_resv(buflist);
+		if (ret)
+			goto out_unused_fd;
+	}
 
 	buf = memdup_user(u64_to_user_ptr(exbuf->command), exbuf->size);
 	if (IS_ERR(buf)) {
@@ -220,24 +206,18 @@  static int virtio_gpu_execbuffer_ioctl(struct drm_device *dev, void *data,
 	}
 
 	virtio_gpu_cmd_submit(vgdev, buf, exbuf->size,
-			      vfpriv->ctx_id, out_fence);
-
-	ttm_eu_fence_buffer_objects(&ticket, &validate_list, &out_fence->f);
-
-	/* fence the command bo */
-	virtio_gpu_unref_list(&validate_list);
-	kvfree(buflist);
+			      vfpriv->ctx_id, buflist, out_fence);
 	return 0;
 
 out_memdup:
 	kfree(buf);
 out_unresv:
-	ttm_eu_backoff_reservation(&ticket, &validate_list);
-out_free:
-	virtio_gpu_unref_list(&validate_list);
+	if (buflist)
+		virtio_gpu_array_unlock_resv(buflist);
 out_unused_fd:
 	kvfree(bo_handles);
-	kvfree(buflist);
+	if (buflist)
+		virtio_gpu_array_put_free(buflist);
 
 	if (out_fence_fd >= 0)
 		put_unused_fd(out_fence_fd);
diff --git a/drivers/gpu/drm/virtio/virtgpu_vq.c b/drivers/gpu/drm/virtio/virtgpu_vq.c
index 6c1a90717535..dbe329801e84 100644
--- a/drivers/gpu/drm/virtio/virtgpu_vq.c
+++ b/drivers/gpu/drm/virtio/virtgpu_vq.c
@@ -191,7 +191,7 @@  void virtio_gpu_dequeue_ctrl_func(struct work_struct *work)
 	} while (!virtqueue_enable_cb(vgdev->ctrlq.vq));
 	spin_unlock(&vgdev->ctrlq.qlock);
 
-	list_for_each_entry_safe(entry, tmp, &reclaim_list, list) {
+	list_for_each_entry(entry, &reclaim_list, list) {
 		resp = (struct virtio_gpu_ctrl_hdr *)entry->resp_buf;
 
 		trace_virtio_gpu_cmd_response(vgdev->ctrlq.vq, resp);
@@ -218,14 +218,18 @@  void virtio_gpu_dequeue_ctrl_func(struct work_struct *work)
 		}
 		if (entry->resp_cb)
 			entry->resp_cb(vgdev, entry);
-
-		list_del(&entry->list);
-		free_vbuf(vgdev, entry);
 	}
 	wake_up(&vgdev->ctrlq.ack_queue);
 
 	if (fence_id)
 		virtio_gpu_fence_event_process(vgdev, fence_id);
+
+	list_for_each_entry_safe(entry, tmp, &reclaim_list, list) {
+		if (entry->objs)
+			virtio_gpu_array_put_free(entry->objs);
+		list_del(&entry->list);
+		free_vbuf(vgdev, entry);
+	}
 }
 
 void virtio_gpu_dequeue_cursor_func(struct work_struct *work)
@@ -337,6 +341,10 @@  static int virtio_gpu_queue_fenced_ctrl_buffer(struct virtio_gpu_device *vgdev,
 
 	if (fence)
 		virtio_gpu_fence_emit(vgdev, hdr, fence);
+	if (vbuf->objs) {
+		virtio_gpu_array_add_fence(vbuf->objs, &fence->f);
+		virtio_gpu_array_unlock_resv(vbuf->objs);
+	}
 	rc = virtio_gpu_queue_ctrl_buffer_locked(vgdev, vbuf);
 	spin_unlock(&vgdev->ctrlq.qlock);
 	return rc;
@@ -939,7 +947,9 @@  void virtio_gpu_cmd_transfer_from_host_3d(struct virtio_gpu_device *vgdev,
 
 void virtio_gpu_cmd_submit(struct virtio_gpu_device *vgdev,
 			   void *data, uint32_t data_size,
-			   uint32_t ctx_id, struct virtio_gpu_fence *fence)
+			   uint32_t ctx_id,
+			   struct virtio_gpu_object_array *objs,
+			   struct virtio_gpu_fence *fence)
 {
 	struct virtio_gpu_cmd_submit *cmd_p;
 	struct virtio_gpu_vbuffer *vbuf;
@@ -949,6 +959,7 @@  void virtio_gpu_cmd_submit(struct virtio_gpu_device *vgdev,
 
 	vbuf->data_buf = data;
 	vbuf->data_size = data_size;
+	vbuf->objs = objs;
 
 	cmd_p->hdr.type = cpu_to_le32(VIRTIO_GPU_CMD_SUBMIT_3D);
 	cmd_p->hdr.ctx_id = cpu_to_le32(ctx_id);