diff mbox series

[v2] drm/msm: Add syncobj support.

Message ID 20200116230417.12076-1-bas@basnieuwenhuizen.nl (mailing list archive)
State New, archived
Headers show
Series [v2] drm/msm: Add syncobj support. | expand

Commit Message

Bas Nieuwenhuizen Jan. 16, 2020, 11:04 p.m. UTC
This

1) Enables core DRM syncobj support.
2) Adds options to the submission ioctl to wait/signal syncobjs.

Just like the wait fence fd, this does inline waits. Using the
scheduler would be nice but I believe it is out of scope for
this work.

Support for timeline syncobjs is implemented and the interface
is ready for it, but I'm not enabling it yet until there is
some code for turnip to use it.

The reset is mostly in there because in the presence of waiting
and signalling the same semaphores, resetting them after
signalling can become very annoying.

v2:
  - Fixed style issues
  - Removed a cleanup issue in a failure case
  - Moved to a copy_from_user per syncobj

Signed-off-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
---
 drivers/gpu/drm/msm/msm_drv.c        |   6 +-
 drivers/gpu/drm/msm/msm_gem_submit.c | 236 ++++++++++++++++++++++++++-
 include/uapi/drm/msm_drm.h           |  24 ++-
 3 files changed, 262 insertions(+), 4 deletions(-)

Comments

Jordan Crouse Jan. 17, 2020, 6:17 p.m. UTC | #1
On Fri, Jan 17, 2020 at 12:04:17AM +0100, Bas Nieuwenhuizen wrote:
> This
> 
> 1) Enables core DRM syncobj support.
> 2) Adds options to the submission ioctl to wait/signal syncobjs.
> 
> Just like the wait fence fd, this does inline waits. Using the
> scheduler would be nice but I believe it is out of scope for
> this work.
> 
> Support for timeline syncobjs is implemented and the interface
> is ready for it, but I'm not enabling it yet until there is
> some code for turnip to use it.
> 
> The reset is mostly in there because in the presence of waiting
> and signalling the same semaphores, resetting them after
> signalling can become very annoying.
> 
> v2:
>   - Fixed style issues
>   - Removed a cleanup issue in a failure case
>   - Moved to a copy_from_user per syncobj

A few nits, but nothing serious.  This is looking good, thanks for the quick
turn around.

> Signed-off-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
> ---
>  drivers/gpu/drm/msm/msm_drv.c        |   6 +-
>  drivers/gpu/drm/msm/msm_gem_submit.c | 236 ++++++++++++++++++++++++++-
>  include/uapi/drm/msm_drm.h           |  24 ++-
>  3 files changed, 262 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
> index c84f0a8b3f2c..5246b41798df 100644
> --- a/drivers/gpu/drm/msm/msm_drv.c
> +++ b/drivers/gpu/drm/msm/msm_drv.c
> @@ -37,9 +37,10 @@
>   * - 1.4.0 - softpin, MSM_RELOC_BO_DUMP, and GEM_INFO support to set/get
>   *           GEM object's debug name
>   * - 1.5.0 - Add SUBMITQUERY_QUERY ioctl
> + * - 1.6.0 - Syncobj support
>   */
>  #define MSM_VERSION_MAJOR	1
> -#define MSM_VERSION_MINOR	5
> +#define MSM_VERSION_MINOR	6
>  #define MSM_VERSION_PATCHLEVEL	0
>  
>  static const struct drm_mode_config_funcs mode_config_funcs = {
> @@ -988,7 +989,8 @@ static struct drm_driver msm_driver = {
>  	.driver_features    = DRIVER_GEM |
>  				DRIVER_RENDER |
>  				DRIVER_ATOMIC |
> -				DRIVER_MODESET,
> +				DRIVER_MODESET |
> +				DRIVER_SYNCOBJ,
>  	.open               = msm_open,
>  	.postclose           = msm_postclose,
>  	.lastclose          = drm_fb_helper_lastclose,
> diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c
> index be5327af16fa..6c7f95fc5cfd 100644
> --- a/drivers/gpu/drm/msm/msm_gem_submit.c
> +++ b/drivers/gpu/drm/msm/msm_gem_submit.c
> @@ -8,7 +8,9 @@
>  #include <linux/sync_file.h>
>  #include <linux/uaccess.h>
>  
> +#include <drm/drm_drv.h>
>  #include <drm/drm_file.h>
> +#include <drm/drm_syncobj.h>
>  
>  #include "msm_drv.h"
>  #include "msm_gpu.h"
> @@ -394,6 +396,191 @@ static void submit_cleanup(struct msm_gem_submit *submit)
>  	ww_acquire_fini(&submit->ticket);
>  }
>  
> +
> +struct msm_submit_post_dep {
> +	struct drm_syncobj *syncobj;
> +	uint64_t point;
> +	struct dma_fence_chain *chain;
> +};
> +
> +static int msm_wait_deps(struct drm_device *dev,
> +                         struct drm_file *file,
> +                         uint64_t in_syncobjs_addr,
> +                         uint32_t nr_in_syncobjs,
> +                         uint32_t syncobj_stride,
> +                         struct msm_ringbuffer *ring,
> +                         struct drm_syncobj ***syncobjs)
> +{
> +	struct drm_msm_gem_submit_syncobj syncobj_desc = {0};
> +	int ret = 0;
> +	uint32_t i, j;
> +
> +	*syncobjs = kcalloc(nr_in_syncobjs, sizeof(**syncobjs),
> +	                    GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY);

Perfect - I think this will do everything we want - protect us against OOM death
but also not introduce artificial constraints on object counts.

> +	if (!syncobjs) {

You should be able to just return -ENOMEM here.

> +		ret = -ENOMEM;
> +		goto out_syncobjs;
> +	}

> +
> +	for (i = 0; i < nr_in_syncobjs; ++i) {
> +		uint64_t address = in_syncobjs_addr + i * syncobj_stride;
> +		struct dma_fence *fence;
> +
> +		if (copy_from_user(&syncobj_desc,
> +			           u64_to_user_ptr(address),
> +			           min(syncobj_stride, sizeof(syncobj_desc)))) {
> +			ret = -EFAULT;
> +			goto out_syncobjs;

You can just use break here.

> +		}
> +
> +		if (syncobj_desc.point &&
> +		    !drm_core_check_feature(dev, DRIVER_SYNCOBJ_TIMELINE)) {
> +			ret = -EOPNOTSUPP;
> +			break;
> +		}
> +
> +		if (syncobj_desc.flags & ~MSM_SUBMIT_SYNCOBJ_FLAGS) {
> +			ret = -EINVAL;
> +			break;
> +		}
> +
> +		ret = drm_syncobj_find_fence(file, syncobj_desc.handle,
> +		                             syncobj_desc.point, 0, &fence);
> +		if (ret)
> +			break;
> +
> +		if (!dma_fence_match_context(fence, ring->fctx->context))
> +			ret = dma_fence_wait(fence, true);
> +
> +		dma_fence_put(fence);
> +		if (ret)
> +			break;
> +
> +		if (syncobj_desc.flags & MSM_SUBMIT_SYNCOBJ_RESET) {
> +			(*syncobjs)[i] =
> +				drm_syncobj_find(file, syncobj_desc.handle);
> +			if (!(*syncobjs)[i]) {
> +				ret = -EINVAL;
> +				break;
> +			}
> +		}
> +	}
> +
> +out_syncobjs:
> +	if (ret && *syncobjs) {
> +		for (j = 0; j < i; ++j) {

You could also walk backwards from i and save having another iterator:

for( ; i >=0; i--)

> +			if ((*syncobjs)[j])
> +				drm_syncobj_put((*syncobjs)[j]);
> +		}
> +		kfree(*syncobjs);
> +		*syncobjs = NULL;
> +	}
> +	return ret;

if you wanted to you could return syncobjs or ERR_PTR instead of passing it by
reference. I would probably chose that option personally but it is up to you.

> +}
> +
> +static void msm_reset_syncobjs(struct drm_syncobj **syncobjs,
> +                               uint32_t nr_syncobjs)
> +{
> +	uint32_t i;
> +
> +	for (i = 0; syncobjs && i < nr_syncobjs; ++i) {
> +		if (syncobjs[i])
> +			drm_syncobj_replace_fence(syncobjs[i], NULL);
> +	}
> +}
> +
> +static int msm_parse_post_deps(struct drm_device *dev,
> +                               struct drm_file *file,
> +                               uint64_t out_syncobjs_addr,
> +                               uint32_t nr_out_syncobjs,
> +			       uint32_t syncobj_stride,
> +                               struct msm_submit_post_dep **post_deps)
> +{
> +	int ret = 0;
> +	uint32_t i, j;
> +
> +	*post_deps = kmalloc_array(nr_out_syncobjs, sizeof(**post_deps),
> +	                           GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY);
> +	if (!*post_deps) {
> +		ret = -ENOMEM;
> +		goto out_syncobjs;

return -ENOMEM works fine.

> +	}
> +
> +	for (i = 0; i < nr_out_syncobjs; ++i) {
> +		uint64_t address = out_syncobjs_addr + i * syncobj_stride;
> +
> +		if (copy_from_user(&syncobj_desc,
> +			           u64_to_user_ptr(address),
> +			           min(syncobj_stride, sizeof(syncobj_desc)))) {
> +			ret = -EFAULT;
> +			goto out_syncobjs;

You can break here.

> +		}
> +
> +		(*post_deps)[i].point = syncobj_desc.point;
> +		(*post_deps)[i].chain = NULL;
> +
> +		if (syncobj_desc.flags) {
> +			ret = -EINVAL;
> +			break;
> +		}
> +
> +		if (syncobj_desc.point) {
> +			if (!drm_core_check_feature(dev,
> +			                            DRIVER_SYNCOBJ_TIMELINE)) {
> +				ret = -EOPNOTSUPP;
> +				break;
> +			}
> +
> +			(*post_deps)[i].chain =
> +				kmalloc(sizeof(*(*post_deps)[i].chain),
> +				        GFP_KERNEL);
> +			if (!(*post_deps)[i].chain) {
> +				ret = -ENOMEM;
> +				break;
> +			}
> +		}
> +
> +		(*post_deps)[i].syncobj =
> +			drm_syncobj_find(file, syncobj_desc.handle);
> +		if (!(*post_deps)[i].syncobj) {
> +			ret = -EINVAL;
> +			break;
> +		}
> +	}
> +
> +	if (ret) {
> +		for (j = 0; j <= i; ++j) {

Same suggest as above, if you would prefer.

> +			kfree((*post_deps)[j].chain);
> +			if ((*post_deps)[j].syncobj)
> +				drm_syncobj_put((*post_deps)[j].syncobj);
> +		}
> +
> +		kfree(*post_deps);
> +		*post_deps = NULL;
> +	}
> +
> +out_syncobjs:
> +	return ret;

This might also be a good spot to return post_deps / ERR_PTR.

> +}
> +
> +static void msm_process_post_deps(struct msm_submit_post_dep *post_deps,
> +                                  uint32_t count, struct dma_fence *fence)
> +{
> +	uint32_t i;
> +
> +	for (i = 0; post_deps && i < count; ++i) {
> +		if (post_deps[i].chain) {
> +			drm_syncobj_add_point(post_deps[i].syncobj,
> +			                      post_deps[i].chain,
> +			                      fence, post_deps[i].point);
> +			post_deps[i].chain = NULL;
> +		} else {
> +			drm_syncobj_replace_fence(post_deps[i].syncobj,
> +			                          fence);
> +		}
> +	}
> +}
> +
>  int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
>  		struct drm_file *file)
>  {
> @@ -406,6 +593,8 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
>  	struct sync_file *sync_file = NULL;
>  	struct msm_gpu_submitqueue *queue;
>  	struct msm_ringbuffer *ring;
> +	struct msm_submit_post_dep *post_deps = NULL;
> +	struct drm_syncobj **syncobjs_to_reset = NULL;
>  	int out_fence_fd = -1;
>  	struct pid *pid = get_pid(task_pid(current));
>  	unsigned i;
> @@ -413,6 +602,9 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
>  	if (!gpu)
>  		return -ENXIO;
>  
> +	if (args->pad)
> +		return -EINVAL;
> +

>  	/* for now, we just have 3d pipe.. eventually this would need to
>  	 * be more clever to dispatch to appropriate gpu module:
>  	 */
> @@ -460,9 +652,28 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
>  			return ret;
>  	}
>  
> +	if (args->flags & MSM_SUBMIT_SYNCOBJ_IN) {
> +		ret = msm_wait_deps(dev, file,
> +		                    args->in_syncobjs, args->nr_in_syncobjs,
> +		                    args->syncobj_stride, ring,

If you wanted to, you could just pass args directly to the function so you
didn't have to take it apart.

Also, Rob - do we want to do the trick where we return
sizeof(drm_msm_gem_submit_syncobj) if the input stride is zero?

> +		                    &syncobjs_to_reset);
> +		if (ret)
> +			goto out_post_unlock;
> +	}
> +
> +	if (args->flags & MSM_SUBMIT_SYNCOBJ_OUT) {
> +		ret = msm_parse_post_deps(dev, file,
> +		                          args->out_syncobjs,
> +		                          args->nr_out_syncobjs,
> +		                          args->syncobj_stride,

And same.

> +		                          &post_deps);
> +		if (ret)
> +			goto out_post_unlock;
> +	}
> +
>  	ret = mutex_lock_interruptible(&dev->struct_mutex);
>  	if (ret)
> -		return ret;
> +		goto out_post_unlock;
>  
>  	if (args->flags & MSM_SUBMIT_FENCE_FD_OUT) {
>  		out_fence_fd = get_unused_fd_flags(O_CLOEXEC);
> @@ -586,6 +797,11 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
>  		args->fence_fd = out_fence_fd;
>  	}
>  
> +	msm_reset_syncobjs(syncobjs_to_reset, args->nr_in_syncobjs);
> +	msm_process_post_deps(post_deps, args->nr_out_syncobjs,
> +	                      submit->fence);
> +
> +
>  out:
>  	submit_cleanup(submit);
>  	if (ret)
> @@ -594,5 +810,23 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
>  	if (ret && (out_fence_fd >= 0))
>  		put_unused_fd(out_fence_fd);
>  	mutex_unlock(&dev->struct_mutex);
> +
> +out_post_unlock:
> +	if (post_deps) {
> +		for (i = 0; i < args->nr_out_syncobjs; ++i) {
> +			kfree(post_deps[i].chain);
> +			drm_syncobj_put(post_deps[i].syncobj);
> +		}
> +		kfree(post_deps);
> +	}
> +
> +	if (syncobjs_to_reset) {
> +		for (i = 0; i < args->nr_in_syncobjs; ++i) {
> +			if (syncobjs_to_reset[i])
> +				drm_syncobj_put(syncobjs_to_reset[i]);
> +		}
> +		kfree(syncobjs_to_reset);
> +	}
> +
>  	return ret;
>  }
> diff --git a/include/uapi/drm/msm_drm.h b/include/uapi/drm/msm_drm.h
> index 0b85ed6a3710..19806eb3a8e8 100644
> --- a/include/uapi/drm/msm_drm.h
> +++ b/include/uapi/drm/msm_drm.h
> @@ -217,13 +217,28 @@ struct drm_msm_gem_submit_bo {
>  #define MSM_SUBMIT_FENCE_FD_IN   0x40000000 /* enable input fence_fd */
>  #define MSM_SUBMIT_FENCE_FD_OUT  0x20000000 /* enable output fence_fd */
>  #define MSM_SUBMIT_SUDO          0x10000000 /* run submitted cmds from RB */
> +#define MSM_SUBMIT_SYNCOBJ_IN    0x08000000 /* enable input syncobj */
> +#define MSM_SUBMIT_SYNCOBJ_OUT   0x04000000 /* enable output syncobj */
>  #define MSM_SUBMIT_FLAGS                ( \
>  		MSM_SUBMIT_NO_IMPLICIT   | \
>  		MSM_SUBMIT_FENCE_FD_IN   | \
>  		MSM_SUBMIT_FENCE_FD_OUT  | \
>  		MSM_SUBMIT_SUDO          | \
> +		MSM_SUBMIT_SYNCOBJ_IN    | \
> +		MSM_SUBMIT_SYNCOBJ_OUT   | \
>  		0)
>  
> +#define MSM_SUBMIT_SYNCOBJ_RESET 0x00000001 /* Reset syncobj after wait. */
> +#define MSM_SUBMIT_SYNCOBJ_FLAGS        ( \
> +		MSM_SUBMIT_SYNCOBJ_RESET | \
> +		0)
> +
> +struct drm_msm_gem_submit_syncobj {
> +	__u32 handle;     /* in, syncobj handle. */
> +	__u32 flags;      /* in, from MSM_SUBMIT_SYNCOBJ_FLAGS */
> +	__u64 point;      /* in, timepoint for timeline syncobjs. */
> +};
> +
>  /* Each cmdstream submit consists of a table of buffers involved, and
>   * one or more cmdstream buffers.  This allows for conditional execution
>   * (context-restore), and IB buffers needed for per tile/bin draw cmds.
> @@ -236,7 +251,14 @@ struct drm_msm_gem_submit {
>  	__u64 bos;            /* in, ptr to array of submit_bo's */
>  	__u64 cmds;           /* in, ptr to array of submit_cmd's */
>  	__s32 fence_fd;       /* in/out fence fd (see MSM_SUBMIT_FENCE_FD_IN/OUT) */
> -	__u32 queueid;         /* in, submitqueue id */
> +	__u32 queueid;        /* in, submitqueue id */
> +	__u64 in_syncobjs;    /* in, ptr to to array of drm_msm_gem_submit_syncobj */
> +	__u64 out_syncobjs;   /* in, ptr to to array of drm_msm_gem_submit_syncobj */
> +	__u32 nr_in_syncobjs; /* in, number of entries in in_syncobj */
> +	__u32 nr_out_syncobjs; /* in, number of entries in out_syncobj. */
> +	__u32 syncobj_stride; /* in, stride of syncobj arrays. */
> +	__u32 pad;            /*in, reserved for future use, always 0. */
> +
>  };
>  
>  /* The normal way to synchronize with the GPU is just to CPU_PREP on
> -- 
> 2.24.1
>
Bas Nieuwenhuizen Jan. 17, 2020, 6:32 p.m. UTC | #2
On Fri, Jan 17, 2020 at 7:17 PM Jordan Crouse <jcrouse@codeaurora.org> wrote:
>
> On Fri, Jan 17, 2020 at 12:04:17AM +0100, Bas Nieuwenhuizen wrote:
> > This
> >
> > 1) Enables core DRM syncobj support.
> > 2) Adds options to the submission ioctl to wait/signal syncobjs.
> >
> > Just like the wait fence fd, this does inline waits. Using the
> > scheduler would be nice but I believe it is out of scope for
> > this work.
> >
> > Support for timeline syncobjs is implemented and the interface
> > is ready for it, but I'm not enabling it yet until there is
> > some code for turnip to use it.
> >
> > The reset is mostly in there because in the presence of waiting
> > and signalling the same semaphores, resetting them after
> > signalling can become very annoying.
> >
> > v2:
> >   - Fixed style issues
> >   - Removed a cleanup issue in a failure case
> >   - Moved to a copy_from_user per syncobj
>
> A few nits, but nothing serious.  This is looking good, thanks for the quick
> turn around.
>
> > Signed-off-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
> > ---
> >  drivers/gpu/drm/msm/msm_drv.c        |   6 +-
> >  drivers/gpu/drm/msm/msm_gem_submit.c | 236 ++++++++++++++++++++++++++-
> >  include/uapi/drm/msm_drm.h           |  24 ++-
> >  3 files changed, 262 insertions(+), 4 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
> > index c84f0a8b3f2c..5246b41798df 100644
> > --- a/drivers/gpu/drm/msm/msm_drv.c
> > +++ b/drivers/gpu/drm/msm/msm_drv.c
> > @@ -37,9 +37,10 @@
> >   * - 1.4.0 - softpin, MSM_RELOC_BO_DUMP, and GEM_INFO support to set/get
> >   *           GEM object's debug name
> >   * - 1.5.0 - Add SUBMITQUERY_QUERY ioctl
> > + * - 1.6.0 - Syncobj support
> >   */
> >  #define MSM_VERSION_MAJOR    1
> > -#define MSM_VERSION_MINOR    5
> > +#define MSM_VERSION_MINOR    6
> >  #define MSM_VERSION_PATCHLEVEL       0
> >
> >  static const struct drm_mode_config_funcs mode_config_funcs = {
> > @@ -988,7 +989,8 @@ static struct drm_driver msm_driver = {
> >       .driver_features    = DRIVER_GEM |
> >                               DRIVER_RENDER |
> >                               DRIVER_ATOMIC |
> > -                             DRIVER_MODESET,
> > +                             DRIVER_MODESET |
> > +                             DRIVER_SYNCOBJ,
> >       .open               = msm_open,
> >       .postclose           = msm_postclose,
> >       .lastclose          = drm_fb_helper_lastclose,
> > diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c
> > index be5327af16fa..6c7f95fc5cfd 100644
> > --- a/drivers/gpu/drm/msm/msm_gem_submit.c
> > +++ b/drivers/gpu/drm/msm/msm_gem_submit.c
> > @@ -8,7 +8,9 @@
> >  #include <linux/sync_file.h>
> >  #include <linux/uaccess.h>
> >
> > +#include <drm/drm_drv.h>
> >  #include <drm/drm_file.h>
> > +#include <drm/drm_syncobj.h>
> >
> >  #include "msm_drv.h"
> >  #include "msm_gpu.h"
> > @@ -394,6 +396,191 @@ static void submit_cleanup(struct msm_gem_submit *submit)
> >       ww_acquire_fini(&submit->ticket);
> >  }
> >
> > +
> > +struct msm_submit_post_dep {
> > +     struct drm_syncobj *syncobj;
> > +     uint64_t point;
> > +     struct dma_fence_chain *chain;
> > +};
> > +
> > +static int msm_wait_deps(struct drm_device *dev,
> > +                         struct drm_file *file,
> > +                         uint64_t in_syncobjs_addr,
> > +                         uint32_t nr_in_syncobjs,
> > +                         uint32_t syncobj_stride,
> > +                         struct msm_ringbuffer *ring,
> > +                         struct drm_syncobj ***syncobjs)
> > +{
> > +     struct drm_msm_gem_submit_syncobj syncobj_desc = {0};
> > +     int ret = 0;
> > +     uint32_t i, j;
> > +
> > +     *syncobjs = kcalloc(nr_in_syncobjs, sizeof(**syncobjs),
> > +                         GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY);
>
> Perfect - I think this will do everything we want - protect us against OOM death
> but also not introduce artificial constraints on object counts.
>
> > +     if (!syncobjs) {
>
> You should be able to just return -ENOMEM here.
>
> > +             ret = -ENOMEM;
> > +             goto out_syncobjs;
> > +     }
>
> > +
> > +     for (i = 0; i < nr_in_syncobjs; ++i) {
> > +             uint64_t address = in_syncobjs_addr + i * syncobj_stride;
> > +             struct dma_fence *fence;
> > +
> > +             if (copy_from_user(&syncobj_desc,
> > +                                u64_to_user_ptr(address),
> > +                                min(syncobj_stride, sizeof(syncobj_desc)))) {
> > +                     ret = -EFAULT;
> > +                     goto out_syncobjs;
>
> You can just use break here.
>
> > +             }
> > +
> > +             if (syncobj_desc.point &&
> > +                 !drm_core_check_feature(dev, DRIVER_SYNCOBJ_TIMELINE)) {
> > +                     ret = -EOPNOTSUPP;
> > +                     break;
> > +             }
> > +
> > +             if (syncobj_desc.flags & ~MSM_SUBMIT_SYNCOBJ_FLAGS) {
> > +                     ret = -EINVAL;
> > +                     break;
> > +             }
> > +
> > +             ret = drm_syncobj_find_fence(file, syncobj_desc.handle,
> > +                                          syncobj_desc.point, 0, &fence);
> > +             if (ret)
> > +                     break;
> > +
> > +             if (!dma_fence_match_context(fence, ring->fctx->context))
> > +                     ret = dma_fence_wait(fence, true);
> > +
> > +             dma_fence_put(fence);
> > +             if (ret)
> > +                     break;
> > +
> > +             if (syncobj_desc.flags & MSM_SUBMIT_SYNCOBJ_RESET) {
> > +                     (*syncobjs)[i] =
> > +                             drm_syncobj_find(file, syncobj_desc.handle);
> > +                     if (!(*syncobjs)[i]) {
> > +                             ret = -EINVAL;
> > +                             break;
> > +                     }
> > +             }
> > +     }
> > +
> > +out_syncobjs:
> > +     if (ret && *syncobjs) {
> > +             for (j = 0; j < i; ++j) {
>
> You could also walk backwards from i and save having another iterator:
>
> for( ; i >=0; i--)

I thought about that but the slight annoyance is that from the API
perspective they're all unsigned so a >= 0 doesn't really work.

So we have 3 alternatives:

1) check for INT32_MAX and then use signed
2) keep the iterator
3) do { ... } while (i-- != 0);

I think 1 adds extra complexity for no good reason. (unless we want to
implicitly rely on that failing the kmalloc?) Happy to do 3 if we're
happy with a do while construct.

>
> > +                     if ((*syncobjs)[j])
> > +                             drm_syncobj_put((*syncobjs)[j]);
> > +             }
> > +             kfree(*syncobjs);
> > +             *syncobjs = NULL;
> > +     }
> > +     return ret;
>
> if you wanted to you could return syncobjs or ERR_PTR instead of passing it by
> reference. I would probably chose that option personally but it is up to you.

How does this work wrt returning different error codes?

>
> > +}
> > +
> > +static void msm_reset_syncobjs(struct drm_syncobj **syncobjs,
> > +                               uint32_t nr_syncobjs)
> > +{
> > +     uint32_t i;
> > +
> > +     for (i = 0; syncobjs && i < nr_syncobjs; ++i) {
> > +             if (syncobjs[i])
> > +                     drm_syncobj_replace_fence(syncobjs[i], NULL);
> > +     }
> > +}
> > +
> > +static int msm_parse_post_deps(struct drm_device *dev,
> > +                               struct drm_file *file,
> > +                               uint64_t out_syncobjs_addr,
> > +                               uint32_t nr_out_syncobjs,
> > +                            uint32_t syncobj_stride,
> > +                               struct msm_submit_post_dep **post_deps)
> > +{
> > +     int ret = 0;
> > +     uint32_t i, j;
> > +
> > +     *post_deps = kmalloc_array(nr_out_syncobjs, sizeof(**post_deps),
> > +                                GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY);
> > +     if (!*post_deps) {
> > +             ret = -ENOMEM;
> > +             goto out_syncobjs;
>
> return -ENOMEM works fine.
>
> > +     }
> > +
> > +     for (i = 0; i < nr_out_syncobjs; ++i) {
> > +             uint64_t address = out_syncobjs_addr + i * syncobj_stride;
> > +
> > +             if (copy_from_user(&syncobj_desc,
> > +                                u64_to_user_ptr(address),
> > +                                min(syncobj_stride, sizeof(syncobj_desc)))) {
> > +                     ret = -EFAULT;
> > +                     goto out_syncobjs;
>
> You can break here.
>
> > +             }
> > +
> > +             (*post_deps)[i].point = syncobj_desc.point;
> > +             (*post_deps)[i].chain = NULL;
> > +
> > +             if (syncobj_desc.flags) {
> > +                     ret = -EINVAL;
> > +                     break;
> > +             }
> > +
> > +             if (syncobj_desc.point) {
> > +                     if (!drm_core_check_feature(dev,
> > +                                                 DRIVER_SYNCOBJ_TIMELINE)) {
> > +                             ret = -EOPNOTSUPP;
> > +                             break;
> > +                     }
> > +
> > +                     (*post_deps)[i].chain =
> > +                             kmalloc(sizeof(*(*post_deps)[i].chain),
> > +                                     GFP_KERNEL);
> > +                     if (!(*post_deps)[i].chain) {
> > +                             ret = -ENOMEM;
> > +                             break;
> > +                     }
> > +             }
> > +
> > +             (*post_deps)[i].syncobj =
> > +                     drm_syncobj_find(file, syncobj_desc.handle);
> > +             if (!(*post_deps)[i].syncobj) {
> > +                     ret = -EINVAL;
> > +                     break;
> > +             }
> > +     }
> > +
> > +     if (ret) {
> > +             for (j = 0; j <= i; ++j) {
>
> Same suggest as above, if you would prefer.
>
> > +                     kfree((*post_deps)[j].chain);
> > +                     if ((*post_deps)[j].syncobj)
> > +                             drm_syncobj_put((*post_deps)[j].syncobj);
> > +             }
> > +
> > +             kfree(*post_deps);
> > +             *post_deps = NULL;
> > +     }
> > +
> > +out_syncobjs:
> > +     return ret;
>
> This might also be a good spot to return post_deps / ERR_PTR.
>
> > +}
> > +
> > +static void msm_process_post_deps(struct msm_submit_post_dep *post_deps,
> > +                                  uint32_t count, struct dma_fence *fence)
> > +{
> > +     uint32_t i;
> > +
> > +     for (i = 0; post_deps && i < count; ++i) {
> > +             if (post_deps[i].chain) {
> > +                     drm_syncobj_add_point(post_deps[i].syncobj,
> > +                                           post_deps[i].chain,
> > +                                           fence, post_deps[i].point);
> > +                     post_deps[i].chain = NULL;
> > +             } else {
> > +                     drm_syncobj_replace_fence(post_deps[i].syncobj,
> > +                                               fence);
> > +             }
> > +     }
> > +}
> > +
> >  int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
> >               struct drm_file *file)
> >  {
> > @@ -406,6 +593,8 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
> >       struct sync_file *sync_file = NULL;
> >       struct msm_gpu_submitqueue *queue;
> >       struct msm_ringbuffer *ring;
> > +     struct msm_submit_post_dep *post_deps = NULL;
> > +     struct drm_syncobj **syncobjs_to_reset = NULL;
> >       int out_fence_fd = -1;
> >       struct pid *pid = get_pid(task_pid(current));
> >       unsigned i;
> > @@ -413,6 +602,9 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
> >       if (!gpu)
> >               return -ENXIO;
> >
> > +     if (args->pad)
> > +             return -EINVAL;
> > +
>
> >       /* for now, we just have 3d pipe.. eventually this would need to
> >        * be more clever to dispatch to appropriate gpu module:
> >        */
> > @@ -460,9 +652,28 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
> >                       return ret;
> >       }
> >
> > +     if (args->flags & MSM_SUBMIT_SYNCOBJ_IN) {
> > +             ret = msm_wait_deps(dev, file,
> > +                                 args->in_syncobjs, args->nr_in_syncobjs,
> > +                                 args->syncobj_stride, ring,
>
> If you wanted to, you could just pass args directly to the function so you
> didn't have to take it apart.

I think the advantage here is making it really explicit what part of
args is not used, and I don't feel the number of args has quite gone
out of control yet, but happy to change if insist.

(sorry, not seeing if this is a "you could do that" vs. a "I would
prefer if you do that, politely")

>
> Also, Rob - do we want to do the trick where we return
> sizeof(drm_msm_gem_submit_syncobj) if the input stride is zero?

Do you mean making this an in/out field and modifying it to return the
size if a stride of 0 has been given? If so, I don't see the point,
because userspace would not know what to fill any extra size with
(sure they can 0 initialize the extra part, but the kernel already
does that zero initializing the struct to be copied into).

or do you mean interpreting stride as
sizeof(drm_msm_gem_submit_syncobj) if the input stride is 0? In that
case I think that is just an invitation for userspace to set itself up
for incompatibility issues when the size actually changes. Let's
enforce doing it right.

(or did I misunderstand entirely?)

>
> > +                                 &syncobjs_to_reset);
> > +             if (ret)
> > +                     goto out_post_unlock;
> > +     }
> > +
> > +     if (args->flags & MSM_SUBMIT_SYNCOBJ_OUT) {
> > +             ret = msm_parse_post_deps(dev, file,
> > +                                       args->out_syncobjs,
> > +                                       args->nr_out_syncobjs,
> > +                                       args->syncobj_stride,
>
> And same.
>
> > +                                       &post_deps);
> > +             if (ret)
> > +                     goto out_post_unlock;
> > +     }
> > +
> >       ret = mutex_lock_interruptible(&dev->struct_mutex);
> >       if (ret)
> > -             return ret;
> > +             goto out_post_unlock;
> >
> >       if (args->flags & MSM_SUBMIT_FENCE_FD_OUT) {
> >               out_fence_fd = get_unused_fd_flags(O_CLOEXEC);
> > @@ -586,6 +797,11 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
> >               args->fence_fd = out_fence_fd;
> >       }
> >
> > +     msm_reset_syncobjs(syncobjs_to_reset, args->nr_in_syncobjs);
> > +     msm_process_post_deps(post_deps, args->nr_out_syncobjs,
> > +                           submit->fence);
> > +
> > +
> >  out:
> >       submit_cleanup(submit);
> >       if (ret)
> > @@ -594,5 +810,23 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
> >       if (ret && (out_fence_fd >= 0))
> >               put_unused_fd(out_fence_fd);
> >       mutex_unlock(&dev->struct_mutex);
> > +
> > +out_post_unlock:
> > +     if (post_deps) {
> > +             for (i = 0; i < args->nr_out_syncobjs; ++i) {
> > +                     kfree(post_deps[i].chain);
> > +                     drm_syncobj_put(post_deps[i].syncobj);
> > +             }
> > +             kfree(post_deps);
> > +     }
> > +
> > +     if (syncobjs_to_reset) {
> > +             for (i = 0; i < args->nr_in_syncobjs; ++i) {
> > +                     if (syncobjs_to_reset[i])
> > +                             drm_syncobj_put(syncobjs_to_reset[i]);
> > +             }
> > +             kfree(syncobjs_to_reset);
> > +     }
> > +
> >       return ret;
> >  }
> > diff --git a/include/uapi/drm/msm_drm.h b/include/uapi/drm/msm_drm.h
> > index 0b85ed6a3710..19806eb3a8e8 100644
> > --- a/include/uapi/drm/msm_drm.h
> > +++ b/include/uapi/drm/msm_drm.h
> > @@ -217,13 +217,28 @@ struct drm_msm_gem_submit_bo {
> >  #define MSM_SUBMIT_FENCE_FD_IN   0x40000000 /* enable input fence_fd */
> >  #define MSM_SUBMIT_FENCE_FD_OUT  0x20000000 /* enable output fence_fd */
> >  #define MSM_SUBMIT_SUDO          0x10000000 /* run submitted cmds from RB */
> > +#define MSM_SUBMIT_SYNCOBJ_IN    0x08000000 /* enable input syncobj */
> > +#define MSM_SUBMIT_SYNCOBJ_OUT   0x04000000 /* enable output syncobj */
> >  #define MSM_SUBMIT_FLAGS                ( \
> >               MSM_SUBMIT_NO_IMPLICIT   | \
> >               MSM_SUBMIT_FENCE_FD_IN   | \
> >               MSM_SUBMIT_FENCE_FD_OUT  | \
> >               MSM_SUBMIT_SUDO          | \
> > +             MSM_SUBMIT_SYNCOBJ_IN    | \
> > +             MSM_SUBMIT_SYNCOBJ_OUT   | \
> >               0)
> >
> > +#define MSM_SUBMIT_SYNCOBJ_RESET 0x00000001 /* Reset syncobj after wait. */
> > +#define MSM_SUBMIT_SYNCOBJ_FLAGS        ( \
> > +             MSM_SUBMIT_SYNCOBJ_RESET | \
> > +             0)
> > +
> > +struct drm_msm_gem_submit_syncobj {
> > +     __u32 handle;     /* in, syncobj handle. */
> > +     __u32 flags;      /* in, from MSM_SUBMIT_SYNCOBJ_FLAGS */
> > +     __u64 point;      /* in, timepoint for timeline syncobjs. */
> > +};
> > +
> >  /* Each cmdstream submit consists of a table of buffers involved, and
> >   * one or more cmdstream buffers.  This allows for conditional execution
> >   * (context-restore), and IB buffers needed for per tile/bin draw cmds.
> > @@ -236,7 +251,14 @@ struct drm_msm_gem_submit {
> >       __u64 bos;            /* in, ptr to array of submit_bo's */
> >       __u64 cmds;           /* in, ptr to array of submit_cmd's */
> >       __s32 fence_fd;       /* in/out fence fd (see MSM_SUBMIT_FENCE_FD_IN/OUT) */
> > -     __u32 queueid;         /* in, submitqueue id */
> > +     __u32 queueid;        /* in, submitqueue id */
> > +     __u64 in_syncobjs;    /* in, ptr to to array of drm_msm_gem_submit_syncobj */
> > +     __u64 out_syncobjs;   /* in, ptr to to array of drm_msm_gem_submit_syncobj */
> > +     __u32 nr_in_syncobjs; /* in, number of entries in in_syncobj */
> > +     __u32 nr_out_syncobjs; /* in, number of entries in out_syncobj. */
> > +     __u32 syncobj_stride; /* in, stride of syncobj arrays. */
> > +     __u32 pad;            /*in, reserved for future use, always 0. */
> > +
> >  };
> >
> >  /* The normal way to synchronize with the GPU is just to CPU_PREP on
> > --
> > 2.24.1
> >
>
> --
> The Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum,
> a Linux Foundation Collaborative Project
kernel test robot Jan. 18, 2020, 12:43 p.m. UTC | #3
Hi Bas,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on tegra-drm/drm/tegra/for-next]
[also build test ERROR on drm-tip/drm-tip linus/master v5.5-rc6 next-20200117]
[cannot apply to drm-exynos/exynos-drm-next drm-intel/for-linux-next drm/drm-next]
[if your patch is applied to the wrong git tree, please drop us a note to help
improve the system. BTW, we also suggest to use '--base' option to specify the
base tree in git format-patch, please see https://stackoverflow.com/a/37406982]

url:    https://github.com/0day-ci/linux/commits/Bas-Nieuwenhuizen/drm-msm-Add-syncobj-support/20200118-033342
base:   git://anongit.freedesktop.org/tegra/linux.git drm/tegra/for-next
config: arm-defconfig (attached as .config)
compiler: arm-linux-gnueabi-gcc (GCC) 7.5.0
reproduce:
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # save the attached .config to linux build tree
        GCC_VERSION=7.5.0 make.cross ARCH=arm 

If you fix the issue, kindly add following tag
Reported-by: kbuild test robot <lkp@intel.com>

All error/warnings (new ones prefixed by >>):

   drivers/gpu/drm/msm/msm_gem_submit.c: In function 'msm_parse_post_deps':
>> drivers/gpu/drm/msm/msm_gem_submit.c:512:23: error: 'syncobj_desc' undeclared (first use in this function); did you mean 'syncobj_stride'?
      if (copy_from_user(&syncobj_desc,
                          ^~~~~~~~~~~~
                          syncobj_stride
   drivers/gpu/drm/msm/msm_gem_submit.c:512:23: note: each undeclared identifier is reported only once for each function it appears in
   In file included from include/linux/list.h:9:0,
                    from include/linux/preempt.h:11,
                    from include/linux/spinlock.h:51,
                    from include/linux/seqlock.h:36,
                    from include/linux/time.h:6,
                    from include/linux/ktime.h:24,
                    from include/linux/sync_file.h:17,
                    from drivers/gpu/drm/msm/msm_gem_submit.c:8:
>> include/linux/kernel.h:868:2: error: first argument to '__builtin_choose_expr' not a constant
     __builtin_choose_expr(__safe_cmp(x, y), \
     ^
   include/linux/kernel.h:877:19: note: in expansion of macro '__careful_cmp'
    #define min(x, y) __careful_cmp(x, y, <)
                      ^~~~~~~~~~~~~
>> drivers/gpu/drm/msm/msm_gem_submit.c:514:15: note: in expansion of macro 'min'
                  min(syncobj_stride, sizeof(syncobj_desc)))) {
                  ^~~

vim +512 drivers/gpu/drm/msm/msm_gem_submit.c

   491	
   492	static int msm_parse_post_deps(struct drm_device *dev,
   493	                               struct drm_file *file,
   494	                               uint64_t out_syncobjs_addr,
   495	                               uint32_t nr_out_syncobjs,
   496				       uint32_t syncobj_stride,
   497	                               struct msm_submit_post_dep **post_deps)
   498	{
   499		int ret = 0;
   500		uint32_t i, j;
   501	
   502		*post_deps = kmalloc_array(nr_out_syncobjs, sizeof(**post_deps),
   503		                           GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY);
   504		if (!*post_deps) {
   505			ret = -ENOMEM;
   506			goto out_syncobjs;
   507		}
   508	
   509		for (i = 0; i < nr_out_syncobjs; ++i) {
   510			uint64_t address = out_syncobjs_addr + i * syncobj_stride;
   511	
 > 512			if (copy_from_user(&syncobj_desc,
   513				           u64_to_user_ptr(address),
 > 514				           min(syncobj_stride, sizeof(syncobj_desc)))) {
   515				ret = -EFAULT;
   516				goto out_syncobjs;
   517			}
   518	
   519			(*post_deps)[i].point = syncobj_desc.point;
   520			(*post_deps)[i].chain = NULL;
   521	
   522			if (syncobj_desc.flags) {
   523				ret = -EINVAL;
   524				break;
   525			}
   526	
   527			if (syncobj_desc.point) {
   528				if (!drm_core_check_feature(dev,
   529				                            DRIVER_SYNCOBJ_TIMELINE)) {
   530					ret = -EOPNOTSUPP;
   531					break;
   532				}
   533	
   534				(*post_deps)[i].chain =
   535					kmalloc(sizeof(*(*post_deps)[i].chain),
   536					        GFP_KERNEL);
   537				if (!(*post_deps)[i].chain) {
   538					ret = -ENOMEM;
   539					break;
   540				}
   541			}
   542	
   543			(*post_deps)[i].syncobj =
   544				drm_syncobj_find(file, syncobj_desc.handle);
   545			if (!(*post_deps)[i].syncobj) {
   546				ret = -EINVAL;
   547				break;
   548			}
   549		}
   550	
   551		if (ret) {
   552			for (j = 0; j <= i; ++j) {
   553				kfree((*post_deps)[j].chain);
   554				if ((*post_deps)[j].syncobj)
   555					drm_syncobj_put((*post_deps)[j].syncobj);
   556			}
   557	
   558			kfree(*post_deps);
   559			*post_deps = NULL;
   560		}
   561	
   562	out_syncobjs:
   563		return ret;
   564	}
   565	

---
0-DAY kernel test infrastructure                 Open Source Technology Center
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org Intel Corporation
Jordan Crouse Jan. 20, 2020, 4:06 p.m. UTC | #4
On Fri, Jan 17, 2020 at 07:32:27PM +0100, Bas Nieuwenhuizen wrote:
> On Fri, Jan 17, 2020 at 7:17 PM Jordan Crouse <jcrouse@codeaurora.org> wrote:
> >
> > On Fri, Jan 17, 2020 at 12:04:17AM +0100, Bas Nieuwenhuizen wrote:
> > > This
> > >
> > > 1) Enables core DRM syncobj support.
> > > 2) Adds options to the submission ioctl to wait/signal syncobjs.
> > >
> > > Just like the wait fence fd, this does inline waits. Using the
> > > scheduler would be nice but I believe it is out of scope for
> > > this work.
> > >
> > > Support for timeline syncobjs is implemented and the interface
> > > is ready for it, but I'm not enabling it yet until there is
> > > some code for turnip to use it.
> > >
> > > The reset is mostly in there because in the presence of waiting
> > > and signalling the same semaphores, resetting them after
> > > signalling can become very annoying.
> > >
> > > v2:
> > >   - Fixed style issues
> > >   - Removed a cleanup issue in a failure case
> > >   - Moved to a copy_from_user per syncobj
> >
> > A few nits, but nothing serious.  This is looking good, thanks for the quick
> > turn around.
> >
> > > Signed-off-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>

<snip>

> > > +out_syncobjs:
> > > +     if (ret && *syncobjs) {
> > > +             for (j = 0; j < i; ++j) {
> >
> > You could also walk backwards from i and save having another iterator:
> >
> > for( ; i >=0; i--)
> 
> I thought about that but the slight annoyance is that from the API
> perspective they're all unsigned so a >= 0 doesn't really work.


> So we have 3 alternatives:
> 
> 1) check for INT32_MAX and then use signed
> 2) keep the iterator
> 3) do { ... } while (i-- != 0);
> 
> I think 1 adds extra complexity for no good reason. (unless we want to
> implicitly rely on that failing the kmalloc?) Happy to do 3 if we're
> happy with a do while construct.

Ah, good point on the unsigned-ness. Keeping the iterator is fine.  No reason to
try to be overly clever.

> 
> >
> > > +                     if ((*syncobjs)[j])
> > > +                             drm_syncobj_put((*syncobjs)[j]);
> > > +             }
> > > +             kfree(*syncobjs);
> > > +             *syncobjs = NULL;
> > > +     }
> > > +     return ret;
> >
> > if you wanted to you could return syncobjs or ERR_PTR instead of passing it by
> > reference. I would probably chose that option personally but it is up to you.
> 
> How does this work wrt returning different error codes?

For each error code, you would use the ERR_PTR() macro, so for example,

return ERR_PTR(-ENOMEM);

and then for the success path you would return the actual pointer,

return syncobjs;

The caller would use the IS_ERR() macro to check the return value. It can also
get the error code back with PTR_ERR() to send to the upper levels.

It is up to you. I personally like using ERR_PTR() but as long as the code works
whichever method you choose is fine by me.
> >
> > > +}
> > > +
> > > +static void msm_reset_syncobjs(struct drm_syncobj **syncobjs,
> > > +                               uint32_t nr_syncobjs)
> > > +{
> > > +     uint32_t i;
> > > +
> > > +     for (i = 0; syncobjs && i < nr_syncobjs; ++i) {
> > > +             if (syncobjs[i])
> > > +                     drm_syncobj_replace_fence(syncobjs[i], NULL);
> > > +     }
> > > +}
> > > +
> > > +static int msm_parse_post_deps(struct drm_device *dev,
> > > +                               struct drm_file *file,
> > > +                               uint64_t out_syncobjs_addr,
> > > +                               uint32_t nr_out_syncobjs,
> > > +                            uint32_t syncobj_stride,
> > > +                               struct msm_submit_post_dep **post_deps)
> > > +{
> > > +     int ret = 0;
> > > +     uint32_t i, j;
> > > +
> > > +     *post_deps = kmalloc_array(nr_out_syncobjs, sizeof(**post_deps),
> > > +                                GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY);
> > > +     if (!*post_deps) {
> > > +             ret = -ENOMEM;
> > > +             goto out_syncobjs;
> >
> > return -ENOMEM works fine.
> >
> > > +     }
> > > +
> > > +     for (i = 0; i < nr_out_syncobjs; ++i) {
> > > +             uint64_t address = out_syncobjs_addr + i * syncobj_stride;
> > > +
> > > +             if (copy_from_user(&syncobj_desc,
> > > +                                u64_to_user_ptr(address),
> > > +                                min(syncobj_stride, sizeof(syncobj_desc)))) {
> > > +                     ret = -EFAULT;
> > > +                     goto out_syncobjs;
> >
> > You can break here.
> >
> > > +             }
> > > +
> > > +             (*post_deps)[i].point = syncobj_desc.point;
> > > +             (*post_deps)[i].chain = NULL;
> > > +
> > > +             if (syncobj_desc.flags) {
> > > +                     ret = -EINVAL;
> > > +                     break;
> > > +             }
> > > +
> > > +             if (syncobj_desc.point) {
> > > +                     if (!drm_core_check_feature(dev,
> > > +                                                 DRIVER_SYNCOBJ_TIMELINE)) {
> > > +                             ret = -EOPNOTSUPP;
> > > +                             break;
> > > +                     }
> > > +
> > > +                     (*post_deps)[i].chain =
> > > +                             kmalloc(sizeof(*(*post_deps)[i].chain),
> > > +                                     GFP_KERNEL);
> > > +                     if (!(*post_deps)[i].chain) {
> > > +                             ret = -ENOMEM;
> > > +                             break;
> > > +                     }
> > > +             }
> > > +
> > > +             (*post_deps)[i].syncobj =
> > > +                     drm_syncobj_find(file, syncobj_desc.handle);
> > > +             if (!(*post_deps)[i].syncobj) {
> > > +                     ret = -EINVAL;
> > > +                     break;
> > > +             }
> > > +     }
> > > +
> > > +     if (ret) {
> > > +             for (j = 0; j <= i; ++j) {
> >
> > Same suggest as above, if you would prefer.
> >
> > > +                     kfree((*post_deps)[j].chain);
> > > +                     if ((*post_deps)[j].syncobj)
> > > +                             drm_syncobj_put((*post_deps)[j].syncobj);
> > > +             }
> > > +
> > > +             kfree(*post_deps);
> > > +             *post_deps = NULL;
> > > +     }
> > > +
> > > +out_syncobjs:
> > > +     return ret;
> >
> > This might also be a good spot to return post_deps / ERR_PTR.
> >
> > > +}
> > > +
> > > +static void msm_process_post_deps(struct msm_submit_post_dep *post_deps,
> > > +                                  uint32_t count, struct dma_fence *fence)
> > > +{
> > > +     uint32_t i;
> > > +
> > > +     for (i = 0; post_deps && i < count; ++i) {
> > > +             if (post_deps[i].chain) {
> > > +                     drm_syncobj_add_point(post_deps[i].syncobj,
> > > +                                           post_deps[i].chain,
> > > +                                           fence, post_deps[i].point);
> > > +                     post_deps[i].chain = NULL;
> > > +             } else {
> > > +                     drm_syncobj_replace_fence(post_deps[i].syncobj,
> > > +                                               fence);
> > > +             }
> > > +     }
> > > +}
> > > +
> > >  int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
> > >               struct drm_file *file)
> > >  {
> > > @@ -406,6 +593,8 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
> > >       struct sync_file *sync_file = NULL;
> > >       struct msm_gpu_submitqueue *queue;
> > >       struct msm_ringbuffer *ring;
> > > +     struct msm_submit_post_dep *post_deps = NULL;
> > > +     struct drm_syncobj **syncobjs_to_reset = NULL;
> > >       int out_fence_fd = -1;
> > >       struct pid *pid = get_pid(task_pid(current));
> > >       unsigned i;
> > > @@ -413,6 +602,9 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
> > >       if (!gpu)
> > >               return -ENXIO;
> > >
> > > +     if (args->pad)
> > > +             return -EINVAL;
> > > +
> >
> > >       /* for now, we just have 3d pipe.. eventually this would need to
> > >        * be more clever to dispatch to appropriate gpu module:
> > >        */
> > > @@ -460,9 +652,28 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
> > >                       return ret;
> > >       }
> > >
> > > +     if (args->flags & MSM_SUBMIT_SYNCOBJ_IN) {
> > > +             ret = msm_wait_deps(dev, file,
> > > +                                 args->in_syncobjs, args->nr_in_syncobjs,
> > > +                                 args->syncobj_stride, ring,
> >
> > If you wanted to, you could just pass args directly to the function so you
> > didn't have to take it apart.
> 
> I think the advantage here is making it really explicit what part of
> args is not used, and I don't feel the number of args has quite gone
> out of control yet, but happy to change if insist.
> 
> (sorry, not seeing if this is a "you could do that" vs. a "I would
> prefer if you do that, politely")

This was a "you could do that". I try to not be too angry about matters of
personal coding preference.

> >
> > Also, Rob - do we want to do the trick where we return
> > sizeof(drm_msm_gem_submit_syncobj) if the input stride is zero?
> 
> Do you mean making this an in/out field and modifying it to return the
> size if a stride of 0 has been given? If so, I don't see the point,
> because userspace would not know what to fill any extra size with
> (sure they can 0 initialize the extra part, but the kernel already
> does that zero initializing the struct to be copied into).
> 
> or do you mean interpreting stride as
> sizeof(drm_msm_gem_submit_syncobj) if the input stride is 0? In that
> case I think that is just an invitation for userspace to set itself up
> for incompatibility issues when the size actually changes. Let's
> enforce doing it right.
> 
> (or did I misunderstand entirely?)

The general idea is to build in a bit of compatibility for user space. Say for
example that we add a new member to drm_mrm_gem_submit_syncobj and add support
for it in the kernel and user space.  And then later the userspace library is
used with an older kernel that (for the purposes of this experiment) supports
the syncobj interface but not the expanded struct. If user space queried the
size of the struct it could use the returned size from the kernel to determine
if the expanded support was there or not. We do something similar to this for
submitqueue queries [1].

I couched it as a question because I'm not sure if that sort of query would
be useful here and I'm not sure if somebody would go to the effort of setting
up a (partial) submit just for the query, but we've been working on making
ourselves more resilient so I figured I would raise the point.

[1] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/drivers/gpu/drm/msm/msm_submitqueue.c#n122

Jordan

<snip>
Bas Nieuwenhuizen Jan. 23, 2020, 11:35 p.m. UTC | #5
On Mon, Jan 20, 2020 at 5:06 PM Jordan Crouse <jcrouse@codeaurora.org> wrote:
>
> On Fri, Jan 17, 2020 at 07:32:27PM +0100, Bas Nieuwenhuizen wrote:
> > On Fri, Jan 17, 2020 at 7:17 PM Jordan Crouse <jcrouse@codeaurora.org> wrote:
> > >
> > > On Fri, Jan 17, 2020 at 12:04:17AM +0100, Bas Nieuwenhuizen wrote:
> > > > This
> > > >
> > > > 1) Enables core DRM syncobj support.
> > > > 2) Adds options to the submission ioctl to wait/signal syncobjs.
> > > >
> > > > Just like the wait fence fd, this does inline waits. Using the
> > > > scheduler would be nice but I believe it is out of scope for
> > > > this work.
> > > >
> > > > Support for timeline syncobjs is implemented and the interface
> > > > is ready for it, but I'm not enabling it yet until there is
> > > > some code for turnip to use it.
> > > >
> > > > The reset is mostly in there because in the presence of waiting
> > > > and signalling the same semaphores, resetting them after
> > > > signalling can become very annoying.
> > > >
> > > > v2:
> > > >   - Fixed style issues
> > > >   - Removed a cleanup issue in a failure case
> > > >   - Moved to a copy_from_user per syncobj
> > >
> > > A few nits, but nothing serious.  This is looking good, thanks for the quick
> > > turn around.
> > >
> > > > Signed-off-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
>
> <snip>
>
> > > > +out_syncobjs:
> > > > +     if (ret && *syncobjs) {
> > > > +             for (j = 0; j < i; ++j) {
> > >
> > > You could also walk backwards from i and save having another iterator:
> > >
> > > for( ; i >=0; i--)
> >
> > I thought about that but the slight annoyance is that from the API
> > perspective they're all unsigned so a >= 0 doesn't really work.
>
>
> > So we have 3 alternatives:
> >
> > 1) check for INT32_MAX and then use signed
> > 2) keep the iterator
> > 3) do { ... } while (i-- != 0);
> >
> > I think 1 adds extra complexity for no good reason. (unless we want to
> > implicitly rely on that failing the kmalloc?) Happy to do 3 if we're
> > happy with a do while construct.
>
> Ah, good point on the unsigned-ness. Keeping the iterator is fine.  No reason to
> try to be overly clever.
>
> >
> > >
> > > > +                     if ((*syncobjs)[j])
> > > > +                             drm_syncobj_put((*syncobjs)[j]);
> > > > +             }
> > > > +             kfree(*syncobjs);
> > > > +             *syncobjs = NULL;
> > > > +     }
> > > > +     return ret;
> > >
> > > if you wanted to you could return syncobjs or ERR_PTR instead of passing it by
> > > reference. I would probably chose that option personally but it is up to you.
> >
> > How does this work wrt returning different error codes?
>
> For each error code, you would use the ERR_PTR() macro, so for example,
>
> return ERR_PTR(-ENOMEM);
>
> and then for the success path you would return the actual pointer,
>
> return syncobjs;
>
> The caller would use the IS_ERR() macro to check the return value. It can also
> get the error code back with PTR_ERR() to send to the upper levels.
>
> It is up to you. I personally like using ERR_PTR() but as long as the code works
> whichever method you choose is fine by me.
> > >
> > > > +}
> > > > +
> > > > +static void msm_reset_syncobjs(struct drm_syncobj **syncobjs,
> > > > +                               uint32_t nr_syncobjs)
> > > > +{
> > > > +     uint32_t i;
> > > > +
> > > > +     for (i = 0; syncobjs && i < nr_syncobjs; ++i) {
> > > > +             if (syncobjs[i])
> > > > +                     drm_syncobj_replace_fence(syncobjs[i], NULL);
> > > > +     }
> > > > +}
> > > > +
> > > > +static int msm_parse_post_deps(struct drm_device *dev,
> > > > +                               struct drm_file *file,
> > > > +                               uint64_t out_syncobjs_addr,
> > > > +                               uint32_t nr_out_syncobjs,
> > > > +                            uint32_t syncobj_stride,
> > > > +                               struct msm_submit_post_dep **post_deps)
> > > > +{
> > > > +     int ret = 0;
> > > > +     uint32_t i, j;
> > > > +
> > > > +     *post_deps = kmalloc_array(nr_out_syncobjs, sizeof(**post_deps),
> > > > +                                GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY);
> > > > +     if (!*post_deps) {
> > > > +             ret = -ENOMEM;
> > > > +             goto out_syncobjs;
> > >
> > > return -ENOMEM works fine.
> > >
> > > > +     }
> > > > +
> > > > +     for (i = 0; i < nr_out_syncobjs; ++i) {
> > > > +             uint64_t address = out_syncobjs_addr + i * syncobj_stride;
> > > > +
> > > > +             if (copy_from_user(&syncobj_desc,
> > > > +                                u64_to_user_ptr(address),
> > > > +                                min(syncobj_stride, sizeof(syncobj_desc)))) {
> > > > +                     ret = -EFAULT;
> > > > +                     goto out_syncobjs;
> > >
> > > You can break here.
> > >
> > > > +             }
> > > > +
> > > > +             (*post_deps)[i].point = syncobj_desc.point;
> > > > +             (*post_deps)[i].chain = NULL;
> > > > +
> > > > +             if (syncobj_desc.flags) {
> > > > +                     ret = -EINVAL;
> > > > +                     break;
> > > > +             }
> > > > +
> > > > +             if (syncobj_desc.point) {
> > > > +                     if (!drm_core_check_feature(dev,
> > > > +                                                 DRIVER_SYNCOBJ_TIMELINE)) {
> > > > +                             ret = -EOPNOTSUPP;
> > > > +                             break;
> > > > +                     }
> > > > +
> > > > +                     (*post_deps)[i].chain =
> > > > +                             kmalloc(sizeof(*(*post_deps)[i].chain),
> > > > +                                     GFP_KERNEL);
> > > > +                     if (!(*post_deps)[i].chain) {
> > > > +                             ret = -ENOMEM;
> > > > +                             break;
> > > > +                     }
> > > > +             }
> > > > +
> > > > +             (*post_deps)[i].syncobj =
> > > > +                     drm_syncobj_find(file, syncobj_desc.handle);
> > > > +             if (!(*post_deps)[i].syncobj) {
> > > > +                     ret = -EINVAL;
> > > > +                     break;
> > > > +             }
> > > > +     }
> > > > +
> > > > +     if (ret) {
> > > > +             for (j = 0; j <= i; ++j) {
> > >
> > > Same suggest as above, if you would prefer.
> > >
> > > > +                     kfree((*post_deps)[j].chain);
> > > > +                     if ((*post_deps)[j].syncobj)
> > > > +                             drm_syncobj_put((*post_deps)[j].syncobj);
> > > > +             }
> > > > +
> > > > +             kfree(*post_deps);
> > > > +             *post_deps = NULL;
> > > > +     }
> > > > +
> > > > +out_syncobjs:
> > > > +     return ret;
> > >
> > > This might also be a good spot to return post_deps / ERR_PTR.
> > >
> > > > +}
> > > > +
> > > > +static void msm_process_post_deps(struct msm_submit_post_dep *post_deps,
> > > > +                                  uint32_t count, struct dma_fence *fence)
> > > > +{
> > > > +     uint32_t i;
> > > > +
> > > > +     for (i = 0; post_deps && i < count; ++i) {
> > > > +             if (post_deps[i].chain) {
> > > > +                     drm_syncobj_add_point(post_deps[i].syncobj,
> > > > +                                           post_deps[i].chain,
> > > > +                                           fence, post_deps[i].point);
> > > > +                     post_deps[i].chain = NULL;
> > > > +             } else {
> > > > +                     drm_syncobj_replace_fence(post_deps[i].syncobj,
> > > > +                                               fence);
> > > > +             }
> > > > +     }
> > > > +}
> > > > +
> > > >  int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
> > > >               struct drm_file *file)
> > > >  {
> > > > @@ -406,6 +593,8 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
> > > >       struct sync_file *sync_file = NULL;
> > > >       struct msm_gpu_submitqueue *queue;
> > > >       struct msm_ringbuffer *ring;
> > > > +     struct msm_submit_post_dep *post_deps = NULL;
> > > > +     struct drm_syncobj **syncobjs_to_reset = NULL;
> > > >       int out_fence_fd = -1;
> > > >       struct pid *pid = get_pid(task_pid(current));
> > > >       unsigned i;
> > > > @@ -413,6 +602,9 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
> > > >       if (!gpu)
> > > >               return -ENXIO;
> > > >
> > > > +     if (args->pad)
> > > > +             return -EINVAL;
> > > > +
> > >
> > > >       /* for now, we just have 3d pipe.. eventually this would need to
> > > >        * be more clever to dispatch to appropriate gpu module:
> > > >        */
> > > > @@ -460,9 +652,28 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
> > > >                       return ret;
> > > >       }
> > > >
> > > > +     if (args->flags & MSM_SUBMIT_SYNCOBJ_IN) {
> > > > +             ret = msm_wait_deps(dev, file,
> > > > +                                 args->in_syncobjs, args->nr_in_syncobjs,
> > > > +                                 args->syncobj_stride, ring,
> > >
> > > If you wanted to, you could just pass args directly to the function so you
> > > didn't have to take it apart.
> >
> > I think the advantage here is making it really explicit what part of
> > args is not used, and I don't feel the number of args has quite gone
> > out of control yet, but happy to change if insist.
> >
> > (sorry, not seeing if this is a "you could do that" vs. a "I would
> > prefer if you do that, politely")
>
> This was a "you could do that". I try to not be too angry about matters of
> personal coding preference.
>
> > >
> > > Also, Rob - do we want to do the trick where we return
> > > sizeof(drm_msm_gem_submit_syncobj) if the input stride is zero?
> >
> > Do you mean making this an in/out field and modifying it to return the
> > size if a stride of 0 has been given? If so, I don't see the point,
> > because userspace would not know what to fill any extra size with
> > (sure they can 0 initialize the extra part, but the kernel already
> > does that zero initializing the struct to be copied into).
> >
> > or do you mean interpreting stride as
> > sizeof(drm_msm_gem_submit_syncobj) if the input stride is 0? In that
> > case I think that is just an invitation for userspace to set itself up
> > for incompatibility issues when the size actually changes. Let's
> > enforce doing it right.
> >
> > (or did I misunderstand entirely?)
>
> The general idea is to build in a bit of compatibility for user space. Say for
> example that we add a new member to drm_mrm_gem_submit_syncobj and add support
> for it in the kernel and user space.  And then later the userspace library is
> used with an older kernel that (for the purposes of this experiment) supports
> the syncobj interface but not the expanded struct. If user space queried the
> size of the struct it could use the returned size from the kernel to determine
> if the expanded support was there or not. We do something similar to this for
> submitqueue queries [1].
>
> I couched it as a question because I'm not sure if that sort of query would
> be useful here and I'm not sure if somebody would go to the effort of setting
> up a (partial) submit just for the query, but we've been working on making
> ourselves more resilient so I figured I would raise the point.

So I think the driver version is enough here. For AMDGPU this has been
working quite nicely. Furthermore:

1) if the kernel returns a bigger size than last known app size, then
the app does not know what to fill it with, and can't do anything more
useful than just filling with zeros. Here the kernel does it anyway by
zero-initializing the entire struct before the copy_from_user.
2) if the kernel returns a smaller size than the last known app size
then some app-known feature is not supported. Now the app has to
figure out from the size what features are supported, which is kind of
ugly.
3) if the sizes are equal we still don't know what features are
supported as e.g. adding a flag does not change size.

I think the driver version adequately handles all 3 cases, and saves a
dummy call that makes for cheaper application startup.

(again, if there ideas about applying this widely in the driver, happy
to comply)
>
> [1] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/drivers/gpu/drm/msm/msm_submitqueue.c#n122
>
> Jordan
>
> <snip>
>
> --
> The Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum,
> a Linux Foundation Collaborative Project
> _______________________________________________
> Freedreno mailing list
> Freedreno@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/freedreno
diff mbox series

Patch

diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
index c84f0a8b3f2c..5246b41798df 100644
--- a/drivers/gpu/drm/msm/msm_drv.c
+++ b/drivers/gpu/drm/msm/msm_drv.c
@@ -37,9 +37,10 @@ 
  * - 1.4.0 - softpin, MSM_RELOC_BO_DUMP, and GEM_INFO support to set/get
  *           GEM object's debug name
  * - 1.5.0 - Add SUBMITQUERY_QUERY ioctl
+ * - 1.6.0 - Syncobj support
  */
 #define MSM_VERSION_MAJOR	1
-#define MSM_VERSION_MINOR	5
+#define MSM_VERSION_MINOR	6
 #define MSM_VERSION_PATCHLEVEL	0
 
 static const struct drm_mode_config_funcs mode_config_funcs = {
@@ -988,7 +989,8 @@  static struct drm_driver msm_driver = {
 	.driver_features    = DRIVER_GEM |
 				DRIVER_RENDER |
 				DRIVER_ATOMIC |
-				DRIVER_MODESET,
+				DRIVER_MODESET |
+				DRIVER_SYNCOBJ,
 	.open               = msm_open,
 	.postclose           = msm_postclose,
 	.lastclose          = drm_fb_helper_lastclose,
diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c
index be5327af16fa..6c7f95fc5cfd 100644
--- a/drivers/gpu/drm/msm/msm_gem_submit.c
+++ b/drivers/gpu/drm/msm/msm_gem_submit.c
@@ -8,7 +8,9 @@ 
 #include <linux/sync_file.h>
 #include <linux/uaccess.h>
 
+#include <drm/drm_drv.h>
 #include <drm/drm_file.h>
+#include <drm/drm_syncobj.h>
 
 #include "msm_drv.h"
 #include "msm_gpu.h"
@@ -394,6 +396,191 @@  static void submit_cleanup(struct msm_gem_submit *submit)
 	ww_acquire_fini(&submit->ticket);
 }
 
+
+struct msm_submit_post_dep {
+	struct drm_syncobj *syncobj;
+	uint64_t point;
+	struct dma_fence_chain *chain;
+};
+
+static int msm_wait_deps(struct drm_device *dev,
+                         struct drm_file *file,
+                         uint64_t in_syncobjs_addr,
+                         uint32_t nr_in_syncobjs,
+                         uint32_t syncobj_stride,
+                         struct msm_ringbuffer *ring,
+                         struct drm_syncobj ***syncobjs)
+{
+	struct drm_msm_gem_submit_syncobj syncobj_desc = {0};
+	int ret = 0;
+	uint32_t i, j;
+
+	*syncobjs = kcalloc(nr_in_syncobjs, sizeof(**syncobjs),
+	                    GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY);
+	if (!syncobjs) {
+		ret = -ENOMEM;
+		goto out_syncobjs;
+	}
+
+	for (i = 0; i < nr_in_syncobjs; ++i) {
+		uint64_t address = in_syncobjs_addr + i * syncobj_stride;
+		struct dma_fence *fence;
+
+		if (copy_from_user(&syncobj_desc,
+			           u64_to_user_ptr(address),
+			           min(syncobj_stride, sizeof(syncobj_desc)))) {
+			ret = -EFAULT;
+			goto out_syncobjs;
+		}
+
+		if (syncobj_desc.point &&
+		    !drm_core_check_feature(dev, DRIVER_SYNCOBJ_TIMELINE)) {
+			ret = -EOPNOTSUPP;
+			break;
+		}
+
+		if (syncobj_desc.flags & ~MSM_SUBMIT_SYNCOBJ_FLAGS) {
+			ret = -EINVAL;
+			break;
+		}
+
+		ret = drm_syncobj_find_fence(file, syncobj_desc.handle,
+		                             syncobj_desc.point, 0, &fence);
+		if (ret)
+			break;
+
+		if (!dma_fence_match_context(fence, ring->fctx->context))
+			ret = dma_fence_wait(fence, true);
+
+		dma_fence_put(fence);
+		if (ret)
+			break;
+
+		if (syncobj_desc.flags & MSM_SUBMIT_SYNCOBJ_RESET) {
+			(*syncobjs)[i] =
+				drm_syncobj_find(file, syncobj_desc.handle);
+			if (!(*syncobjs)[i]) {
+				ret = -EINVAL;
+				break;
+			}
+		}
+	}
+
+out_syncobjs:
+	if (ret && *syncobjs) {
+		for (j = 0; j < i; ++j) {
+			if ((*syncobjs)[j])
+				drm_syncobj_put((*syncobjs)[j]);
+		}
+		kfree(*syncobjs);
+		*syncobjs = NULL;
+	}
+	return ret;
+}
+
+static void msm_reset_syncobjs(struct drm_syncobj **syncobjs,
+                               uint32_t nr_syncobjs)
+{
+	uint32_t i;
+
+	for (i = 0; syncobjs && i < nr_syncobjs; ++i) {
+		if (syncobjs[i])
+			drm_syncobj_replace_fence(syncobjs[i], NULL);
+	}
+}
+
+static int msm_parse_post_deps(struct drm_device *dev,
+                               struct drm_file *file,
+                               uint64_t out_syncobjs_addr,
+                               uint32_t nr_out_syncobjs,
+			       uint32_t syncobj_stride,
+                               struct msm_submit_post_dep **post_deps)
+{
+	int ret = 0;
+	uint32_t i, j;
+
+	*post_deps = kmalloc_array(nr_out_syncobjs, sizeof(**post_deps),
+	                           GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY);
+	if (!*post_deps) {
+		ret = -ENOMEM;
+		goto out_syncobjs;
+	}
+
+	for (i = 0; i < nr_out_syncobjs; ++i) {
+		uint64_t address = out_syncobjs_addr + i * syncobj_stride;
+
+		if (copy_from_user(&syncobj_desc,
+			           u64_to_user_ptr(address),
+			           min(syncobj_stride, sizeof(syncobj_desc)))) {
+			ret = -EFAULT;
+			goto out_syncobjs;
+		}
+
+		(*post_deps)[i].point = syncobj_desc.point;
+		(*post_deps)[i].chain = NULL;
+
+		if (syncobj_desc.flags) {
+			ret = -EINVAL;
+			break;
+		}
+
+		if (syncobj_desc.point) {
+			if (!drm_core_check_feature(dev,
+			                            DRIVER_SYNCOBJ_TIMELINE)) {
+				ret = -EOPNOTSUPP;
+				break;
+			}
+
+			(*post_deps)[i].chain =
+				kmalloc(sizeof(*(*post_deps)[i].chain),
+				        GFP_KERNEL);
+			if (!(*post_deps)[i].chain) {
+				ret = -ENOMEM;
+				break;
+			}
+		}
+
+		(*post_deps)[i].syncobj =
+			drm_syncobj_find(file, syncobj_desc.handle);
+		if (!(*post_deps)[i].syncobj) {
+			ret = -EINVAL;
+			break;
+		}
+	}
+
+	if (ret) {
+		for (j = 0; j <= i; ++j) {
+			kfree((*post_deps)[j].chain);
+			if ((*post_deps)[j].syncobj)
+				drm_syncobj_put((*post_deps)[j].syncobj);
+		}
+
+		kfree(*post_deps);
+		*post_deps = NULL;
+	}
+
+out_syncobjs:
+	return ret;
+}
+
+static void msm_process_post_deps(struct msm_submit_post_dep *post_deps,
+                                  uint32_t count, struct dma_fence *fence)
+{
+	uint32_t i;
+
+	for (i = 0; post_deps && i < count; ++i) {
+		if (post_deps[i].chain) {
+			drm_syncobj_add_point(post_deps[i].syncobj,
+			                      post_deps[i].chain,
+			                      fence, post_deps[i].point);
+			post_deps[i].chain = NULL;
+		} else {
+			drm_syncobj_replace_fence(post_deps[i].syncobj,
+			                          fence);
+		}
+	}
+}
+
 int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
 		struct drm_file *file)
 {
@@ -406,6 +593,8 @@  int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
 	struct sync_file *sync_file = NULL;
 	struct msm_gpu_submitqueue *queue;
 	struct msm_ringbuffer *ring;
+	struct msm_submit_post_dep *post_deps = NULL;
+	struct drm_syncobj **syncobjs_to_reset = NULL;
 	int out_fence_fd = -1;
 	struct pid *pid = get_pid(task_pid(current));
 	unsigned i;
@@ -413,6 +602,9 @@  int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
 	if (!gpu)
 		return -ENXIO;
 
+	if (args->pad)
+		return -EINVAL;
+
 	/* for now, we just have 3d pipe.. eventually this would need to
 	 * be more clever to dispatch to appropriate gpu module:
 	 */
@@ -460,9 +652,28 @@  int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
 			return ret;
 	}
 
+	if (args->flags & MSM_SUBMIT_SYNCOBJ_IN) {
+		ret = msm_wait_deps(dev, file,
+		                    args->in_syncobjs, args->nr_in_syncobjs,
+		                    args->syncobj_stride, ring,
+		                    &syncobjs_to_reset);
+		if (ret)
+			goto out_post_unlock;
+	}
+
+	if (args->flags & MSM_SUBMIT_SYNCOBJ_OUT) {
+		ret = msm_parse_post_deps(dev, file,
+		                          args->out_syncobjs,
+		                          args->nr_out_syncobjs,
+		                          args->syncobj_stride,
+		                          &post_deps);
+		if (ret)
+			goto out_post_unlock;
+	}
+
 	ret = mutex_lock_interruptible(&dev->struct_mutex);
 	if (ret)
-		return ret;
+		goto out_post_unlock;
 
 	if (args->flags & MSM_SUBMIT_FENCE_FD_OUT) {
 		out_fence_fd = get_unused_fd_flags(O_CLOEXEC);
@@ -586,6 +797,11 @@  int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
 		args->fence_fd = out_fence_fd;
 	}
 
+	msm_reset_syncobjs(syncobjs_to_reset, args->nr_in_syncobjs);
+	msm_process_post_deps(post_deps, args->nr_out_syncobjs,
+	                      submit->fence);
+
+
 out:
 	submit_cleanup(submit);
 	if (ret)
@@ -594,5 +810,23 @@  int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
 	if (ret && (out_fence_fd >= 0))
 		put_unused_fd(out_fence_fd);
 	mutex_unlock(&dev->struct_mutex);
+
+out_post_unlock:
+	if (post_deps) {
+		for (i = 0; i < args->nr_out_syncobjs; ++i) {
+			kfree(post_deps[i].chain);
+			drm_syncobj_put(post_deps[i].syncobj);
+		}
+		kfree(post_deps);
+	}
+
+	if (syncobjs_to_reset) {
+		for (i = 0; i < args->nr_in_syncobjs; ++i) {
+			if (syncobjs_to_reset[i])
+				drm_syncobj_put(syncobjs_to_reset[i]);
+		}
+		kfree(syncobjs_to_reset);
+	}
+
 	return ret;
 }
diff --git a/include/uapi/drm/msm_drm.h b/include/uapi/drm/msm_drm.h
index 0b85ed6a3710..19806eb3a8e8 100644
--- a/include/uapi/drm/msm_drm.h
+++ b/include/uapi/drm/msm_drm.h
@@ -217,13 +217,28 @@  struct drm_msm_gem_submit_bo {
 #define MSM_SUBMIT_FENCE_FD_IN   0x40000000 /* enable input fence_fd */
 #define MSM_SUBMIT_FENCE_FD_OUT  0x20000000 /* enable output fence_fd */
 #define MSM_SUBMIT_SUDO          0x10000000 /* run submitted cmds from RB */
+#define MSM_SUBMIT_SYNCOBJ_IN    0x08000000 /* enable input syncobj */
+#define MSM_SUBMIT_SYNCOBJ_OUT   0x04000000 /* enable output syncobj */
 #define MSM_SUBMIT_FLAGS                ( \
 		MSM_SUBMIT_NO_IMPLICIT   | \
 		MSM_SUBMIT_FENCE_FD_IN   | \
 		MSM_SUBMIT_FENCE_FD_OUT  | \
 		MSM_SUBMIT_SUDO          | \
+		MSM_SUBMIT_SYNCOBJ_IN    | \
+		MSM_SUBMIT_SYNCOBJ_OUT   | \
 		0)
 
+#define MSM_SUBMIT_SYNCOBJ_RESET 0x00000001 /* Reset syncobj after wait. */
+#define MSM_SUBMIT_SYNCOBJ_FLAGS        ( \
+		MSM_SUBMIT_SYNCOBJ_RESET | \
+		0)
+
+struct drm_msm_gem_submit_syncobj {
+	__u32 handle;     /* in, syncobj handle. */
+	__u32 flags;      /* in, from MSM_SUBMIT_SYNCOBJ_FLAGS */
+	__u64 point;      /* in, timepoint for timeline syncobjs. */
+};
+
 /* Each cmdstream submit consists of a table of buffers involved, and
  * one or more cmdstream buffers.  This allows for conditional execution
  * (context-restore), and IB buffers needed for per tile/bin draw cmds.
@@ -236,7 +251,14 @@  struct drm_msm_gem_submit {
 	__u64 bos;            /* in, ptr to array of submit_bo's */
 	__u64 cmds;           /* in, ptr to array of submit_cmd's */
 	__s32 fence_fd;       /* in/out fence fd (see MSM_SUBMIT_FENCE_FD_IN/OUT) */
-	__u32 queueid;         /* in, submitqueue id */
+	__u32 queueid;        /* in, submitqueue id */
+	__u64 in_syncobjs;    /* in, ptr to to array of drm_msm_gem_submit_syncobj */
+	__u64 out_syncobjs;   /* in, ptr to to array of drm_msm_gem_submit_syncobj */
+	__u32 nr_in_syncobjs; /* in, number of entries in in_syncobj */
+	__u32 nr_out_syncobjs; /* in, number of entries in out_syncobj. */
+	__u32 syncobj_stride; /* in, stride of syncobj arrays. */
+	__u32 pad;            /*in, reserved for future use, always 0. */
+
 };
 
 /* The normal way to synchronize with the GPU is just to CPU_PREP on