diff mbox

[RFC,libdrm,4/6] tegra: Add channel, job, pushbuf and fence APIs

Message ID 1392825893-7380-5-git-send-email-thierry.reding@gmail.com (mailing list archive)
State New, archived
Headers show

Commit Message

Thierry Reding Feb. 19, 2014, 4:04 p.m. UTC
From: Thierry Reding <treding@nvidia.com>

These functions can be used to open channels to engines, manage job
submissions, create push buffers to store command streams in and wait
until jobs have been completed.

Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 tegra/Makefile.am |   4 ++
 tegra/channel.c   | 127 +++++++++++++++++++++++++++++++++++++++++
 tegra/fence.c     |  72 +++++++++++++++++++++++
 tegra/job.c       | 167 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 tegra/private.h   |  59 +++++++++++++++++++
 tegra/pushbuf.c   | 137 ++++++++++++++++++++++++++++++++++++++++++++
 tegra/tegra.h     |  52 +++++++++++++++++
 7 files changed, 618 insertions(+)
 create mode 100644 tegra/channel.c
 create mode 100644 tegra/fence.c
 create mode 100644 tegra/job.c
 create mode 100644 tegra/pushbuf.c

Comments

Erik Faye-Lund Feb. 19, 2014, 7:57 p.m. UTC | #1
On Wed, Feb 19, 2014 at 5:04 PM, Thierry Reding
<thierry.reding@gmail.com> wrote:
> From: Thierry Reding <treding@nvidia.com>
>
> These functions can be used to open channels to engines, manage job
> submissions, create push buffers to store command streams in and wait
> until jobs have been completed.
>
> Signed-off-by: Thierry Reding <treding@nvidia.com>

Thanks a lot for doing this! I'm going right for the juicy patch ;)

> +drm_public
> +int drm_tegra_fence_wait_timeout(struct drm_tegra_fence *fence,
> +                                unsigned long timeout)
> +{
> +       struct drm_tegra_syncpt_wait args;
> +       int err;
> +
> +       memset(&args, 0, sizeof(args));

Nit: how about

struct drm_tegra_syncpt_wait args = { 0 };

instead?

> +       args.id = fence->syncpt;
> +       args.thresh = fence->value;
> +       args.timeout = timeout;
> +
> +       while (true) {
> +               err = ioctl(fence->drm->fd, DRM_IOCTL_TEGRA_SYNCPT_WAIT, &args);
> +               if (err < 0) {
> +                       if (errno == EINTR)
> +                               continue;
> +
> +                       drmMsg("DRM_IOCTL_TEGRA_SYNCPT_WAIT: %d\n", -errno);

What's the reason for printing the errno negated? And could we do
'...%s\n", strerror(errno));' instead?

> +int drm_tegra_job_add_reloc(struct drm_tegra_job *job,
> +                           const struct drm_tegra_reloc *reloc)
> +{
> +       struct drm_tegra_reloc *relocs;
> +       size_t size;
> +
> +       size = (job->num_relocs + 1) * sizeof(*reloc);
> +
> +       relocs = realloc(job->relocs, size);

Nit: there's no point in not assigning those while declaring them, no?

size_t size = (job->num_relocs + 1) * sizeof(*reloc);
struct drm_tegra_reloc *relocs = realloc(job->relocs, size);

> +drm_public
> +int drm_tegra_pushbuf_new(struct drm_tegra_pushbuf **pushbufp,
> +                         struct drm_tegra_job *job,
> +                         struct drm_tegra_bo *bo,
> +                         unsigned long offset)
> +{
> +       struct drm_tegra_pushbuf_private *pushbuf;
> +       void *ptr;
> +       int err;
> +
> +       pushbuf = calloc(1, sizeof(*pushbuf));
> +       if (!pushbuf)
> +               return -ENOMEM;
> +
> +       pushbuf->bo = drm_tegra_bo_get(bo);
> +       DRMINITLISTHEAD(&pushbuf->list);
> +       pushbuf->job = job;
> +
> +       err = drm_tegra_bo_map(bo, &ptr);
> +       if (err < 0) {
> +               drm_tegra_bo_put(bo);
> +               free(pushbuf);
> +               return err;
> +       }
> +
> +       pushbuf->start = pushbuf->base.ptr = ptr + offset;
> +       pushbuf->offset = offset;
> +
> +       DRMLISTADD(&pushbuf->list, &job->pushbufs);
> +       job->num_pushbufs++;
> +
> +       *pushbufp = &pushbuf->base;
> +
> +       return 0;
> +}

It feels quite wasteful to me to have to allocate a new pushbuf in
order to be able to use a new BO. I'd much rather see the pushbuf
being a persisting object that's the interface to the command-stream
(that produces jobs).

I was thinking something like:

int drm_tegra_pushbuf_new(struct drm_tegra_pushbuf **pushbufp, struct
drm_tegra_job *job)
int drm_tegra_pushbuf_room(struct drm_tegra_pushbuf *pushbuf, int num_words);

Where room guarantees that there's space for those words in the
pushbuf. A simple implementation could just allocate a bo of that
size, but a slightly more sophisticated one can allocate larger ones
and reuse them. Even more sophisticated ones could keep old cmdbufs
around and reuse them once the hardware is done reading them, do
exponential grow-factors etc.

I've implemented the "slightly more sophisticated" approach here:

https://github.com/grate-driver/libdrm/commit/f90ea2f57ca4d8c81768402900c663ce526bac11

In my implementation, I've changed the job-structure to build the list
of cmdbufs directly rather than keeping a list of the pushbufs. Sure,
that means another allocation every time we need a new cmdbuf, but
hopefully we should be able to produce much less of them this way.

> +int drm_tegra_pushbuf_relocate(struct drm_tegra_pushbuf *pushbuf,
> +                              struct drm_tegra_bo *target,
> +                              unsigned long offset,
> +                              unsigned long shift)
> +{
> +       struct drm_tegra_pushbuf_private *priv = pushbuf_priv(pushbuf);
> +       struct drm_tegra_reloc reloc;
> +       int err;
> +
> +       memset(&reloc, 0, sizeof(reloc));
> +       reloc.cmdbuf.handle = priv->bo->handle;
> +       reloc.cmdbuf.offset = drm_tegra_pushbuf_get_offset(pushbuf);
> +       reloc.target.handle = target->handle;
> +       reloc.target.offset = offset;
> +       reloc.shift = shift;
> +
> +       err = drm_tegra_job_add_reloc(priv->job, &reloc);
> +       if (err < 0)
> +               return err;
> +
> +       return 0;
> +}

Whenever we insert a reloc, we also insert a DEADBEEF in the command
stream. Why not formalize this into this function?
Thierry Reding Feb. 19, 2014, 9:11 p.m. UTC | #2
On Wed, Feb 19, 2014 at 08:57:29PM +0100, Erik Faye-Lund wrote:
> On Wed, Feb 19, 2014 at 5:04 PM, Thierry Reding <thierry.reding@gmail.com> wrote:
[...]
> > +drm_public
> > +int drm_tegra_fence_wait_timeout(struct drm_tegra_fence *fence,
> > +                                unsigned long timeout)
> > +{
> > +       struct drm_tegra_syncpt_wait args;
> > +       int err;
> > +
> > +       memset(&args, 0, sizeof(args));
> 
> Nit: how about
> 
> struct drm_tegra_syncpt_wait args = { 0 };
> 
> instead?

I've compiled both variants and they seem to be generating exactly the
same code. Oddly enough, neither of them seems to be explicitly clearing
any part of the stack and indeed leaving out memset() and = { 0, } does
generate the same code again. It looks like the compiler is being pretty
clever about optimizing this part.

The reason I prefer memset() is that it's somewhat more explicit. So if
you don't have any strong objections I'd like to stick with it.

> > +       args.id = fence->syncpt;
> > +       args.thresh = fence->value;
> > +       args.timeout = timeout;
> > +
> > +       while (true) {
> > +               err = ioctl(fence->drm->fd, DRM_IOCTL_TEGRA_SYNCPT_WAIT, &args);
> > +               if (err < 0) {
> > +                       if (errno == EINTR)
> > +                               continue;
> > +
> > +                       drmMsg("DRM_IOCTL_TEGRA_SYNCPT_WAIT: %d\n", -errno);
> 
> What's the reason for printing the errno negated? And could we do
> '...%s\n", strerror(errno));' instead?

Yeah, strerror(errno) would be preferable. On second thought maybe I
should drop that message altogether since we return -errno anyway and
therefore have access to it in the caller who can then decide to print a
message or not.

> > +int drm_tegra_job_add_reloc(struct drm_tegra_job *job,
> > +                           const struct drm_tegra_reloc *reloc)
> > +{
> > +       struct drm_tegra_reloc *relocs;
> > +       size_t size;
> > +
> > +       size = (job->num_relocs + 1) * sizeof(*reloc);
> > +
> > +       relocs = realloc(job->relocs, size);
> 
> Nit: there's no point in not assigning those while declaring them, no?
> 
> size_t size = (job->num_relocs + 1) * sizeof(*reloc);
> struct drm_tegra_reloc *relocs = realloc(job->relocs, size);

In my opinion that's a lot of clutter and very hard to read. So it's
really just a matter of preferred coding style.

> > +drm_public
> > +int drm_tegra_pushbuf_new(struct drm_tegra_pushbuf **pushbufp,
> > +                         struct drm_tegra_job *job,
> > +                         struct drm_tegra_bo *bo,
> > +                         unsigned long offset)
> > +{
> > +       struct drm_tegra_pushbuf_private *pushbuf;
> > +       void *ptr;
> > +       int err;
> > +
> > +       pushbuf = calloc(1, sizeof(*pushbuf));
> > +       if (!pushbuf)
> > +               return -ENOMEM;
> > +
> > +       pushbuf->bo = drm_tegra_bo_get(bo);
> > +       DRMINITLISTHEAD(&pushbuf->list);
> > +       pushbuf->job = job;
> > +
> > +       err = drm_tegra_bo_map(bo, &ptr);
> > +       if (err < 0) {
> > +               drm_tegra_bo_put(bo);
> > +               free(pushbuf);
> > +               return err;
> > +       }
> > +
> > +       pushbuf->start = pushbuf->base.ptr = ptr + offset;
> > +       pushbuf->offset = offset;
> > +
> > +       DRMLISTADD(&pushbuf->list, &job->pushbufs);
> > +       job->num_pushbufs++;
> > +
> > +       *pushbufp = &pushbuf->base;
> > +
> > +       return 0;
> > +}
> 
> It feels quite wasteful to me to have to allocate a new pushbuf in
> order to be able to use a new BO. I'd much rather see the pushbuf
> being a persisting object that's the interface to the command-stream
> (that produces jobs).
> 
> I was thinking something like:
> 
> int drm_tegra_pushbuf_new(struct drm_tegra_pushbuf **pushbufp, struct
> drm_tegra_job *job)
> int drm_tegra_pushbuf_room(struct drm_tegra_pushbuf *pushbuf, int num_words);
> 
> Where room guarantees that there's space for those words in the
> pushbuf. A simple implementation could just allocate a bo of that
> size, but a slightly more sophisticated one can allocate larger ones
> and reuse them. Even more sophisticated ones could keep old cmdbufs
> around and reuse them once the hardware is done reading them, do
> exponential grow-factors etc.

Okay, so you suggest that the backing buffer objects are handled
entirely by the push buffer implementation? Yeah, I think that makes a
lot of sense actually.

> I've implemented the "slightly more sophisticated" approach here:
> 
> https://github.com/grate-driver/libdrm/commit/f90ea2f57ca4d8c81768402900c663ce526bac11
> 
> In my implementation, I've changed the job-structure to build the list
> of cmdbufs directly rather than keeping a list of the pushbufs. Sure,
> that means another allocation every time we need a new cmdbuf, but
> hopefully we should be able to produce much less of them this way.

Okay, I'll try to integrate your implementation. It looks somewhat
complex but still manageable. The important part at this point is to get
the API right. That way we can still implement whatever complex scheme
we want underneath.

> > +int drm_tegra_pushbuf_relocate(struct drm_tegra_pushbuf *pushbuf,
> > +                              struct drm_tegra_bo *target,
> > +                              unsigned long offset,
> > +                              unsigned long shift)
> > +{
> > +       struct drm_tegra_pushbuf_private *priv = pushbuf_priv(pushbuf);
> > +       struct drm_tegra_reloc reloc;
> > +       int err;
> > +
> > +       memset(&reloc, 0, sizeof(reloc));
> > +       reloc.cmdbuf.handle = priv->bo->handle;
> > +       reloc.cmdbuf.offset = drm_tegra_pushbuf_get_offset(pushbuf);
> > +       reloc.target.handle = target->handle;
> > +       reloc.target.offset = offset;
> > +       reloc.shift = shift;
> > +
> > +       err = drm_tegra_job_add_reloc(priv->job, &reloc);
> > +       if (err < 0)
> > +               return err;
> > +
> > +       return 0;
> > +}
> 
> Whenever we insert a reloc, we also insert a DEADBEEF in the command
> stream. Why not formalize this into this function?

That's a good idea.

Thierry
diff mbox

Patch

diff --git a/tegra/Makefile.am b/tegra/Makefile.am
index 1b83145b120d..c73587e8661e 100644
--- a/tegra/Makefile.am
+++ b/tegra/Makefile.am
@@ -11,6 +11,10 @@  libdrm_tegra_la_LDFLAGS = -version-number 0:0:0 -no-undefined
 libdrm_tegra_la_LIBADD = ../libdrm.la @PTHREADSTUBS_LIBS@
 
 libdrm_tegra_la_SOURCES = \
+	channel.c \
+	fence.c \
+	job.c \
+	pushbuf.c \
 	tegra.c
 
 libdrm_tegraincludedir = ${includedir}/libdrm
diff --git a/tegra/channel.c b/tegra/channel.c
new file mode 100644
index 000000000000..03cce30e98b9
--- /dev/null
+++ b/tegra/channel.c
@@ -0,0 +1,127 @@ 
+/*
+ * Copyright © 2012, 2013 Thierry Reding
+ * Copyright © 2013 Erik Faye-Lund
+ * Copyright © 2014 NVIDIA Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifdef HAVE_CONFIG_H
+#  include "config.h"
+#endif
+
+#include <errno.h>
+#include <string.h>
+
+#include "private.h"
+
+/*
+ * Query the syncpoint at index 0 of the channel's context through
+ * DRM_IOCTL_TEGRA_GET_SYNCPT and cache the returned ID in channel->syncpt.
+ *
+ * Returns 0 on success or a negative errno value if the ioctl fails.
+ */
+static int drm_tegra_channel_setup(struct drm_tegra_channel *channel)
+{
+	struct drm_tegra_get_syncpt request;
+
+	memset(&request, 0, sizeof(request));
+	request.context = channel->context;
+	request.index = 0;
+
+	if (ioctl(channel->drm->fd, DRM_IOCTL_TEGRA_GET_SYNCPT, &request) < 0)
+		return -errno;
+
+	channel->syncpt = request.id;
+
+	return 0;
+}
+
+/*
+ * Open a channel to the engine selected by client.
+ *
+ * channelp: receives the newly allocated channel on success
+ * drm:      device whose file descriptor the ioctls are issued on
+ * client:   DRM_TEGRA_GR2D or DRM_TEGRA_GR3D
+ *
+ * Returns 0 on success, -EINVAL for an unknown client, -ENOMEM if the
+ * channel cannot be allocated, or a negative errno value reported by one
+ * of the ioctls. *channelp is not written on failure.
+ */
+drm_public
+int drm_tegra_channel_open(struct drm_tegra_channel **channelp,
+			   struct drm_tegra *drm,
+			   enum drm_tegra_class client)
+{
+	struct drm_tegra_open_channel args;
+	struct drm_tegra_channel *channel;
+	enum host1x_class class;
+	int err;
+
+	/* translate the public engine selector into the host1x class ID */
+	switch (client) {
+	case DRM_TEGRA_GR2D:
+		class = HOST1X_CLASS_GR2D;
+		break;
+
+	case DRM_TEGRA_GR3D:
+		class = HOST1X_CLASS_GR3D;
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	channel = calloc(1, sizeof(*channel));
+	if (!channel)
+		return -ENOMEM;
+
+	channel->drm = drm;
+
+	memset(&args, 0, sizeof(args));
+	args.client = class;
+
+	err = ioctl(drm->fd, DRM_IOCTL_TEGRA_OPEN_CHANNEL, &args);
+	if (err < 0) {
+		err = -errno;
+		free(channel);
+		return err;
+	}
+
+	channel->context = args.context;
+	channel->class = class;
+
+	err = drm_tegra_channel_setup(channel);
+	if (err < 0) {
+		/*
+		 * The context has already been opened at this point, so
+		 * close it again rather than only freeing the wrapper.
+		 * drm_tegra_channel_close() frees the channel itself when
+		 * it succeeds; if it fails the wrapper is released here.
+		 */
+		if (drm_tegra_channel_close(channel) < 0)
+			free(channel);
+
+		return err;
+	}
+
+	*channelp = channel;
+
+	return 0;
+}
+
+/*
+ * Close a channel opened with drm_tegra_channel_open() and free it.
+ *
+ * Returns 0 on success, -EINVAL if channel is NULL, or a negative errno
+ * value if DRM_IOCTL_TEGRA_CLOSE_CHANNEL fails. The channel is only freed
+ * when the ioctl succeeds.
+ */
+drm_public
+int drm_tegra_channel_close(struct drm_tegra_channel *channel)
+{
+	/*
+	 * The close request has its own argument structure whose only member
+	 * is the context. struct drm_tegra_open_channel stores the context
+	 * after its client/pad fields, so using it here would hand the kernel
+	 * a zeroed context.
+	 * NOTE(review): layout taken from the tegra_drm.h UAPI header; confirm
+	 * against the copy shipped in this tree.
+	 */
+	struct drm_tegra_close_channel args;
+	struct drm_tegra *drm;
+	int err;
+
+	if (!channel)
+		return -EINVAL;
+
+	drm = channel->drm;
+
+	memset(&args, 0, sizeof(args));
+	args.context = channel->context;
+
+	err = ioctl(drm->fd, DRM_IOCTL_TEGRA_CLOSE_CHANNEL, &args);
+	if (err < 0)
+		return -errno;
+
+	free(channel);
+
+	return 0;
+}
diff --git a/tegra/fence.c b/tegra/fence.c
new file mode 100644
index 000000000000..6af60500c5f1
--- /dev/null
+++ b/tegra/fence.c
@@ -0,0 +1,72 @@ 
+/*
+ * Copyright © 2012, 2013 Thierry Reding
+ * Copyright © 2013 Erik Faye-Lund
+ * Copyright © 2014 NVIDIA Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifdef HAVE_CONFIG_H
+#  include "config.h"
+#endif
+
+#include <errno.h>
+#include <string.h>
+
+#include "private.h"
+
+/*
+ * Wait until the fence's syncpoint reaches the fence's value.
+ *
+ * fence:   fence to wait on
+ * timeout: passed through as the timeout of DRM_IOCTL_TEGRA_SYNCPT_WAIT
+ *
+ * The ioctl is reissued with the same arguments whenever it is interrupted
+ * (EINTR).
+ *
+ * Returns 0 once the wait completes or a negative errno value on failure.
+ */
+drm_public
+int drm_tegra_fence_wait_timeout(struct drm_tegra_fence *fence,
+				 unsigned long timeout)
+{
+	struct drm_tegra_syncpt_wait args;
+	int err;
+
+	memset(&args, 0, sizeof(args));
+	args.id = fence->syncpt;
+	args.thresh = fence->value;
+	args.timeout = timeout;
+
+	while (true) {
+		err = ioctl(fence->drm->fd, DRM_IOCTL_TEGRA_SYNCPT_WAIT, &args);
+		if (err < 0) {
+			if (errno == EINTR)
+				continue;
+
+			/*
+			 * Capture errno before logging: the logging call may
+			 * itself overwrite it, which would corrupt the value
+			 * returned to the caller.
+			 */
+			err = -errno;
+			drmMsg("DRM_IOCTL_TEGRA_SYNCPT_WAIT: %d\n", err);
+			return err;
+		}
+
+		break;
+	}
+
+	return 0;
+}
+
+/*
+ * Wait on a fence using a timeout of -1 converted to unsigned long.
+ *
+ * Returns whatever drm_tegra_fence_wait_timeout() returns.
+ */
+drm_public
+int drm_tegra_fence_wait(struct drm_tegra_fence *fence)
+{
+	const unsigned long timeout = -1;
+
+	return drm_tegra_fence_wait_timeout(fence, timeout);
+}
+
+/*
+ * Release a fence. Fences are plain calloc() allocations made by
+ * drm_tegra_job_submit(), so this only calls free().
+ */
+drm_public
+void drm_tegra_fence_free(struct drm_tegra_fence *fence)
+{
+	free(fence);
+}
diff --git a/tegra/job.c b/tegra/job.c
new file mode 100644
index 000000000000..506164cec95e
--- /dev/null
+++ b/tegra/job.c
@@ -0,0 +1,167 @@ 
+/*
+ * Copyright © 2012, 2013 Thierry Reding
+ * Copyright © 2013 Erik Faye-Lund
+ * Copyright © 2014 NVIDIA Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifdef HAVE_CONFIG_H
+#  include "config.h"
+#endif
+
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "private.h"
+
+/*
+ * Append a copy of *reloc to the job's relocation array, growing the array
+ * by one element.
+ *
+ * Returns 0 on success or -ENOMEM if the array cannot be grown, in which
+ * case the job's existing relocations are left untouched.
+ */
+int drm_tegra_job_add_reloc(struct drm_tegra_job *job,
+			    const struct drm_tegra_reloc *reloc)
+{
+	struct drm_tegra_reloc *relocs;
+	size_t size;
+
+	size = ((size_t)job->num_relocs + 1) * sizeof(*reloc);
+
+	/*
+	 * Test the realloc() result, not the caller's pointer: on failure
+	 * realloc() returns NULL and job->relocs must keep pointing at the
+	 * old, still valid array.
+	 */
+	relocs = realloc(job->relocs, size);
+	if (!relocs)
+		return -ENOMEM;
+
+	job->relocs = relocs;
+
+	job->relocs[job->num_relocs++] = *reloc;
+
+	return 0;
+}
+
+/*
+ * Allocate a new, empty job for the given channel.
+ *
+ * jobp:    receives the new job on success
+ * channel: channel the job will be submitted to
+ *
+ * Returns 0 on success or -ENOMEM if the job cannot be allocated.
+ */
+drm_public
+int drm_tegra_job_new(struct drm_tegra_job **jobp,
+		      struct drm_tegra_channel *channel)
+{
+	struct drm_tegra_job *job;
+
+	job = calloc(1, sizeof(*job));
+	if (!job)
+		return -ENOMEM;
+
+	DRMINITLISTHEAD(&job->pushbufs);
+	job->channel = channel;
+
+	/*
+	 * Use the syncpoint that was looked up for the channel. Without this
+	 * job->syncpt stays at the zero left by calloc(), so the increments
+	 * written by drm_tegra_pushbuf_sync() and the fence filled in by
+	 * drm_tegra_job_submit() would always refer to syncpoint 0.
+	 */
+	job->syncpt = channel->syncpt;
+
+	*jobp = job;
+
+	return 0;
+}
+
+/*
+ * Free a job together with every push buffer still attached to it and its
+ * relocation array.
+ *
+ * Returns 0 on success or -EINVAL if job is NULL.
+ */
+drm_public
+int drm_tegra_job_free(struct drm_tegra_job *job)
+{
+	struct drm_tegra_pushbuf_private *pushbuf, *temp;
+
+	if (!job)
+		return -EINVAL;
+
+	/*
+	 * drm_tegra_pushbuf_free() unlinks and frees the entry being visited,
+	 * so the iteration must remember the next entry before the current
+	 * one goes away.
+	 */
+	DRMLISTFOREACHENTRYSAFE(pushbuf, temp, &job->pushbufs, list)
+		drm_tegra_pushbuf_free(&pushbuf->base);
+
+	free(job->relocs);
+	free(job);
+
+	return 0;
+}
+
+/*
+ * Submit a job through DRM_IOCTL_TEGRA_SUBMIT.
+ *
+ * job:    job to submit; one command buffer entry is built per push buffer
+ *         on the job's list
+ * fencep: if non-NULL, receives a newly allocated fence on success, to be
+ *         released with drm_tegra_fence_free()
+ *
+ * Returns 0 on success, -ENOMEM if a temporary allocation fails, or a
+ * negative errno value if the ioctl fails. *fencep is not written on
+ * failure.
+ */
+drm_public
+int drm_tegra_job_submit(struct drm_tegra_job *job,
+			 struct drm_tegra_fence **fencep)
+{
+	struct drm_tegra *drm = job->channel->drm;
+	struct drm_tegra_pushbuf_private *pushbuf;
+	struct drm_tegra_cmdbuf *cmdbufs = NULL;
+	struct drm_tegra_syncpt *syncpts = NULL;
+	struct drm_tegra_fence *fence = NULL;
+	struct drm_tegra_submit args;
+	unsigned int i = 0;
+	int err;
+
+	if (fencep) {
+		fence = calloc(1, sizeof(*fence));
+		if (!fence)
+			return -ENOMEM;
+	}
+
+	cmdbufs = calloc(job->num_pushbufs, sizeof(*cmdbufs));
+	if (!cmdbufs) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	/*
+	 * Fill one entry per push buffer. The index has to start at zero and
+	 * advance with every entry, otherwise all push buffers would be
+	 * written through the same (indeterminate) slot.
+	 */
+	DRMLISTFOREACHENTRY(pushbuf, &job->pushbufs, list) {
+		struct drm_tegra_cmdbuf *cmdbuf = &cmdbufs[i++];
+
+		cmdbuf->handle = pushbuf->bo->handle;
+		cmdbuf->offset = pushbuf->offset;
+		/* number of 32-bit words written since the push buffer was created */
+		cmdbuf->words = pushbuf->base.ptr - pushbuf->start;
+	}
+
+	syncpts = calloc(1, sizeof(*syncpts));
+	if (!syncpts) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	syncpts[0].id = job->syncpt;
+	syncpts[0].incrs = job->increments;
+
+	memset(&args, 0, sizeof(args));
+	args.context = job->channel->context;
+	args.num_syncpts = 1;
+	args.num_cmdbufs = job->num_pushbufs;
+	args.num_relocs = job->num_relocs;
+	args.num_waitchks = 0;
+	args.waitchk_mask = 0;
+	args.timeout = 1000;
+
+	args.syncpts = (uintptr_t)syncpts;
+	args.cmdbufs = (uintptr_t)cmdbufs;
+	args.relocs = (uintptr_t)job->relocs;
+	args.waitchks = 0;
+
+	err = ioctl(drm->fd, DRM_IOCTL_TEGRA_SUBMIT, &args);
+	if (err < 0) {
+		err = -errno;
+		goto out;
+	}
+
+	if (fence) {
+		fence->syncpt = job->syncpt;
+		fence->value = args.fence;
+		fence->drm = drm;
+
+		/* ownership moves to the caller; keep the cleanup below from freeing it */
+		*fencep = fence;
+		fence = NULL;
+	}
+
+	err = 0;
+
+out:
+	free(syncpts);
+	free(cmdbufs);
+	free(fence);
+
+	return err;
+}
diff --git a/tegra/private.h b/tegra/private.h
index ec69295c2cf8..3a72e9962f53 100644
--- a/tegra/private.h
+++ b/tegra/private.h
@@ -26,10 +26,13 @@ 
 #define __DRM_TEGRA_PRIVATE_H__ 1
 
 #include <stdbool.h>
+#include <stddef.h>
 #include <stdint.h>
 
+#include <libdrm_lists.h>
 #include <xf86atomic.h>
 
+#include "tegra_drm.h"
 #include "tegra.h"
 
 #if defined(HAVE_VISIBILITY)
@@ -40,6 +43,18 @@ 
 #  define drm_public
 #endif
 
+/*
+ * Given a pointer to the field `member` embedded in an object of type
+ * `type`, yield a pointer to the enclosing object. The temporary makes the
+ * compiler check that `ptr` matches the member's type. Relies on typeof and
+ * statement expressions, i.e. GNU C extensions.
+ */
+#define container_of(ptr, type, member) ({				\
+		const typeof(((type *)0)->member) *__mptr = (ptr);	\
+		(type *)((char *)__mptr - offsetof(type, member));	\
+	})
+
+/*
+ * Class IDs. drm_tegra_channel_open() passes one of these as the client of
+ * DRM_IOCTL_TEGRA_OPEN_CHANNEL; only the GR2D and GR3D values are used by
+ * the code in this library so far.
+ */
+enum host1x_class {
+	HOST1X_CLASS_HOST1X = 0x01,
+	HOST1X_CLASS_GR2D = 0x51,
+	HOST1X_CLASS_GR2D_SB = 0x52,
+	HOST1X_CLASS_GR3D = 0x60,
+};
+
 struct drm_tegra {
 	bool close;
 	int fd;
@@ -55,4 +70,48 @@  struct drm_tegra_bo {
 	void *map;
 };
 
+/* State of an open channel, filled in by drm_tegra_channel_open(). */
+struct drm_tegra_channel {
+	struct drm_tegra *drm;		/* device the channel was opened on */
+	enum host1x_class class;	/* class requested when opening */
+	uint64_t context;		/* context returned by OPEN_CHANNEL */
+	uint32_t syncpt;		/* ID returned by GET_SYNCPT for index 0 */
+};
+
+/*
+ * Fence created by drm_tegra_job_submit(); drm_tegra_fence_wait_timeout()
+ * waits for syncpoint `syncpt` with `value` as the threshold.
+ */
+struct drm_tegra_fence {
+	struct drm_tegra *drm;		/* device used to issue the wait ioctl */
+	uint32_t syncpt;		/* syncpoint ID of the submitted job */
+	uint32_t value;			/* fence value reported by the submit ioctl */
+};
+
+/*
+ * Library-internal part of a push buffer. Callers only see `base`; use
+ * pushbuf_priv() to get back from `base` to this structure.
+ */
+struct drm_tegra_pushbuf_private {
+	struct drm_tegra_pushbuf base;	/* public part handed out to callers */
+	struct drm_tegra_job *job;	/* job the push buffer was created for */
+	struct drm_tegra_bo *bo;	/* backing buffer object (reference held) */
+	unsigned long offset;		/* offset passed to drm_tegra_pushbuf_new() */
+	drmMMListHead list;		/* link in the job's pushbufs list */
+	uint32_t *start;		/* initial write pointer: mapping + offset */
+};
+
+/*
+ * Map the public push buffer handle back to the private structure that
+ * embeds it as its `base` member.
+ */
+static inline struct drm_tegra_pushbuf_private *
+pushbuf_priv(struct drm_tegra_pushbuf *pb)
+{
+	char *raw = (char *)pb;
+
+	raw -= offsetof(struct drm_tegra_pushbuf_private, base);
+
+	return (struct drm_tegra_pushbuf_private *)raw;
+}
+
+/* A job: a set of push buffers plus relocations submitted as one unit. */
+struct drm_tegra_job {
+	/* channel the job is submitted to */
+	struct drm_tegra_channel *channel;
+
+	/* bumped once per drm_tegra_pushbuf_sync(); submitted as `incrs` */
+	unsigned int increments;
+	/* syncpoint ID used in sync commands, the submission and the fence */
+	uint32_t syncpt;
+
+	/* array grown by drm_tegra_job_add_reloc(), and its element count */
+	struct drm_tegra_reloc *relocs;
+	unsigned int num_relocs;
+
+	/* push buffers created for this job and how many were created */
+	unsigned int num_pushbufs;
+	drmMMListHead pushbufs;
+};
+
+int drm_tegra_job_add_reloc(struct drm_tegra_job *job,
+			    const struct drm_tegra_reloc *reloc);
+
 #endif /* __DRM_TEGRA_PRIVATE_H__ */
diff --git a/tegra/pushbuf.c b/tegra/pushbuf.c
new file mode 100644
index 000000000000..93f72fd40650
--- /dev/null
+++ b/tegra/pushbuf.c
@@ -0,0 +1,137 @@ 
+/*
+ * Copyright © 2012, 2013 Thierry Reding
+ * Copyright © 2013 Erik Faye-Lund
+ * Copyright © 2014 NVIDIA Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifdef HAVE_CONFIG_H
+#  include "config.h"
+#endif
+
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "private.h"
+
+/*
+ * Build a command word with 0x2 in bits 31:28, the low 12 bits of `offset`
+ * in bits 27:16 and the low 16 bits of `count` in bits 15:0.
+ * NOTE(review): presumably the host1x "non-incrementing write" opcode -
+ * confirm against the hardware documentation.
+ */
+#define HOST1X_OPCODE_NONINCR(offset, count) \
+    ((0x2 << 28) | (((offset) & 0xfff) << 16) | ((count) & 0xffff))
+
+/*
+ * Byte distance between the start of the backing buffer object's mapping
+ * and the push buffer's current write pointer.
+ */
+static inline unsigned long
+drm_tegra_pushbuf_get_offset(struct drm_tegra_pushbuf *pushbuf)
+{
+	unsigned long cursor = (unsigned long)pushbuf->ptr;
+	unsigned long base = (unsigned long)pushbuf_priv(pushbuf)->bo->map;
+
+	return cursor - base;
+}
+
+/*
+ * Create a push buffer that writes into `bo` starting at byte `offset` and
+ * attach it to `job`.
+ *
+ * pushbufp: receives the public push buffer handle on success
+ * job:      job the push buffer is added to
+ * bo:       backing buffer object; a reference is taken and it is mapped
+ * offset:   byte offset into the mapping at which writing starts
+ *
+ * Returns 0 on success, -ENOMEM if the push buffer cannot be allocated, or
+ * the error returned by drm_tegra_bo_map().
+ */
+drm_public
+int drm_tegra_pushbuf_new(struct drm_tegra_pushbuf **pushbufp,
+			  struct drm_tegra_job *job,
+			  struct drm_tegra_bo *bo,
+			  unsigned long offset)
+{
+	struct drm_tegra_pushbuf_private *pushbuf;
+	void *ptr;
+	int err;
+
+	pushbuf = calloc(1, sizeof(*pushbuf));
+	if (!pushbuf)
+		return -ENOMEM;
+
+	pushbuf->bo = drm_tegra_bo_get(bo);
+	DRMINITLISTHEAD(&pushbuf->list);
+	pushbuf->job = job;
+
+	err = drm_tegra_bo_map(bo, &ptr);
+	if (err < 0) {
+		drm_tegra_bo_put(bo);
+		free(pushbuf);
+		return err;
+	}
+
+	/* byte arithmetic via char *; arithmetic on void * is a GNU extension */
+	pushbuf->start = pushbuf->base.ptr = (uint32_t *)((char *)ptr + offset);
+	pushbuf->offset = offset;
+
+	/*
+	 * Append rather than prepend so that drm_tegra_job_submit(), which
+	 * walks the list front to back, hands the command buffers to the
+	 * kernel in the order in which they were created.
+	 */
+	DRMLISTADDTAIL(&pushbuf->list, &job->pushbufs);
+	job->num_pushbufs++;
+
+	*pushbufp = &pushbuf->base;
+
+	return 0;
+}
+
+/*
+ * Detach a push buffer from its job and free it: the backing buffer object
+ * is unmapped and the reference taken by drm_tegra_pushbuf_new() dropped.
+ *
+ * Returns 0 on success or -EINVAL if pushbuf is NULL.
+ */
+drm_public
+int drm_tegra_pushbuf_free(struct drm_tegra_pushbuf *pushbuf)
+{
+	struct drm_tegra_pushbuf_private *priv;
+
+	if (!pushbuf)
+		return -EINVAL;
+
+	/* only convert after the NULL check; no pointer math on NULL */
+	priv = pushbuf_priv(pushbuf);
+
+	drm_tegra_bo_unmap(priv->bo);
+	drm_tegra_bo_put(priv->bo);
+
+	/*
+	 * Keep the job's counter in step with its list: the submit path sizes
+	 * the command buffer array and num_cmdbufs from num_pushbufs.
+	 */
+	DRMLISTDEL(&priv->list);
+	priv->job->num_pushbufs--;
+
+	free(priv);
+
+	return 0;
+}
+
+/*
+ * Record a relocation for the push buffer's current write position.
+ *
+ * pushbuf: push buffer whose backing BO and current offset identify the
+ *          command buffer side of the relocation
+ * target:  buffer object the relocation refers to
+ * offset:  offset stored as the target offset
+ * shift:   shift value stored in the relocation
+ *
+ * Returns 0 on success or the negative error from
+ * drm_tegra_job_add_reloc(). The write pointer is not advanced.
+ */
+drm_public
+int drm_tegra_pushbuf_relocate(struct drm_tegra_pushbuf *pushbuf,
+			       struct drm_tegra_bo *target,
+			       unsigned long offset,
+			       unsigned long shift)
+{
+	struct drm_tegra_pushbuf_private *self = pushbuf_priv(pushbuf);
+	struct drm_tegra_reloc entry;
+	int status;
+
+	memset(&entry, 0, sizeof(entry));
+
+	/* command buffer side: backing BO and current write offset */
+	entry.cmdbuf.handle = self->bo->handle;
+	entry.cmdbuf.offset = drm_tegra_pushbuf_get_offset(pushbuf);
+
+	/* target side */
+	entry.target.handle = target->handle;
+	entry.target.offset = offset;
+	entry.shift = shift;
+
+	status = drm_tegra_job_add_reloc(self->job, &entry);
+
+	return status < 0 ? status : 0;
+}
+
+/*
+ * Append a two-word syncpoint increment command to the push buffer and
+ * count it in the job's increments.
+ *
+ * cond: condition encoded into the command; must be below
+ *       DRM_TEGRA_SYNCPT_COND_MAX
+ *
+ * Returns 0 on success or -EINVAL for an out-of-range condition. No check
+ * is made that the backing buffer has room for the two words.
+ */
+drm_public
+int drm_tegra_pushbuf_sync(struct drm_tegra_pushbuf *pushbuf,
+			   enum drm_tegra_syncpt_cond cond)
+{
+	struct drm_tegra_job *job = pushbuf_priv(pushbuf)->job;
+
+	if (cond >= DRM_TEGRA_SYNCPT_COND_MAX)
+		return -EINVAL;
+
+	/* opcode word followed by condition (bits 8 and up) | syncpoint ID */
+	pushbuf->ptr[0] = HOST1X_OPCODE_NONINCR(0x0, 0x1);
+	pushbuf->ptr[1] = cond << 8 | job->syncpt;
+	pushbuf->ptr += 2;
+
+	job->increments++;
+
+	return 0;
+}
diff --git a/tegra/tegra.h b/tegra/tegra.h
index 0731cb3bd4dc..ca0ade1bddad 100644
--- a/tegra/tegra.h
+++ b/tegra/tegra.h
@@ -28,6 +28,13 @@ 
 #include <stdint.h>
 #include <stdlib.h>
 
+#include <tegra_drm.h>
+
+/* Engines a channel can be opened to with drm_tegra_channel_open(). */
+enum drm_tegra_class {
+	DRM_TEGRA_GR2D,
+	DRM_TEGRA_GR3D,
+};
+
 struct drm_tegra_bo;
 struct drm_tegra;
 
@@ -44,4 +51,49 @@  int drm_tegra_bo_get_handle(struct drm_tegra_bo *bo, uint32_t *handle);
 int drm_tegra_bo_map(struct drm_tegra_bo *bo, void **ptr);
 int drm_tegra_bo_unmap(struct drm_tegra_bo *bo);
 
+struct drm_tegra_channel;
+struct drm_tegra_job;
+
+/*
+ * Public view of a push buffer. `ptr` is the current write position inside
+ * the mapped buffer object; it is advanced as 32-bit words are written.
+ */
+struct drm_tegra_pushbuf {
+	uint32_t *ptr;
+};
+
+struct drm_tegra_fence;
+
+/*
+ * Conditions accepted by drm_tegra_pushbuf_sync(), which encodes the value
+ * into the sync command. DRM_TEGRA_SYNCPT_COND_MAX is not a condition: it
+ * is the upper bound used to validate the argument.
+ */
+enum drm_tegra_syncpt_cond {
+	DRM_TEGRA_SYNCPT_COND_IMMEDIATE,
+	DRM_TEGRA_SYNCPT_COND_OP_DONE,
+	DRM_TEGRA_SYNCPT_COND_RD_DONE,
+	DRM_TEGRA_SYNCPT_COND_WR_SAFE,
+	DRM_TEGRA_SYNCPT_COND_MAX,
+};
+
+int drm_tegra_channel_open(struct drm_tegra_channel **channelp,
+			   struct drm_tegra *drm,
+			   enum drm_tegra_class client);
+int drm_tegra_channel_close(struct drm_tegra_channel *channel);
+
+int drm_tegra_job_new(struct drm_tegra_job **jobp,
+		      struct drm_tegra_channel *channel);
+int drm_tegra_job_free(struct drm_tegra_job *job);
+int drm_tegra_job_submit(struct drm_tegra_job *job,
+			 struct drm_tegra_fence **fencep);
+
+int drm_tegra_pushbuf_new(struct drm_tegra_pushbuf **pushbufp,
+			  struct drm_tegra_job *job,
+			  struct drm_tegra_bo *bo,
+			  unsigned long offset);
+int drm_tegra_pushbuf_free(struct drm_tegra_pushbuf *pushbuf);
+int drm_tegra_pushbuf_relocate(struct drm_tegra_pushbuf *pushbuf,
+			       struct drm_tegra_bo *target,
+			       unsigned long offset,
+			       unsigned long shift);
+int drm_tegra_pushbuf_sync(struct drm_tegra_pushbuf *pushbuf,
+			   enum drm_tegra_syncpt_cond cond);
+
+int drm_tegra_fence_wait_timeout(struct drm_tegra_fence *fence,
+				 unsigned long timeout);
+int drm_tegra_fence_wait(struct drm_tegra_fence *fence);
+void drm_tegra_fence_free(struct drm_tegra_fence *fence);
+
 #endif /* __DRM_TEGRA_H__ */