diff mbox

[PATCHv5,RESEND,2/8] gpu: host1x: Add syncpoint wait and interrupts

Message ID 1358250244-9678-3-git-send-email-tbergstrom@nvidia.com (mailing list archive)
State New, archived
Headers show

Commit Message

Terje Bergstrom Jan. 15, 2013, 11:43 a.m. UTC
Add support for sync point interrupts, and sync point wait. Sync
point wait used interrupts for unblocking wait.

Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/host1x/Makefile              |    1 +
 drivers/gpu/host1x/dev.c                 |   21 +-
 drivers/gpu/host1x/dev.h                 |   17 +-
 drivers/gpu/host1x/hw/host1x01.c         |    2 +
 drivers/gpu/host1x/hw/hw_host1x01_sync.h |   42 ++++
 drivers/gpu/host1x/hw/intr_hw.c          |  178 +++++++++++++++
 drivers/gpu/host1x/intr.c                |  356 ++++++++++++++++++++++++++++++
 drivers/gpu/host1x/intr.h                |  103 +++++++++
 drivers/gpu/host1x/syncpt.c              |  163 ++++++++++++++
 drivers/gpu/host1x/syncpt.h              |    5 +
 10 files changed, 883 insertions(+), 5 deletions(-)
 create mode 100644 drivers/gpu/host1x/hw/intr_hw.c
 create mode 100644 drivers/gpu/host1x/intr.c
 create mode 100644 drivers/gpu/host1x/intr.h

Comments

Thierry Reding Feb. 4, 2013, 10:30 a.m. UTC | #1
On Tue, Jan 15, 2013 at 01:43:58PM +0200, Terje Bergstrom wrote:
[...]
> diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c
[...]
> @@ -95,7 +96,6 @@ static int host1x_probe(struct platform_device *dev)
>  
>  	/* set common host1x device data */
>  	platform_set_drvdata(dev, host);
> -
>  	host->regs = devm_request_and_ioremap(&dev->dev, regs);
>  	if (!host->regs) {
>  		dev_err(&dev->dev, "failed to remap host registers\n");

This seems an unrelated (and actually undesirable) change.

> @@ -109,28 +109,40 @@ static int host1x_probe(struct platform_device *dev)
>  	}
>  
>  	err = host1x_syncpt_init(host);
> -	if (err)
> +	if (err) {
> +		dev_err(&dev->dev, "failed to init sync points");
>  		return err;
> +	}

This error message should probably have gone in the previous patch as
well.

> diff --git a/drivers/gpu/host1x/dev.h b/drivers/gpu/host1x/dev.h
> index d8f5979..8376092 100644
> --- a/drivers/gpu/host1x/dev.h
> +++ b/drivers/gpu/host1x/dev.h
> @@ -17,11 +17,12 @@
>  #ifndef HOST1X_DEV_H
>  #define HOST1X_DEV_H
>  
> +#include <linux/platform_device.h>
>  #include "syncpt.h"
> +#include "intr.h"
>  
>  struct host1x;
>  struct host1x_syncpt;
> -struct platform_device;

Why include platform_device.h here?

> @@ -34,6 +35,18 @@ struct host1x_syncpt_ops {
>  	const char * (*name)(struct host1x_syncpt *);
>  };
>  
> +struct host1x_intr_ops {
> +	void (*init_host_sync)(struct host1x_intr *);
> +	void (*set_host_clocks_per_usec)(
> +		struct host1x_intr *, u32 clocks);

Could the above two not be combined? The only reason to keep them
separate would be if the host1x clock was dynamically changed, but I
don't think we support that, right?

> +	void (*set_syncpt_threshold)(
> +		struct host1x_intr *, u32 id, u32 thresh);
> +	void (*enable_syncpt_intr)(struct host1x_intr *, u32 id);
> +	void (*disable_syncpt_intr)(struct host1x_intr *, u32 id);
> +	void (*disable_all_syncpt_intrs)(struct host1x_intr *);

Can disable_all_syncpt_intrs() not be implemented generically using the
number of syncpoints as exposed by host1x_device_info and the
.disable_syncpt_intr() function?

> @@ -46,11 +59,13 @@ struct host1x_device_info {
>  struct host1x {
>  	void __iomem *regs;
>  	struct host1x_syncpt *syncpt;
> +	struct host1x_intr intr;
>  	struct platform_device *dev;
>  	struct host1x_device_info info;
>  	struct clk *clk;
>  
>  	struct host1x_syncpt_ops syncpt_op;
> +	struct host1x_intr_ops intr_op;

I think carrying a const pointer to the interrupt operations structure
is a better option here.

> diff --git a/drivers/gpu/host1x/hw/intr_hw.c b/drivers/gpu/host1x/hw/intr_hw.c
[...]
> +static void host1x_intr_syncpt_thresh_isr(struct host1x_intr_syncpt *syncpt);

Can we avoid this forward declaration?

> +static void syncpt_thresh_cascade_fn(struct work_struct *work)

syncpt_thresh_work()?

> +{
> +	struct host1x_intr_syncpt *sp =
> +		container_of(work, struct host1x_intr_syncpt, work);
> +	host1x_syncpt_thresh_fn(sp);

Couldn't we inline the host1x_syncpt_thresh_fn() implementation here?
Why do we need to go through an external function declaration?

> +static irqreturn_t syncpt_thresh_cascade_isr(int irq, void *dev_id)
> +{
> +	struct host1x *host1x = dev_id;
> +	struct host1x_intr *intr = &host1x->intr;
> +	unsigned long reg;
> +	int i, id;
> +
> +	for (i = 0; i < host1x->info.nb_pts / BITS_PER_LONG; i++) {
> +		reg = host1x_sync_readl(host1x,
> +				HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS +
> +				i * REGISTER_STRIDE);
> +		for_each_set_bit(id, &reg, BITS_PER_LONG) {
> +			struct host1x_intr_syncpt *sp =
> +				intr->syncpt + (i * BITS_PER_LONG + id);
> +			host1x_intr_syncpt_thresh_isr(sp);

Have you considered mimicking the IRQ API and name this something like
host1x_intr_syncpt_thresh_handle() and name the actual ISR just
syncpt_thresh_isr()? Not so important but it makes things a bit clearer
in my opinion.

> +			queue_work(intr->wq, &sp->work);

Should the call to queue_work() perhaps be moved into
host1x_intr_syncpt_thresh_isr().

> +static void host1x_intr_init_host_sync(struct host1x_intr *intr)
> +{
> +	struct host1x *host1x = intr_to_host1x(intr);
> +	int i, err;
> +
> +	host1x_sync_writel(host1x, 0xffffffffUL,
> +		HOST1X_SYNC_SYNCPT_THRESH_INT_DISABLE);
> +	host1x_sync_writel(host1x, 0xffffffffUL,
> +		HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS);
> +
> +	for (i = 0; i < host1x->info.nb_pts; i++)
> +		INIT_WORK(&intr->syncpt[i].work, syncpt_thresh_cascade_fn);
> +
> +	err = devm_request_irq(&host1x->dev->dev, intr->syncpt_irq,
> +				syncpt_thresh_cascade_isr,
> +				IRQF_SHARED, "host1x_syncpt", host1x);
> +	WARN_ON(IS_ERR_VALUE(err));

Do we really want to continue in this case?

> +static void host1x_intr_set_syncpt_threshold(struct host1x_intr *intr,
> +	u32 id, u32 thresh)
> +{
> +	struct host1x *host1x = intr_to_host1x(intr);
> +	host1x_sync_writel(host1x, thresh,
> +		HOST1X_SYNC_SYNCPT_INT_THRESH_0 + id * REGISTER_STRIDE);
> +}

Again, maybe defining the register stride as part of the register
definition might be better. I think HOST1X_SYNC_SYNCPT_INT_THRESH(id) is
easier to read.

> +static void host1x_intr_enable_syncpt_intr(struct host1x_intr *intr, u32 id)
> +{
> +	struct host1x *host1x = intr_to_host1x(intr);
> +
> +	host1x_sync_writel(host1x, BIT_MASK(id),
> +			HOST1X_SYNC_SYNCPT_THRESH_INT_ENABLE_CPU0 +
> +			BIT_WORD(id) * REGISTER_STRIDE);
> +}

Same here.

> +static void host1x_intr_disable_syncpt_intr(struct host1x_intr *intr, u32 id)
> +{
> +	struct host1x *host1x = intr_to_host1x(intr);
> +
> +	host1x_sync_writel(host1x, BIT_MASK(id),
> +			HOST1X_SYNC_SYNCPT_THRESH_INT_DISABLE +
> +			BIT_WORD(id) * REGISTER_STRIDE);
> +
> +	host1x_sync_writel(host1x, BIT_MASK(id),
> +		HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS +
> +		BIT_WORD(id) * REGISTER_STRIDE);
> +}

And here.

> diff --git a/drivers/gpu/host1x/intr.c b/drivers/gpu/host1x/intr.c
[...]
> +#include "intr.h"
> +#include <linux/interrupt.h>
> +#include <linux/slab.h>
> +#include <linux/irq.h>
> +#include "dev.h"

More funky ordering of includes.

> +int host1x_intr_add_action(struct host1x_intr *intr, u32 id, u32 thresh,
> +			enum host1x_intr_action action, void *data,
> +			void *_waiter,
> +			void **ref)

Why do you pass in _waiter as void * and not struct host1x_waitlist *?

I think I've said this before. The interface doesn't seem optimal to me
here. Passing in an enumeration to choose which action to perform looks
difficult to work with (not to mention the symbols are rather long and
therefore result in ugly code).

Maybe doing this by passing around a pointer to a handler function would
be nicer. However since I haven't really used this yet, I can't really
tell. So maybe we should just merge the implementation as-is for now. We
can always clean it up later.

> +void *host1x_intr_alloc_waiter(void)
> +{
> +	return kzalloc(sizeof(struct host1x_waitlist), GFP_KERNEL);
> +}

I'm not sure why this is separate from host1x_syncpt_wait() since it is
only used inside that function and the waiter returned never leaves the
scope of that function, so it might be better to allocate it directly
in host1x_syncpt_wait() instead.

Actually, it looks like the waiter doesn't ever leave scope, so you may
even want to allocate it on the stack.

> +void host1x_intr_put_ref(struct host1x_intr *intr, u32 id, void *ref)

Here again, you pass in the waiter via a void *. Why's that?

> +int host1x_intr_init(struct host1x_intr *intr, u32 irq_sync)

Maybe you should keep the type of the irq_sync here so that it properly
propagates to the call to devm_request_irq().

> +{
> +	unsigned int id;
> +	struct host1x *host1x = intr_to_host1x(intr);
> +	u32 nb_pts = host1x_syncpt_nb_pts(host1x);
> +
> +	intr->syncpt = devm_kzalloc(&host1x->dev->dev,
> +			sizeof(struct host1x_intr_syncpt) *
> +			host1x->info.nb_pts,
> +			GFP_KERNEL);
> +
> +	if (!host1x->intr.syncpt)

The above blank line isn't necessary.

> +void host1x_intr_stop(struct host1x_intr *intr)
> +{
> +	unsigned int id;
> +	struct host1x *host1x = intr_to_host1x(intr);
> +	struct host1x_intr_syncpt *syncpt;
> +	u32 nb_pts = host1x_syncpt_nb_pts(intr_to_host1x(intr));
> +
> +	mutex_lock(&intr->mutex);
> +
> +	host1x->intr_op.disable_all_syncpt_intrs(intr);

I haven't commented on this everywhere, but I think this could benefit
from a wrapper that forwards this to the intr_op. The same goes for the
sync_op.

> +	for (id = 0, syncpt = intr->syncpt;
> +	     id < nb_pts;
> +	     ++id, ++syncpt) {

I don't think you need to explicitly keep track of syncpt within the for
statement. Instead you could either index intr->syncpt directly or
obtain a reference within the loop. It allows the for statement to be
written much more canonically.

> diff --git a/drivers/gpu/host1x/intr.h b/drivers/gpu/host1x/intr.h
[...]
> +#define intr_syncpt_to_intr(is) (is->intr)

This one doesn't buy you anything. It actually uses up more characters
so you can just drop it.

> diff --git a/drivers/gpu/host1x/syncpt.c b/drivers/gpu/host1x/syncpt.c
[...]
> @@ -119,6 +122,166 @@ void host1x_syncpt_incr(struct host1x_syncpt *sp)
>  	host1x_syncpt_cpu_incr(sp);
>  }
>  
> +/*
> + * Updated sync point form hardware, and returns true if syncpoint is expired,
> + * false if we may need to wait
> + */
> +static bool syncpt_load_min_is_expired(
> +	struct host1x_syncpt *sp,
> +	u32 thresh)

This can all go on one line.

> +/*
> + * Main entrypoint for syncpoint value waits.
> + */
> +int host1x_syncpt_wait(struct host1x_syncpt *sp,
> +			u32 thresh, long timeout, u32 *value)
> +{
[...]
> +}
> +EXPORT_SYMBOL(host1x_syncpt_wait);

This doesn't only seem to be the main entrypoint, but it's basically the
only way to currently wait for syncpoints. One actual use-case where
this might turn out to be a problem is video capturing. The problem is
that using this API you can't very well asynchronously capture frames.
So eventually I think we need a way to allow a generic handler to be
attached to syncpoints so that you can have this handler continuously
invoked after each frame is captured and just pass the buffer back to
userspace.

> +bool host1x_syncpt_is_expired(
> +	struct host1x_syncpt *sp,
> +	u32 thresh)

This can go on one line.

Thierry
Terje Bergstrom Feb. 5, 2013, 4:29 a.m. UTC | #2
On 04.02.2013 02:30, Thierry Reding wrote:
> On Tue, Jan 15, 2013 at 01:43:58PM +0200, Terje Bergstrom wrote:
> [...]
>> diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c
> [...]
>> @@ -95,7 +96,6 @@ static int host1x_probe(struct platform_device *dev)
>>
>>       /* set common host1x device data */
>>       platform_set_drvdata(dev, host);
>> -
>>       host->regs = devm_request_and_ioremap(&dev->dev, regs);
>>       if (!host->regs) {
>>               dev_err(&dev->dev, "failed to remap host registers\n");
> 
> This seems an unrelated (and actually undesirable) change.
> 
>> @@ -109,28 +109,40 @@ static int host1x_probe(struct platform_device *dev)
>>       }
>>
>>       err = host1x_syncpt_init(host);
>> -     if (err)
>> +     if (err) {
>> +             dev_err(&dev->dev, "failed to init sync points");
>>               return err;
>> +     }
> 
> This error message should probably have gone in the previous patch as
> well.

Oops, will move these to previous patch. I'm pretty sure I already fixed
this once. :-(

> 
>> diff --git a/drivers/gpu/host1x/dev.h b/drivers/gpu/host1x/dev.h
>> index d8f5979..8376092 100644
>> --- a/drivers/gpu/host1x/dev.h
>> +++ b/drivers/gpu/host1x/dev.h
>> @@ -17,11 +17,12 @@
>>  #ifndef HOST1X_DEV_H
>>  #define HOST1X_DEV_H
>>
>> +#include <linux/platform_device.h>
>>  #include "syncpt.h"
>> +#include "intr.h"
>>
>>  struct host1x;
>>  struct host1x_syncpt;
>> -struct platform_device;
> 
> Why include platform_device.h here?

host1x_get_host() actually needs that, so this #include should've also
been in previous patch.

> 
>> @@ -34,6 +35,18 @@ struct host1x_syncpt_ops {
>>       const char * (*name)(struct host1x_syncpt *);
>>  };
>>
>> +struct host1x_intr_ops {
>> +     void (*init_host_sync)(struct host1x_intr *);
>> +     void (*set_host_clocks_per_usec)(
>> +             struct host1x_intr *, u32 clocks);
> 
> Could the above two not be combined? The only reason to keep them
> separate would be if the host1x clock was dynamically changed, but I
> don't think we support that, right?

I've left this as a placeholder to at some point start supporting host1x
clock scaling. But I don't think we're going to do that for a while, so
I could merge them.

> 
>> +     void (*set_syncpt_threshold)(
>> +             struct host1x_intr *, u32 id, u32 thresh);
>> +     void (*enable_syncpt_intr)(struct host1x_intr *, u32 id);
>> +     void (*disable_syncpt_intr)(struct host1x_intr *, u32 id);
>> +     void (*disable_all_syncpt_intrs)(struct host1x_intr *);
> 
> Can disable_all_syncpt_intrs() not be implemented generically using the
> number of syncpoints as exposed by host1x_device_info and the
> .disable_syncpt_intr() function?

disable_all_syncpt_intrs() disables all interrupts in one write (or one
per 32 sync points), so it's more efficient.

> 
>> @@ -46,11 +59,13 @@ struct host1x_device_info {
>>  struct host1x {
>>       void __iomem *regs;
>>       struct host1x_syncpt *syncpt;
>> +     struct host1x_intr intr;
>>       struct platform_device *dev;
>>       struct host1x_device_info info;
>>       struct clk *clk;
>>
>>       struct host1x_syncpt_ops syncpt_op;
>> +     struct host1x_intr_ops intr_op;
> 
> I think carrying a const pointer to the interrupt operations structure
> is a better option here.

Ok.

> 
>> diff --git a/drivers/gpu/host1x/hw/intr_hw.c b/drivers/gpu/host1x/hw/intr_hw.c
> [...]
>> +static void host1x_intr_syncpt_thresh_isr(struct host1x_intr_syncpt *syncpt);
> 
> Can we avoid this forward declaration?

I think we can, if I just move the isr to top of file.

> 
>> +static void syncpt_thresh_cascade_fn(struct work_struct *work)
> 
> syncpt_thresh_work()?

Sounds good.

> 
>> +{
>> +     struct host1x_intr_syncpt *sp =
>> +             container_of(work, struct host1x_intr_syncpt, work);
>> +     host1x_syncpt_thresh_fn(sp);
> 
> Couldn't we inline the host1x_syncpt_thresh_fn() implementation here?
> Why do we need to go through an external function declaration?

If I move syncpt_thresh_work() to intr.c from intr_hw.c, I could do
that. That'd simplify the interrupt path.

> 
>> +static irqreturn_t syncpt_thresh_cascade_isr(int irq, void *dev_id)
>> +{
>> +     struct host1x *host1x = dev_id;
>> +     struct host1x_intr *intr = &host1x->intr;
>> +     unsigned long reg;
>> +     int i, id;
>> +
>> +     for (i = 0; i < host1x->info.nb_pts / BITS_PER_LONG; i++) {
>> +             reg = host1x_sync_readl(host1x,
>> +                             HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS +
>> +                             i * REGISTER_STRIDE);
>> +             for_each_set_bit(id, &reg, BITS_PER_LONG) {
>> +                     struct host1x_intr_syncpt *sp =
>> +                             intr->syncpt + (i * BITS_PER_LONG + id);
>> +                     host1x_intr_syncpt_thresh_isr(sp);
> 
> Have you considered mimicking the IRQ API and name this something like
> host1x_intr_syncpt_thresh_handle() and name the actual ISR just
> syncpt_thresh_isr()? Not so important but it makes things a bit clearer
> in my opinion.

This gets a bit confusing, because we have an ISR that calls a function
that is also called ISR. I've kept "isr" in names of both to emphasize
that this is running in interrupt context. I'm open to renaming these to
make it clearer.

Did you refer to chained IRQ handler in linux/irq.h when you mentioned
IRQ API as reference for naming?

> 
>> +                     queue_work(intr->wq, &sp->work);
> 
> Should the call to queue_work() perhaps be moved into
> host1x_intr_syncpt_thresh_isr().

I'm not sure, either way would be ok to me. The current structure allows
host1x_intr_syncpt_thresh_isr() to only take one parameter
(host1x_intr_syncpt). If we move queue_work, we'd also need to pass
host1x_intr.

> 
>> +static void host1x_intr_init_host_sync(struct host1x_intr *intr)
>> +{
>> +     struct host1x *host1x = intr_to_host1x(intr);
>> +     int i, err;
>> +
>> +     host1x_sync_writel(host1x, 0xffffffffUL,
>> +             HOST1X_SYNC_SYNCPT_THRESH_INT_DISABLE);
>> +     host1x_sync_writel(host1x, 0xffffffffUL,
>> +             HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS);
>> +
>> +     for (i = 0; i < host1x->info.nb_pts; i++)
>> +             INIT_WORK(&intr->syncpt[i].work, syncpt_thresh_cascade_fn);
>> +
>> +     err = devm_request_irq(&host1x->dev->dev, intr->syncpt_irq,
>> +                             syncpt_thresh_cascade_isr,
>> +                             IRQF_SHARED, "host1x_syncpt", host1x);
>> +     WARN_ON(IS_ERR_VALUE(err));
> 
> Do we really want to continue in this case?

Hmm, we'd need to actually return an error code. There's not much the
driver can do without syncpt interrupts.

> 
>> +static void host1x_intr_set_syncpt_threshold(struct host1x_intr *intr,
>> +     u32 id, u32 thresh)
>> +{
>> +     struct host1x *host1x = intr_to_host1x(intr);
>> +     host1x_sync_writel(host1x, thresh,
>> +             HOST1X_SYNC_SYNCPT_INT_THRESH_0 + id * REGISTER_STRIDE);
>> +}
> 
> Again, maybe defining the register stride as part of the register
> definition might be better. I think HOST1X_SYNC_SYNCPT_INT_THRESH(id) is
> easier to read.

Sounds good.

> 
>> +static void host1x_intr_enable_syncpt_intr(struct host1x_intr *intr, u32 id)
>> +{
>> +     struct host1x *host1x = intr_to_host1x(intr);
>> +
>> +     host1x_sync_writel(host1x, BIT_MASK(id),
>> +                     HOST1X_SYNC_SYNCPT_THRESH_INT_ENABLE_CPU0 +
>> +                     BIT_WORD(id) * REGISTER_STRIDE);
>> +}
> 
> Same here.

Yep.

> 
>> +static void host1x_intr_disable_syncpt_intr(struct host1x_intr *intr, u32 id)
>> +{
>> +     struct host1x *host1x = intr_to_host1x(intr);
>> +
>> +     host1x_sync_writel(host1x, BIT_MASK(id),
>> +                     HOST1X_SYNC_SYNCPT_THRESH_INT_DISABLE +
>> +                     BIT_WORD(id) * REGISTER_STRIDE);
>> +
>> +     host1x_sync_writel(host1x, BIT_MASK(id),
>> +             HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS +
>> +             BIT_WORD(id) * REGISTER_STRIDE);
>> +}
> 
> And here.

Yep.

> 
>> diff --git a/drivers/gpu/host1x/intr.c b/drivers/gpu/host1x/intr.c
> [...]
>> +#include "intr.h"
>> +#include <linux/interrupt.h>
>> +#include <linux/slab.h>
>> +#include <linux/irq.h>
>> +#include "dev.h"
> 
> More funky ordering of includes.

Will fix.

> 
>> +int host1x_intr_add_action(struct host1x_intr *intr, u32 id, u32 thresh,
>> +                     enum host1x_intr_action action, void *data,
>> +                     void *_waiter,
>> +                     void **ref)
> 
> Why do you pass in _waiter as void * and not struct host1x_waitlist *?

struct host1x_waitlist is defined inside intr.c, so I've chosen to pass
void *. I could naturally just forward declare host1x_waitlist in intr.h
and change the allocation and add_action to use that.

> 
> I think I've said this before. The interface doesn't seem optimal to me
> here. Passing in an enumeration to choose which action to perform looks
> difficult to work with (not to mention the symbols are rather long and
> therefore result in ugly code).
> 
> Maybe doing this by passing around a pointer to a handler function would
> be nicer. However since I haven't really used this yet, I can't really
> tell. So maybe we should just merge the implementation as-is for now. We
> can always clean it up later.

We're using the enum also to index into arrays. We do it so that we can
remove all the completed waiters from the wait_head, and insert them
into lists per action type. This way we can run all actions in priority
order: first action_submit_complete, then action_wakeup, and then
action_wakeup_interruptible.

Now, we're recently noticed that the priority order is actually wrong.
The first priority should be to wake up non-interruptible tasks, then
interruptible tasks. Cleaning up memory of completed submits should be
lower priority.

I've considered this part as something private to host1x driver and it's
not really meant to be called f.ex. from DRM. But, as you seem to have a
need to have an asynchronous wait for a fence, we'd need to figure
something out for that.

> 
>> +void *host1x_intr_alloc_waiter(void)
>> +{
>> +     return kzalloc(sizeof(struct host1x_waitlist), GFP_KERNEL);
>> +}
> 
> I'm not sure why this is separate from host1x_syncpt_wait() since it is
> only used inside that function and the waiter returned never leaves the
> scope of that function, so it might be better to allocate it directly
> in host1x_syncpt_wait() instead.
> 
> Actually, it looks like the waiter doesn't ever leave scope, so you may
> even want to allocate it on the stack.

In patch 3, at submit time we first allocate waiter, then take
submit_lock, write submit to channel, and add the waiter while having
the lock. I did this so that I host1x_intr_add_action() can always
succeed. Otherwise I'd need to write another code path to handle the
case where we wrote a job to channel, but we're not able to add a
submit_complete action to it.

> 
>> +void host1x_intr_put_ref(struct host1x_intr *intr, u32 id, void *ref)
> 
> Here again, you pass in the waiter via a void *. Why's that?

host1x_waitlist is hidden inside intr.c.

> 
>> +int host1x_intr_init(struct host1x_intr *intr, u32 irq_sync)
> 
> Maybe you should keep the type of the irq_sync here so that it properly
> propagates to the call to devm_request_irq().

I'm not sure what you mean. Do you mean that I should use unsigned int,
as that's the type used in devm_request_irq()?

> 
>> +{
>> +     unsigned int id;
>> +     struct host1x *host1x = intr_to_host1x(intr);
>> +     u32 nb_pts = host1x_syncpt_nb_pts(host1x);
>> +
>> +     intr->syncpt = devm_kzalloc(&host1x->dev->dev,
>> +                     sizeof(struct host1x_intr_syncpt) *
>> +                     host1x->info.nb_pts,
>> +                     GFP_KERNEL);
>> +
>> +     if (!host1x->intr.syncpt)
> 
> The above blank line isn't necessary.

Will remove.

> 
>> +void host1x_intr_stop(struct host1x_intr *intr)
>> +{
>> +     unsigned int id;
>> +     struct host1x *host1x = intr_to_host1x(intr);
>> +     struct host1x_intr_syncpt *syncpt;
>> +     u32 nb_pts = host1x_syncpt_nb_pts(intr_to_host1x(intr));
>> +
>> +     mutex_lock(&intr->mutex);
>> +
>> +     host1x->intr_op.disable_all_syncpt_intrs(intr);
> 
> I haven't commented on this everywhere, but I think this could benefit
> from a wrapper that forwards this to the intr_op. The same goes for the
> sync_op.

You mean something like "host1x_disable_all_syncpt_intrs"?

> 
>> +     for (id = 0, syncpt = intr->syncpt;
>> +          id < nb_pts;
>> +          ++id, ++syncpt) {
> 
> I don't think you need to explicitly keep track of syncpt within the for
> statement. Instead you could either index intr->syncpt directly or
> obtain a reference within the loop. It allows the for statement to be
> written much more canonically.

Yep, will do.

> 
>> diff --git a/drivers/gpu/host1x/intr.h b/drivers/gpu/host1x/intr.h
> [...]
>> +#define intr_syncpt_to_intr(is) (is->intr)
> 
> This one doesn't buy you anything. It actually uses up more characters
> so you can just drop it.

True, it's useless. I'll remove.

> 
>> diff --git a/drivers/gpu/host1x/syncpt.c b/drivers/gpu/host1x/syncpt.c
> [...]
>> @@ -119,6 +122,166 @@ void host1x_syncpt_incr(struct host1x_syncpt *sp)
>>       host1x_syncpt_cpu_incr(sp);
>>  }
>>
>> +/*
>> + * Updated sync point form hardware, and returns true if syncpoint is expired,
>> + * false if we may need to wait
>> + */
>> +static bool syncpt_load_min_is_expired(
>> +     struct host1x_syncpt *sp,
>> +     u32 thresh)
> 
> This can all go on one line.

Ok.

> 
>> +/*
>> + * Main entrypoint for syncpoint value waits.
>> + */
>> +int host1x_syncpt_wait(struct host1x_syncpt *sp,
>> +                     u32 thresh, long timeout, u32 *value)
>> +{
> [...]
>> +}
>> +EXPORT_SYMBOL(host1x_syncpt_wait);
> 
> This doesn't only seem to be the main entrypoint, but it's basically the
> only way to currently wait for syncpoints. One actual use-case where
> this might turn out to be a problem is video capturing. The problem is
> that using this API you can't very well asynchronously capture frames.
> So eventually I think we need a way to allow a generic handler to be
> attached to syncpoints so that you can have this handler continuously
> invoked after each frame is captured and just pass the buffer back to
> userspace.

Yep, so far all asynchronous waits have been done in user space. We
would probably allow attaching a handler to a syncpt value, so that we'd
call that handler once a value is reached. In effect, similar to a
wake_up event that is now added via host1x_intr_add_action, but simpler.
That'd mean that the handler needs to be re-added after each frame.

We could also add the handler as persistent if re-adding would be a
problem. That'd require some new wiring and I'll have to think how to
implement that.

> 
>> +bool host1x_syncpt_is_expired(
>> +     struct host1x_syncpt *sp,
>> +     u32 thresh)
> 
> This can go on one line.

Will join.

Terje
Thierry Reding Feb. 5, 2013, 8:42 a.m. UTC | #3
On Mon, Feb 04, 2013 at 08:29:08PM -0800, Terje Bergström wrote:
> On 04.02.2013 02:30, Thierry Reding wrote:
> >> diff --git a/drivers/gpu/host1x/dev.h b/drivers/gpu/host1x/dev.h
> >> index d8f5979..8376092 100644
> >> --- a/drivers/gpu/host1x/dev.h
> >> +++ b/drivers/gpu/host1x/dev.h
> >> @@ -17,11 +17,12 @@
> >>  #ifndef HOST1X_DEV_H
> >>  #define HOST1X_DEV_H
> >>
> >> +#include <linux/platform_device.h>
> >>  #include "syncpt.h"
> >> +#include "intr.h"
> >>
> >>  struct host1x;
> >>  struct host1x_syncpt;
> >> -struct platform_device;
> > 
> > Why include platform_device.h here?
> 
> host1x_get_host() actually needs that, so this #include should've also
> been in previous patch.

No need to if you pass struct device * instead. You might need
linux/device.h instead, though.

> >> +     void (*set_syncpt_threshold)(
> >> +             struct host1x_intr *, u32 id, u32 thresh);
> >> +     void (*enable_syncpt_intr)(struct host1x_intr *, u32 id);
> >> +     void (*disable_syncpt_intr)(struct host1x_intr *, u32 id);
> >> +     void (*disable_all_syncpt_intrs)(struct host1x_intr *);
> > 
> > Can disable_all_syncpt_intrs() not be implemented generically using the
> > number of syncpoints as exposed by host1x_device_info and the
> > .disable_syncpt_intr() function?
> 
> disable_all_syncpt_intrs() disables all interrupts in one write (or one
> per 32 sync points), so it's more efficient.

Yes, I noticed that and failed to remove this comment.

> >> +{
> >> +     struct host1x_intr_syncpt *sp =
> >> +             container_of(work, struct host1x_intr_syncpt, work);
> >> +     host1x_syncpt_thresh_fn(sp);
> > 
> > Couldn't we inline the host1x_syncpt_thresh_fn() implementation here?
> > Why do we need to go through an external function declaration?
> 
> If I move syncpt_thresh_work() to intr.c from intr_hw.c, I could do
> that. That'd simplify the interrupt path.

I like simplification. =)

> >> +static irqreturn_t syncpt_thresh_cascade_isr(int irq, void *dev_id)
> >> +{
> >> +     struct host1x *host1x = dev_id;
> >> +     struct host1x_intr *intr = &host1x->intr;
> >> +     unsigned long reg;
> >> +     int i, id;
> >> +
> >> +     for (i = 0; i < host1x->info.nb_pts / BITS_PER_LONG; i++) {
> >> +             reg = host1x_sync_readl(host1x,
> >> +                             HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS +
> >> +                             i * REGISTER_STRIDE);
> >> +             for_each_set_bit(id, &reg, BITS_PER_LONG) {
> >> +                     struct host1x_intr_syncpt *sp =
> >> +                             intr->syncpt + (i * BITS_PER_LONG + id);
> >> +                     host1x_intr_syncpt_thresh_isr(sp);
> > 
> > Have you considered mimicking the IRQ API and name this something like
> > host1x_intr_syncpt_thresh_handle() and name the actual ISR just
> > syncpt_thresh_isr()? Not so important but it makes things a bit clearer
> > in my opinion.
> 
> This gets a bit confusing, because we have an ISR that calls a function
> that is also called ISR. I've kept "isr" in names of both to emphasize
> that this is running in interrupt context. I'm open to renaming these to
> make it clearer.
> 
> Did you refer to chained IRQ handler in linux/irq.h when you mentioned
> IRQ API as reference for naming?

What I had in mind was more along the lines of kernel/irq/chip.c, which
has a bunch of handlers for various types of interrupts, such as
handle_nested_irq() or handle_simple_irq().

Hence my proposal to rename host1x_intr_syncpt_thresh_isr() to
host1x_intr_syncpt_handle() because it handles the interrupt from a
single syncpoint and syncpt_thresh_cascade_isr() to syncpt_thresh_isr()
to keep it shorter.

Another variant would be host1x_syncpt_irq() for the top-level handler
and something host1x_handle_syncpt() to handle individual syncpoints. I
like this one best, but this is pure bike-shedding and there's nothing
technically wrong with the names you chose, so I can't really object if
you want to stick to them.

> >> +                     queue_work(intr->wq, &sp->work);
> > 
> > Should the call to queue_work() perhaps be moved into
> > host1x_intr_syncpt_thresh_isr().
> 
> I'm not sure, either way would be ok to me. The current structure allows
> host1x_intr_syncpt_thresh_isr() to only take one parameter
> (host1x_intr_syncpt). If we move queue_work, we'd also need to pass
> host1x_intr.

I think I'd still prefer to have all the code in one function because it
make subsequent modification easier and less error-prone.

> >> +static void host1x_intr_init_host_sync(struct host1x_intr *intr)
> >> +{
> >> +     struct host1x *host1x = intr_to_host1x(intr);
> >> +     int i, err;
> >> +
> >> +     host1x_sync_writel(host1x, 0xffffffffUL,
> >> +             HOST1X_SYNC_SYNCPT_THRESH_INT_DISABLE);
> >> +     host1x_sync_writel(host1x, 0xffffffffUL,
> >> +             HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS);
> >> +
> >> +     for (i = 0; i < host1x->info.nb_pts; i++)
> >> +             INIT_WORK(&intr->syncpt[i].work, syncpt_thresh_cascade_fn);
> >> +
> >> +     err = devm_request_irq(&host1x->dev->dev, intr->syncpt_irq,
> >> +                             syncpt_thresh_cascade_isr,
> >> +                             IRQF_SHARED, "host1x_syncpt", host1x);
> >> +     WARN_ON(IS_ERR_VALUE(err));
> > 
> > Do we really want to continue in this case?
> 
> Hmm, we'd need to actually return an error code. There's not much the
> driver can do without syncpt interrupts.

Yeah, in that case I think we should bail out. It's not like we're
expecting any failures. If the interrupt cannot be requested, something
must seriously be wrong and we should tell users about it so that it can
be fixed. Trying to continue on a best effort basis isn't useful here, I
think.

> >> +int host1x_intr_add_action(struct host1x_intr *intr, u32 id, u32 thresh,
> >> +                     enum host1x_intr_action action, void *data,
> >> +                     void *_waiter,
> >> +                     void **ref)
> > 
> > Why do you pass in _waiter as void * and not struct host1x_waitlist *?
> 
> struct host1x_waitlist is defined inside intr.c, so I've chosen to pass
> void *. I could naturally just forward declare host1x_waitlist in intr.h
> and change the allocation and add_action to use that.

Yes, that's definitely better.

> > I think I've said this before. The interface doesn't seem optimal to me
> > here. Passing in an enumeration to choose which action to perform looks
> > difficult to work with (not to mention the symbols are rather long and
> > therefore result in ugly code).
> > 
> > Maybe doing this by passing around a pointer to a handler function would
> > be nicer. However since I haven't really used this yet, I can't really
> > tell. So maybe we should just merge the implementation as-is for now. We
> > can always clean it up later.
> 
> We're using the enum also to index into arrays. We do it so that we can
> remove all the completed waiters from the wait_head, and insert them
> into lists per action type. This way we can run all actions in priority
> order: first action_submit_complete, then action_wakeup, and then
> action_wakeup_interruptible.
> 
> Now, we're recently noticed that the priority order is actually wrong.
> The first priority should be to wake up non-interruptible tasks, then
> interruptible tasks. Cleaning up memory of completed submits should be
> lower priority.
> 
> I've considered this part as something private to host1x driver and it's
> not really meant to be called f.ex. from DRM. But, as you seem to have a
> need to have an asynchronous wait for a fence, we'd need to figure
> something out for that.

Okay, let's keep it as-is for now and see how it can be improved later
when we have an actual use-case for using it externally.

> >> +void *host1x_intr_alloc_waiter(void)
> >> +{
> >> +     return kzalloc(sizeof(struct host1x_waitlist), GFP_KERNEL);
> >> +}
> > 
> > I'm not sure why this is separate from host1x_syncpt_wait() since it is
> > only used inside that function and the waiter returned never leaves the
> > scope of that function, so it might be better to allocate it directly
> > in host1x_syncpt_wait() instead.
> > 
> > Actually, it looks like the waiter doesn't ever leave scope, so you may
> > even want to allocate it on the stack.
> 
> In patch 3, at submit time we first allocate waiter, then take
> submit_lock, write submit to channel, and add the waiter while having
> the lock. I did this so that I host1x_intr_add_action() can always
> succeed. Otherwise I'd need to write another code path to handle the
> case where we wrote a job to channel, but we're not able to add a
> submit_complete action to it.

Okay. In that case why not allocate it on the stack in the first place
so you don't have to bother with allocations (and potential failure) at
all? The variable doesn't leave the function scope, so there shouldn't
be any issues, right?

Or if that doesn't work it would still be preferable to allocate memory
in host1x_syncpt_wait() directly instead of going through the wrapper.

> >> +void host1x_intr_put_ref(struct host1x_intr *intr, u32 id, void *ref)
> > 
> > Here again, you pass in the waiter via a void *. Why's that?
> 
> host1x_waitlist is hidden inside intr.c.

I don't think that's necessary here. I'd rather have the compiler check
for types rather than hide the structure.

> >> +int host1x_intr_init(struct host1x_intr *intr, u32 irq_sync)
> > 
> > Maybe you should keep the type of the irq_sync here so that it properly
> > propagates to the call to devm_request_irq().
> 
> I'm not sure what you mean. Do you mean that I should use unsigned int,
> as that's the type used in devm_request_irq()?

Yes.

> >> +void host1x_intr_stop(struct host1x_intr *intr)
> >> +{
> >> +     unsigned int id;
> >> +     struct host1x *host1x = intr_to_host1x(intr);
> >> +     struct host1x_intr_syncpt *syncpt;
> >> +     u32 nb_pts = host1x_syncpt_nb_pts(intr_to_host1x(intr));
> >> +
> >> +     mutex_lock(&intr->mutex);
> >> +
> >> +     host1x->intr_op.disable_all_syncpt_intrs(intr);
> > 
> > I haven't commented on this everywhere, but I think this could benefit
> > from a wrapper that forwards this to the intr_op. The same goes for the
> > sync_op.
> 
> You mean something like "host1x_disable_all_syncpt_intrs"?

Yes. I think that'd be useful for each of the op functions. Perhaps you
could even pass in a struct host1x * to make calls more uniform.

> >> +/*
> >> + * Main entrypoint for syncpoint value waits.
> >> + */
> >> +int host1x_syncpt_wait(struct host1x_syncpt *sp,
> >> +                     u32 thresh, long timeout, u32 *value)
> >> +{
> > [...]
> >> +}
> >> +EXPORT_SYMBOL(host1x_syncpt_wait);
> > 
> > This doesn't only seem to be the main entrypoint, but it's basically the
> > only way to currently wait for syncpoints. One actual use-case where
> > this might turn out to be a problem is video capturing. The problem is
> > that using this API you can't very well asynchronously capture frames.
> > So eventually I think we need a way to allow a generic handler to be
> > attached to syncpoints so that you can have this handler continuously
> > invoked after each frame is captured and just pass the buffer back to
> > userspace.
> 
> Yep, so far all asynchronous waits have been done in user space. We
> would probably allow attaching a handler to a syncpt value, so that we'd
> call that handler once a value is reached. In effect, similar to a
> wake_up event that is now added via host1x_intr_add_action, but simpler.
> That'd mean that the handler needs to be re-added after each frame.
> 
> We could also add the handler as persistent if re-adding would be a
> problem. That'd require some new wiring and I'll have to think how to
> implement that.

Yes, that sounds like what I had in mind. Again, no need to worry about
it now. We can cross that bridge when we come to it.

Thierry
Terje Bergstrom Feb. 6, 2013, 8:29 p.m. UTC | #4
On 05.02.2013 00:42, Thierry Reding wrote:
> On Mon, Feb 04, 2013 at 08:29:08PM -0800, Terje Bergström wrote:
>> host1x_get_host() actually needs that, so this #include should've also
>> been in previous patch.
> 
> No need to if you pass struct device * instead. You might need
> linux/device.h instead, though.

Can do.

> Another variant would be host1x_syncpt_irq() for the top-level handler
> and something host1x_handle_syncpt() to handle individual syncpoints. I
> like this one best, but this is pure bike-shedding and there's nothing
> technically wrong with the names you chose, so I can't really object if
> you want to stick to them.

I could use these names. They sound logical to me,too.

> 
>>>> +                     queue_work(intr->wq, &sp->work);
>>>
>>> Should the call to queue_work() perhaps be moved into
>>> host1x_intr_syncpt_thresh_isr().
>>
>> I'm not sure, either way would be ok to me. The current structure allows
>> host1x_intr_syncpt_thresh_isr() to only take one parameter
>> (host1x_intr_syncpt). If we move queue_work, we'd also need to pass
>> host1x_intr.
> 
> I think I'd still prefer to have all the code in one function because it
> make subsequent modification easier and less error-prone.

Ok, I'll do that change.

> Yeah, in that case I think we should bail out. It's not like we're
> expecting any failures. If the interrupt cannot be requested, something
> must seriously be wrong and we should tell users about it so that it can
> be fixed. Trying to continue on a best effort basis isn't useful here, I
> think.

Yep, I agree.

>> In patch 3, at submit time we first allocate waiter, then take
>> submit_lock, write submit to channel, and add the waiter while having
>> the lock. I did this so that I host1x_intr_add_action() can always
>> succeed. Otherwise I'd need to write another code path to handle the
>> case where we wrote a job to channel, but we're not able to add a
>> submit_complete action to it.
> 
> Okay. In that case why not allocate it on the stack in the first place
> so you don't have to bother with allocations (and potential failure) at
> all? The variable doesn't leave the function scope, so there shouldn't
> be any issues, right?

The submit code in patch 3 allocates a waiter, and the waiter outlives
the function scope. That waiter will clean up job queue once a job is
complete.

> Or if that doesn't work it would still be preferable to allocate memory
> in host1x_syncpt_wait() directly instead of going through the wrapper.

This was done purely, because I'm hiding the struct size from the
caller. If the caller needs to allocate, I need to expose the struct in
a header, not just a forward declaration.

>>>> +int host1x_intr_init(struct host1x_intr *intr, u32 irq_sync)
>>>
>>> Maybe you should keep the type of the irq_sync here so that it properly
>>> propagates to the call to devm_request_irq().
>>
>> I'm not sure what you mean. Do you mean that I should use unsigned int,
>> as that's the type used in devm_request_irq()?
> 
> Yes.

Ok, will do.

>>>> +void host1x_intr_stop(struct host1x_intr *intr)
>>>> +{
>>>> +     unsigned int id;
>>>> +     struct host1x *host1x = intr_to_host1x(intr);
>>>> +     struct host1x_intr_syncpt *syncpt;
>>>> +     u32 nb_pts = host1x_syncpt_nb_pts(intr_to_host1x(intr));
>>>> +
>>>> +     mutex_lock(&intr->mutex);
>>>> +
>>>> +     host1x->intr_op.disable_all_syncpt_intrs(intr);
>>>
>>> I haven't commented on this everywhere, but I think this could benefit
>>> from a wrapper that forwards this to the intr_op. The same goes for the
>>> sync_op.
>>
>> You mean something like "host1x_disable_all_syncpt_intrs"?
> 
> Yes. I think that'd be useful for each of the op functions. Perhaps you
> could even pass in a struct host1x * to make calls more uniform.

Ok, I'll add the wrapper, and I'll check if passing struct host1x *
would make sense. In effect that'd render struct host1x_intr mostly
unused, so how about if we just merge the contents of host1x_intr to host1x?

Terje
Thierry Reding Feb. 6, 2013, 8:38 p.m. UTC | #5
On Wed, Feb 06, 2013 at 12:29:26PM -0800, Terje Bergström wrote:
> On 05.02.2013 00:42, Thierry Reding wrote:
[...]
> > Or if that doesn't work it would still be preferable to allocate memory
> > in host1x_syncpt_wait() directly instead of going through the wrapper.
> 
> This was done purely, because I'm hiding the struct size from the
> caller. If the caller needs to allocate, I need to expose the struct in
> a header, not just a forward declaration.

I don't think we need to hide the struct from the caller. This is all
host1x internal. Even if a host1x client uses the struct it makes little
sense to hide it. They are all part of the same code base so there's not
much to be gained by hiding the structure definition.

> >>>> +void host1x_intr_stop(struct host1x_intr *intr)
> >>>> +{
> >>>> +     unsigned int id;
> >>>> +     struct host1x *host1x = intr_to_host1x(intr);
> >>>> +     struct host1x_intr_syncpt *syncpt;
> >>>> +     u32 nb_pts = host1x_syncpt_nb_pts(intr_to_host1x(intr));
> >>>> +
> >>>> +     mutex_lock(&intr->mutex);
> >>>> +
> >>>> +     host1x->intr_op.disable_all_syncpt_intrs(intr);
> >>>
> >>> I haven't commented on this everywhere, but I think this could benefit
> >>> from a wrapper that forwards this to the intr_op. The same goes for the
> >>> sync_op.
> >>
> >> You mean something like "host1x_disable_all_syncpt_intrs"?
> > 
> > Yes. I think that'd be useful for each of the op functions. Perhaps you
> > could even pass in a struct host1x * to make calls more uniform.
> 
> Ok, I'll add the wrapper, and I'll check if passing struct host1x *
> would make sense. In effect that'd render struct host1x_intr mostly
> unused, so how about if we just merge the contents of host1x_intr to host1x?

We can probably do that. It might make some sense to keep it in order to
scope the related fields but struct host1x isn't very large yet, so I
think omitting host1x_intr should be fine.

Thierry
Terje Bergstrom Feb. 6, 2013, 8:41 p.m. UTC | #6
On 06.02.2013 12:38, Thierry Reding wrote:
> On Wed, Feb 06, 2013 at 12:29:26PM -0800, Terje Bergström wrote:
>> This was done purely, because I'm hiding the struct size from the
>> caller. If the caller needs to allocate, I need to expose the struct in
>> a header, not just a forward declaration.
> 
> I don't think we need to hide the struct from the caller. This is all
> host1x internal. Even if a host1x client uses the struct it makes little
> sense to hide it. They are all part of the same code base so there's not
> much to be gained by hiding the structure definition.

I agree, and will change.

>> Ok, I'll add the wrapper, and I'll check if passing struct host1x *
>> would make sense. In effect that'd render struct host1x_intr mostly
>> unused, so how about if we just merge the contents of host1x_intr to host1x?
> 
> We can probably do that. It might make some sense to keep it in order to
> scope the related fields but struct host1x isn't very large yet, so I
> think omitting host1x_intr should be fine.

Yes, it's not very large, and it'd remove a lot of casting between
host1x and host1x_intr, so I'll just do that.

Terje
diff mbox

Patch

diff --git a/drivers/gpu/host1x/Makefile b/drivers/gpu/host1x/Makefile
index 363e6ab..5ef47ff 100644
--- a/drivers/gpu/host1x/Makefile
+++ b/drivers/gpu/host1x/Makefile
@@ -3,6 +3,7 @@  ccflags-y = -Idrivers/gpu/host1x
 host1x-y = \
 	syncpt.o \
 	dev.o \
+	intr.o \
 	hw/host1x01.o
 
 obj-$(CONFIG_TEGRA_HOST1X) += host1x.o
diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c
index cd2b1ef..7f9f389 100644
--- a/drivers/gpu/host1x/dev.c
+++ b/drivers/gpu/host1x/dev.c
@@ -24,6 +24,7 @@ 
 #include <linux/clk.h>
 #include <linux/io.h>
 #include "dev.h"
+#include "intr.h"
 #include "hw/host1x01.h"
 
 #define CREATE_TRACE_POINTS
@@ -95,7 +96,6 @@  static int host1x_probe(struct platform_device *dev)
 
 	/* set common host1x device data */
 	platform_set_drvdata(dev, host);
-
 	host->regs = devm_request_and_ioremap(&dev->dev, regs);
 	if (!host->regs) {
 		dev_err(&dev->dev, "failed to remap host registers\n");
@@ -109,28 +109,40 @@  static int host1x_probe(struct platform_device *dev)
 	}
 
 	err = host1x_syncpt_init(host);
-	if (err)
+	if (err) {
+		dev_err(&dev->dev, "failed to init sync points");
 		return err;
+	}
+
+	err = host1x_intr_init(&host->intr, syncpt_irq);
+	if (err) {
+		dev_err(&dev->dev, "failed to init irq");
+		goto fail_deinit_syncpt;
+	}
 
 	host->clk = devm_clk_get(&dev->dev, NULL);
 	if (IS_ERR(host->clk)) {
 		dev_err(&dev->dev, "failed to get clock\n");
 		err = PTR_ERR(host->clk);
-		goto fail_deinit_syncpt;
+		goto fail_deinit_intr;
 	}
 
 	err = clk_prepare_enable(host->clk);
 	if (err < 0) {
 		dev_err(&dev->dev, "failed to enable clock\n");
-		goto fail_deinit_syncpt;
+		goto fail_deinit_intr;
 	}
 
 	host1x_syncpt_reset(host);
 
+	host1x_intr_start(&host->intr, clk_get_rate(host->clk));
+
 	dev_info(&dev->dev, "initialized\n");
 
 	return 0;
 
+fail_deinit_intr:
+	host1x_intr_deinit(&host->intr);
 fail_deinit_syncpt:
 	host1x_syncpt_deinit(host);
 	return err;
@@ -139,6 +151,7 @@  fail_deinit_syncpt:
 static int __exit host1x_remove(struct platform_device *dev)
 {
 	struct host1x *host = platform_get_drvdata(dev);
+	host1x_intr_deinit(&host->intr);
 	host1x_syncpt_deinit(host);
 	clk_disable_unprepare(host->clk);
 	return 0;
diff --git a/drivers/gpu/host1x/dev.h b/drivers/gpu/host1x/dev.h
index d8f5979..8376092 100644
--- a/drivers/gpu/host1x/dev.h
+++ b/drivers/gpu/host1x/dev.h
@@ -17,11 +17,12 @@ 
 #ifndef HOST1X_DEV_H
 #define HOST1X_DEV_H
 
+#include <linux/platform_device.h>
 #include "syncpt.h"
+#include "intr.h"
 
 struct host1x;
 struct host1x_syncpt;
-struct platform_device;
 
 struct host1x_syncpt_ops {
 	void (*reset)(struct host1x_syncpt *);
@@ -34,6 +35,18 @@  struct host1x_syncpt_ops {
 	const char * (*name)(struct host1x_syncpt *);
 };
 
+struct host1x_intr_ops {
+	void (*init_host_sync)(struct host1x_intr *);
+	void (*set_host_clocks_per_usec)(
+		struct host1x_intr *, u32 clocks);
+	void (*set_syncpt_threshold)(
+		struct host1x_intr *, u32 id, u32 thresh);
+	void (*enable_syncpt_intr)(struct host1x_intr *, u32 id);
+	void (*disable_syncpt_intr)(struct host1x_intr *, u32 id);
+	void (*disable_all_syncpt_intrs)(struct host1x_intr *);
+	int (*free_syncpt_irq)(struct host1x_intr *);
+};
+
 struct host1x_device_info {
 	int	nb_channels;		/* host1x: num channels supported */
 	int	nb_pts;			/* host1x: num syncpoints supported */
@@ -46,11 +59,13 @@  struct host1x_device_info {
 struct host1x {
 	void __iomem *regs;
 	struct host1x_syncpt *syncpt;
+	struct host1x_intr intr;
 	struct platform_device *dev;
 	struct host1x_device_info info;
 	struct clk *clk;
 
 	struct host1x_syncpt_ops syncpt_op;
+	struct host1x_intr_ops intr_op;
 
 	struct dentry *debugfs;
 };
diff --git a/drivers/gpu/host1x/hw/host1x01.c b/drivers/gpu/host1x/hw/host1x01.c
index ea6e604..3d633a3 100644
--- a/drivers/gpu/host1x/hw/host1x01.c
+++ b/drivers/gpu/host1x/hw/host1x01.c
@@ -26,10 +26,12 @@ 
 #include "hw/host1x01_hardware.h"
 
 #include "hw/syncpt_hw.c"
+#include "hw/intr_hw.c"
 
 int host1x01_init(struct host1x *host)
 {
 	host->syncpt_op = host1x_syncpt_ops;
+	host->intr_op = host1x_intr_ops;
 
 	return 0;
 }
diff --git a/drivers/gpu/host1x/hw/hw_host1x01_sync.h b/drivers/gpu/host1x/hw/hw_host1x01_sync.h
index b12c1a4..5da9afb 100644
--- a/drivers/gpu/host1x/hw/hw_host1x01_sync.h
+++ b/drivers/gpu/host1x/hw/hw_host1x01_sync.h
@@ -51,12 +51,54 @@ 
 #ifndef __hw_host1x01_sync_h__
 #define __hw_host1x01_sync_h__
 
+static inline u32 host1x_sync_syncpt_thresh_cpu0_int_status_r(void)
+{
+	return 0x40;
+}
+#define HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS \
+	host1x_sync_syncpt_thresh_cpu0_int_status_r()
+static inline u32 host1x_sync_syncpt_thresh_int_disable_r(void)
+{
+	return 0x60;
+}
+#define HOST1X_SYNC_SYNCPT_THRESH_INT_DISABLE \
+	host1x_sync_syncpt_thresh_int_disable_r()
+static inline u32 host1x_sync_syncpt_thresh_int_enable_cpu0_r(void)
+{
+	return 0x68;
+}
+#define HOST1X_SYNC_SYNCPT_THRESH_INT_ENABLE_CPU0 \
+	host1x_sync_syncpt_thresh_int_enable_cpu0_r()
+static inline u32 host1x_sync_usec_clk_r(void)
+{
+	return 0x1a4;
+}
+#define HOST1X_SYNC_USEC_CLK \
+	host1x_sync_usec_clk_r()
+static inline u32 host1x_sync_ctxsw_timeout_cfg_r(void)
+{
+	return 0x1a8;
+}
+#define HOST1X_SYNC_CTXSW_TIMEOUT_CFG \
+	host1x_sync_ctxsw_timeout_cfg_r()
+static inline u32 host1x_sync_ip_busy_timeout_r(void)
+{
+	return 0x1bc;
+}
+#define HOST1X_SYNC_IP_BUSY_TIMEOUT \
+	host1x_sync_ip_busy_timeout_r()
 static inline u32 host1x_sync_syncpt_0_r(void)
 {
 	return 0x400;
 }
 #define HOST1X_SYNC_SYNCPT_0 \
 	host1x_sync_syncpt_0_r()
+static inline u32 host1x_sync_syncpt_int_thresh_0_r(void)
+{
+	return 0x500;
+}
+#define HOST1X_SYNC_SYNCPT_INT_THRESH_0 \
+	host1x_sync_syncpt_int_thresh_0_r()
 static inline u32 host1x_sync_syncpt_base_0_r(void)
 {
 	return 0x600;
diff --git a/drivers/gpu/host1x/hw/intr_hw.c b/drivers/gpu/host1x/hw/intr_hw.c
new file mode 100644
index 0000000..12488e2
--- /dev/null
+++ b/drivers/gpu/host1x/hw/intr_hw.c
@@ -0,0 +1,178 @@ 
+/*
+ * Tegra host1x Interrupt Management
+ *
+ * Copyright (C) 2010 Google, Inc.
+ * Copyright (c) 2010-2013, NVIDIA Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/io.h>
+#include <asm/mach/irq.h>
+
+#include "intr.h"
+#include "dev.h"
+
+/* Spacing between sync registers */
+#define REGISTER_STRIDE 4
+
+static void host1x_intr_syncpt_thresh_isr(struct host1x_intr_syncpt *syncpt);
+
+static void syncpt_thresh_cascade_fn(struct work_struct *work)
+{
+	struct host1x_intr_syncpt *sp =
+		container_of(work, struct host1x_intr_syncpt, work);
+	host1x_syncpt_thresh_fn(sp);
+}
+
+static irqreturn_t syncpt_thresh_cascade_isr(int irq, void *dev_id)
+{
+	struct host1x *host1x = dev_id;
+	struct host1x_intr *intr = &host1x->intr;
+	unsigned long reg;
+	int i, id;
+
+	for (i = 0; i < host1x->info.nb_pts / BITS_PER_LONG; i++) {
+		reg = host1x_sync_readl(host1x,
+				HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS +
+				i * REGISTER_STRIDE);
+		for_each_set_bit(id, &reg, BITS_PER_LONG) {
+			struct host1x_intr_syncpt *sp =
+				intr->syncpt + (i * BITS_PER_LONG + id);
+			host1x_intr_syncpt_thresh_isr(sp);
+			queue_work(intr->wq, &sp->work);
+		}
+	}
+
+	return IRQ_HANDLED;
+}
+
+static void host1x_intr_init_host_sync(struct host1x_intr *intr)
+{
+	struct host1x *host1x = intr_to_host1x(intr);
+	int i, err;
+
+	host1x_sync_writel(host1x, 0xffffffffUL,
+		HOST1X_SYNC_SYNCPT_THRESH_INT_DISABLE);
+	host1x_sync_writel(host1x, 0xffffffffUL,
+		HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS);
+
+	for (i = 0; i < host1x->info.nb_pts; i++)
+		INIT_WORK(&intr->syncpt[i].work, syncpt_thresh_cascade_fn);
+
+	err = devm_request_irq(&host1x->dev->dev, intr->syncpt_irq,
+				syncpt_thresh_cascade_isr,
+				IRQF_SHARED, "host1x_syncpt", host1x);
+	WARN_ON(IS_ERR_VALUE(err));
+
+	/* disable the ip_busy_timeout. this prevents write drops */
+	host1x_sync_writel(host1x, 0, HOST1X_SYNC_IP_BUSY_TIMEOUT);
+
+	/*
+	 * increase the auto-ack timout to the maximum value. 2d will hang
+	 * otherwise on Tegra2.
+	 */
+	host1x_sync_writel(host1x, 0xff, HOST1X_SYNC_CTXSW_TIMEOUT_CFG);
+}
+
+static void host1x_intr_set_host_clocks_per_usec(struct host1x_intr *intr,
+		u32 cpm)
+{
+	struct host1x *host1x = intr_to_host1x(intr);
+	/* write microsecond clock register */
+	host1x_sync_writel(host1x, cpm, HOST1X_SYNC_USEC_CLK);
+}
+
+static void host1x_intr_set_syncpt_threshold(struct host1x_intr *intr,
+	u32 id, u32 thresh)
+{
+	struct host1x *host1x = intr_to_host1x(intr);
+	host1x_sync_writel(host1x, thresh,
+		HOST1X_SYNC_SYNCPT_INT_THRESH_0 + id * REGISTER_STRIDE);
+}
+
+static void host1x_intr_enable_syncpt_intr(struct host1x_intr *intr, u32 id)
+{
+	struct host1x *host1x = intr_to_host1x(intr);
+
+	host1x_sync_writel(host1x, BIT_MASK(id),
+			HOST1X_SYNC_SYNCPT_THRESH_INT_ENABLE_CPU0 +
+			BIT_WORD(id) * REGISTER_STRIDE);
+}
+
+static void host1x_intr_disable_syncpt_intr(struct host1x_intr *intr, u32 id)
+{
+	struct host1x *host1x = intr_to_host1x(intr);
+
+	host1x_sync_writel(host1x, BIT_MASK(id),
+			HOST1X_SYNC_SYNCPT_THRESH_INT_DISABLE +
+			BIT_WORD(id) * REGISTER_STRIDE);
+
+	host1x_sync_writel(host1x, BIT_MASK(id),
+		HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS +
+		BIT_WORD(id) * REGISTER_STRIDE);
+}
+
+static void host1x_intr_disable_all_syncpt_intrs(struct host1x_intr *intr)
+{
+	struct host1x *host1x = intr_to_host1x(intr);
+	u32 reg;
+
+	for (reg = 0; reg <= BIT_WORD(host1x->info.nb_pts) * REGISTER_STRIDE;
+			reg += REGISTER_STRIDE) {
+		host1x_sync_writel(host1x, 0xffffffffu,
+				HOST1X_SYNC_SYNCPT_THRESH_INT_DISABLE +
+				reg);
+
+		host1x_sync_writel(host1x, 0xffffffffu,
+			HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS + reg);
+	}
+}
+
+/*
+ * Sync point threshold interrupt service function
+ * Handles sync point threshold triggers, in interrupt context
+ */
+static void host1x_intr_syncpt_thresh_isr(struct host1x_intr_syncpt *syncpt)
+{
+	unsigned int id = syncpt->id;
+	struct host1x_intr *intr = intr_syncpt_to_intr(syncpt);
+	struct host1x *host1x = intr_to_host1x(intr);
+	u32 reg = BIT_WORD(id) * REGISTER_STRIDE;
+
+	host1x_sync_writel(host1x, BIT_MASK(id),
+		HOST1X_SYNC_SYNCPT_THRESH_INT_DISABLE + reg);
+	host1x_sync_writel(host1x, BIT_MASK(id),
+		HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS + reg);
+}
+
+static int host1x_free_syncpt_irq(struct host1x_intr *intr)
+{
+	struct host1x *host1x = intr_to_host1x(intr);
+
+	devm_free_irq(&host1x->dev->dev, intr->syncpt_irq, host1x);
+	flush_workqueue(intr->wq);
+	return 0;
+}
+
+static const struct host1x_intr_ops host1x_intr_ops = {
+	.init_host_sync = host1x_intr_init_host_sync,
+	.set_host_clocks_per_usec = host1x_intr_set_host_clocks_per_usec,
+	.set_syncpt_threshold = host1x_intr_set_syncpt_threshold,
+	.enable_syncpt_intr = host1x_intr_enable_syncpt_intr,
+	.disable_syncpt_intr = host1x_intr_disable_syncpt_intr,
+	.disable_all_syncpt_intrs = host1x_intr_disable_all_syncpt_intrs,
+	.free_syncpt_irq = host1x_free_syncpt_irq,
+};
diff --git a/drivers/gpu/host1x/intr.c b/drivers/gpu/host1x/intr.c
new file mode 100644
index 0000000..26099b8
--- /dev/null
+++ b/drivers/gpu/host1x/intr.c
@@ -0,0 +1,356 @@ 
+/*
+ * Tegra host1x Interrupt Management
+ *
+ * Copyright (c) 2010-2013, NVIDIA Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "intr.h"
+#include <linux/interrupt.h>
+#include <linux/slab.h>
+#include <linux/irq.h>
+#include "dev.h"
+
+/* Wait list management */
+
+struct host1x_waitlist {
+	struct list_head list;
+	struct kref refcount;
+	u32 thresh;
+	enum host1x_intr_action action;
+	atomic_t state;
+	void *data;
+	int count;
+};
+
+enum waitlist_state {
+	WLS_PENDING,
+	WLS_REMOVED,
+	WLS_CANCELLED,
+	WLS_HANDLED
+};
+
+static void waiter_release(struct kref *kref)
+{
+	kfree(container_of(kref, struct host1x_waitlist, refcount));
+}
+
+/*
+ * add a waiter to a waiter queue, sorted by threshold
+ * returns true if it was added at the head of the queue
+ */
+static bool add_waiter_to_queue(struct host1x_waitlist *waiter,
+				struct list_head *queue)
+{
+	struct host1x_waitlist *pos;
+	u32 thresh = waiter->thresh;
+
+	list_for_each_entry_reverse(pos, queue, list)
+		if ((s32)(pos->thresh - thresh) <= 0) {
+			list_add(&waiter->list, &pos->list);
+			return false;
+		}
+
+	list_add(&waiter->list, queue);
+	return true;
+}
+
+/*
+ * run through a waiter queue for a single sync point ID
+ * and gather all completed waiters into lists by actions
+ */
+static void remove_completed_waiters(struct list_head *head, u32 sync,
+			struct list_head completed[HOST1X_INTR_ACTION_COUNT])
+{
+	struct list_head *dest;
+	struct host1x_waitlist *waiter, *next;
+
+	list_for_each_entry_safe(waiter, next, head, list) {
+		if ((s32)(waiter->thresh - sync) > 0)
+			break;
+
+		dest = completed + waiter->action;
+
+		/* PENDING->REMOVED or CANCELLED->HANDLED */
+		if (atomic_inc_return(&waiter->state) == WLS_HANDLED || !dest) {
+			list_del(&waiter->list);
+			kref_put(&waiter->refcount, waiter_release);
+		} else {
+			list_move_tail(&waiter->list, dest);
+		}
+	}
+}
+
+static void reset_threshold_interrupt(struct host1x_intr *intr,
+			       struct list_head *head,
+			       unsigned int id)
+{
+	struct host1x *host1x = intr_to_host1x(intr);
+	u32 thresh = list_first_entry(head,
+				struct host1x_waitlist, list)->thresh;
+
+	host1x->intr_op.set_syncpt_threshold(intr, id, thresh);
+	host1x->intr_op.enable_syncpt_intr(intr, id);
+}
+
+static void action_wakeup(struct host1x_waitlist *waiter)
+{
+	wait_queue_head_t *wq = waiter->data;
+
+	wake_up(wq);
+}
+
+static void action_wakeup_interruptible(struct host1x_waitlist *waiter)
+{
+	wait_queue_head_t *wq = waiter->data;
+
+	wake_up_interruptible(wq);
+}
+
+typedef void (*action_handler)(struct host1x_waitlist *waiter);
+
+static action_handler action_handlers[HOST1X_INTR_ACTION_COUNT] = {
+	action_wakeup,
+	action_wakeup_interruptible,
+};
+
+static void run_handlers(struct list_head completed[HOST1X_INTR_ACTION_COUNT])
+{
+	struct list_head *head = completed;
+	int i;
+
+	for (i = 0; i < HOST1X_INTR_ACTION_COUNT; ++i, ++head) {
+		action_handler handler = action_handlers[i];
+		struct host1x_waitlist *waiter, *next;
+
+		list_for_each_entry_safe(waiter, next, head, list) {
+			list_del(&waiter->list);
+			handler(waiter);
+			WARN_ON(atomic_xchg(&waiter->state, WLS_HANDLED)
+					!= WLS_REMOVED);
+			kref_put(&waiter->refcount, waiter_release);
+		}
+	}
+}
+
+/*
+ * Remove & handle all waiters that have completed for the given syncpt
+ */
+static int process_wait_list(struct host1x_intr *intr,
+			     struct host1x_intr_syncpt *syncpt,
+			     u32 threshold)
+{
+	struct host1x *host1x = intr_to_host1x(intr);
+	struct list_head completed[HOST1X_INTR_ACTION_COUNT];
+	unsigned int i;
+	int empty;
+
+	for (i = 0; i < HOST1X_INTR_ACTION_COUNT; ++i)
+		INIT_LIST_HEAD(completed + i);
+
+	spin_lock(&syncpt->lock);
+
+	remove_completed_waiters(&syncpt->wait_head, threshold, completed);
+
+	empty = list_empty(&syncpt->wait_head);
+	if (empty)
+		host1x->intr_op.disable_syncpt_intr(intr, syncpt->id);
+	else
+		reset_threshold_interrupt(intr, &syncpt->wait_head,
+					  syncpt->id);
+
+	spin_unlock(&syncpt->lock);
+
+	run_handlers(completed);
+
+	return empty;
+}
+
+/*
+ * Sync point threshold interrupt service thread function
+ * Handles sync point threshold triggers, in thread context
+ */
+irqreturn_t host1x_syncpt_thresh_fn(void *dev_id)
+{
+	struct host1x_intr_syncpt *syncpt = dev_id;
+	unsigned int id = syncpt->id;
+	struct host1x_intr *intr = intr_syncpt_to_intr(syncpt);
+	struct host1x *host1x = intr_to_host1x(intr);
+
+	(void)process_wait_list(intr, syncpt,
+				host1x_syncpt_load_min(host1x->syncpt + id));
+
+	return IRQ_HANDLED;
+}
+
+int host1x_intr_add_action(struct host1x_intr *intr, u32 id, u32 thresh,
+			enum host1x_intr_action action, void *data,
+			void *_waiter,
+			void **ref)
+{
+	struct host1x *host1x = intr_to_host1x(intr);
+	struct host1x_waitlist *waiter = _waiter;
+	struct host1x_intr_syncpt *syncpt;
+	int queue_was_empty;
+
+	if (waiter == NULL) {
+		pr_warn("%s: NULL waiter\n", __func__);
+		return -EINVAL;
+	}
+
+	/* initialize a new waiter */
+	INIT_LIST_HEAD(&waiter->list);
+	kref_init(&waiter->refcount);
+	if (ref)
+		kref_get(&waiter->refcount);
+	waiter->thresh = thresh;
+	waiter->action = action;
+	atomic_set(&waiter->state, WLS_PENDING);
+	waiter->data = data;
+	waiter->count = 1;
+
+	syncpt = intr->syncpt + id;
+
+	spin_lock(&syncpt->lock);
+
+	queue_was_empty = list_empty(&syncpt->wait_head);
+
+	if (add_waiter_to_queue(waiter, &syncpt->wait_head)) {
+		/* added at head of list - new threshold value */
+		host1x->intr_op.set_syncpt_threshold(intr, id, thresh);
+
+		/* added as first waiter - enable interrupt */
+		if (queue_was_empty)
+			host1x->intr_op.enable_syncpt_intr(intr, id);
+	}
+
+	spin_unlock(&syncpt->lock);
+
+	if (ref)
+		*ref = waiter;
+	return 0;
+}
+
+void *host1x_intr_alloc_waiter(void)
+{
+	return kzalloc(sizeof(struct host1x_waitlist), GFP_KERNEL);
+}
+
+void host1x_intr_put_ref(struct host1x_intr *intr, u32 id, void *ref)
+{
+	struct host1x_waitlist *waiter = ref;
+	struct host1x_intr_syncpt *syncpt;
+	struct host1x *host1x = intr_to_host1x(intr);
+
+	while (atomic_cmpxchg(&waiter->state,
+				WLS_PENDING, WLS_CANCELLED) == WLS_REMOVED)
+		schedule();
+
+	syncpt = intr->syncpt + id;
+	(void)process_wait_list(intr, syncpt,
+				host1x_syncpt_load_min(host1x->syncpt + id));
+
+	kref_put(&waiter->refcount, waiter_release);
+}
+
+int host1x_intr_init(struct host1x_intr *intr, u32 irq_sync)
+{
+	unsigned int id;
+	struct host1x *host1x = intr_to_host1x(intr);
+	u32 nb_pts = host1x_syncpt_nb_pts(host1x);
+
+	intr->syncpt = devm_kzalloc(&host1x->dev->dev,
+			sizeof(struct host1x_intr_syncpt) *
+			host1x->info.nb_pts,
+			GFP_KERNEL);
+
+	if (!host1x->intr.syncpt)
+		return -ENOMEM;
+
+	mutex_init(&intr->mutex);
+	intr->syncpt_irq = irq_sync;
+	intr->wq = create_workqueue("host_syncpt");
+	if (!intr->wq)
+		return -ENOMEM;
+
+	for (id = 0; id < nb_pts; ++id) {
+		struct host1x_intr_syncpt *syncpt = &intr->syncpt[id];
+
+		syncpt->intr = &host1x->intr;
+		syncpt->id = id;
+		spin_lock_init(&syncpt->lock);
+		INIT_LIST_HEAD(&syncpt->wait_head);
+		snprintf(syncpt->thresh_irq_name,
+			sizeof(syncpt->thresh_irq_name),
+			"host1x_sp_%02d", id);
+	}
+
+	return 0;
+}
+
+void host1x_intr_deinit(struct host1x_intr *intr)
+{
+	host1x_intr_stop(intr);
+	destroy_workqueue(intr->wq);
+}
+
+void host1x_intr_start(struct host1x_intr *intr, u32 hz)
+{
+	struct host1x *host1x = intr_to_host1x(intr);
+	mutex_lock(&intr->mutex);
+
+	host1x->intr_op.init_host_sync(intr);
+	host1x->intr_op.set_host_clocks_per_usec(intr,
+			DIV_ROUND_UP(hz, 1000000));
+
+	mutex_unlock(&intr->mutex);
+}
+
+void host1x_intr_stop(struct host1x_intr *intr)
+{
+	unsigned int id;
+	struct host1x *host1x = intr_to_host1x(intr);
+	struct host1x_intr_syncpt *syncpt;
+	u32 nb_pts = host1x_syncpt_nb_pts(intr_to_host1x(intr));
+
+	mutex_lock(&intr->mutex);
+
+	host1x->intr_op.disable_all_syncpt_intrs(intr);
+
+	for (id = 0, syncpt = intr->syncpt;
+	     id < nb_pts;
+	     ++id, ++syncpt) {
+		struct host1x_waitlist *waiter, *next;
+		list_for_each_entry_safe(waiter, next,
+				&syncpt->wait_head, list) {
+			if (atomic_cmpxchg(&waiter->state,
+						WLS_CANCELLED, WLS_HANDLED)
+				== WLS_CANCELLED) {
+				list_del(&waiter->list);
+				kref_put(&waiter->refcount, waiter_release);
+			}
+		}
+
+		if (!list_empty(&syncpt->wait_head)) {  /* output diagnostics */
+			mutex_unlock(&intr->mutex);
+			pr_warn("%s cannot stop syncpt intr id=%d\n",
+					__func__, id);
+			return;
+		}
+	}
+
+	host1x->intr_op.free_syncpt_irq(intr);
+
+	mutex_unlock(&intr->mutex);
+}
diff --git a/drivers/gpu/host1x/intr.h b/drivers/gpu/host1x/intr.h
new file mode 100644
index 0000000..679a7b4
--- /dev/null
+++ b/drivers/gpu/host1x/intr.h
@@ -0,0 +1,103 @@ 
+/*
+ * Tegra host1x Interrupt Management
+ *
+ * Copyright (c) 2010-2013, NVIDIA Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __HOST1X_INTR_H
+#define __HOST1X_INTR_H
+
+#include <linux/interrupt.h>
+#include <linux/workqueue.h>
+
+enum host1x_intr_action {
+	/*
+	 * Wake up a  task.
+	 * 'data' points to a wait_queue_head_t
+	 */
+	HOST1X_INTR_ACTION_WAKEUP,
+
+	/*
+	 * Wake up a interruptible task.
+	 * 'data' points to a wait_queue_head_t
+	 */
+	HOST1X_INTR_ACTION_WAKEUP_INTERRUPTIBLE,
+
+	HOST1X_INTR_ACTION_COUNT
+};
+
+struct host1x_intr;
+
+struct host1x_intr_syncpt {
+	struct host1x_intr *intr;
+	u8 id;
+	spinlock_t lock;
+	struct list_head wait_head;
+	char thresh_irq_name[12];
+	struct work_struct work;
+};
+
+struct host1x_intr {
+	struct host1x_intr_syncpt *syncpt;
+	struct mutex mutex;
+	int syncpt_irq;
+	struct workqueue_struct *wq;
+};
+#define intr_to_host1x(x) container_of(x, struct host1x, intr)
+#define intr_syncpt_to_intr(is) (is->intr)
+
+/*
+ * Schedule an action to be taken when a sync point reaches the given threshold.
+ *
+ * @id the sync point
+ * @thresh the threshold
+ * @action the action to take
+ * @data a pointer to extra data depending on action, see above
+ * @waiter waiter allocated with host1x_intr_alloc_waiter - assumes ownership
+ * @ref must be passed if cancellation is possible, else NULL
+ *
+ * This is a non-blocking api.
+ */
+int host1x_intr_add_action(struct host1x_intr *intr, u32 id, u32 thresh,
+			enum host1x_intr_action action, void *data,
+			void *waiter,
+			void **ref);
+
+/*
+ * Allocate a waiter.
+ */
+void *host1x_intr_alloc_waiter(void);
+
+/*
+ * Unreference an action submitted to host1x_intr_add_action().
+ * You must call this if you passed non-NULL as ref.
+ * @ref the ref returned from host1x_intr_add_action()
+ */
+void host1x_intr_put_ref(struct host1x_intr *intr, u32 id, void *ref);
+
+/* Initialize host1x sync point interrupt */
+int host1x_intr_init(struct host1x_intr *intr, u32 irq_sync);
+
+/* Deinitialize host1x sync point interrupt */
+void host1x_intr_deinit(struct host1x_intr *intr);
+
+/* Enable host1x sync point interrupt */
+void host1x_intr_start(struct host1x_intr *intr, u32 hz);
+
+/* Disable host1x sync point interrupt */
+void host1x_intr_stop(struct host1x_intr *intr);
+
+irqreturn_t host1x_syncpt_thresh_fn(void *dev_id);
+#endif
diff --git a/drivers/gpu/host1x/syncpt.c b/drivers/gpu/host1x/syncpt.c
index b45651f..32e2b42 100644
--- a/drivers/gpu/host1x/syncpt.c
+++ b/drivers/gpu/host1x/syncpt.c
@@ -22,9 +22,12 @@ 
 #include <linux/module.h>
 #include "syncpt.h"
 #include "dev.h"
+#include "intr.h"
 #include <trace/events/host1x.h>
 
 #define MAX_SYNCPT_LENGTH	5
+#define SYNCPT_CHECK_PERIOD (2 * HZ)
+#define MAX_STUCK_CHECK_COUNT 15
 
 static struct host1x_syncpt *_host1x_syncpt_alloc(struct host1x *host,
 		struct platform_device *pdev,
@@ -119,6 +122,166 @@  void host1x_syncpt_incr(struct host1x_syncpt *sp)
 	host1x_syncpt_cpu_incr(sp);
 }
 
+/*
+ * Updated sync point form hardware, and returns true if syncpoint is expired,
+ * false if we may need to wait
+ */
+static bool syncpt_load_min_is_expired(
+	struct host1x_syncpt *sp,
+	u32 thresh)
+{
+	sp->dev->syncpt_op.load_min(sp);
+	return host1x_syncpt_is_expired(sp, thresh);
+}
+
+/*
+ * Main entrypoint for syncpoint value waits.
+ */
+int host1x_syncpt_wait(struct host1x_syncpt *sp,
+			u32 thresh, long timeout, u32 *value)
+{
+	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
+	void *ref;
+	void *waiter;
+	int err = 0, check_count = 0;
+	u32 val;
+
+	if (value)
+		*value = 0;
+
+	/* first check cache */
+	if (host1x_syncpt_is_expired(sp, thresh)) {
+		if (value)
+			*value = host1x_syncpt_read_min(sp);
+		return 0;
+	}
+
+	/* try to read from register */
+	val = sp->dev->syncpt_op.load_min(sp);
+	if (host1x_syncpt_is_expired(sp, thresh)) {
+		if (value)
+			*value = val;
+		goto done;
+	}
+
+	if (!timeout) {
+		err = -EAGAIN;
+		goto done;
+	}
+
+	/* schedule a wakeup when the syncpoint value is reached */
+	waiter = host1x_intr_alloc_waiter();
+	if (!waiter) {
+		err = -ENOMEM;
+		goto done;
+	}
+
+	err = host1x_intr_add_action(&(sp->dev->intr), sp->id, thresh,
+				HOST1X_INTR_ACTION_WAKEUP_INTERRUPTIBLE, &wq,
+				waiter,
+				&ref);
+	if (err)
+		goto done;
+
+	err = -EAGAIN;
+	/* Caller-specified timeout may be impractically low */
+	if (timeout < 0)
+		timeout = LONG_MAX;
+
+	/* wait for the syncpoint, or timeout, or signal */
+	while (timeout) {
+		long check = min_t(long, SYNCPT_CHECK_PERIOD, timeout);
+		int remain = wait_event_interruptible_timeout(wq,
+				syncpt_load_min_is_expired(sp, thresh),
+				check);
+		if (remain > 0 || host1x_syncpt_is_expired(sp, thresh)) {
+			if (value)
+				*value = host1x_syncpt_read_min(sp);
+			err = 0;
+			break;
+		}
+		if (remain < 0) {
+			err = remain;
+			break;
+		}
+		timeout -= check;
+		if (timeout && check_count <= MAX_STUCK_CHECK_COUNT) {
+			dev_warn(&sp->dev->dev->dev,
+				"%s: syncpoint id %d (%s) stuck waiting %d, timeout=%ld\n",
+				 current->comm, sp->id, sp->name,
+				 thresh, timeout);
+			sp->dev->syncpt_op.debug(sp);
+			check_count++;
+		}
+	}
+	host1x_intr_put_ref(&(sp->dev->intr), sp->id, ref);
+
+done:
+	return err;
+}
+EXPORT_SYMBOL(host1x_syncpt_wait);
+
+/*
+ * Returns true if syncpoint is expired, false if we may need to wait
+ */
+bool host1x_syncpt_is_expired(
+	struct host1x_syncpt *sp,
+	u32 thresh)
+{
+	u32 current_val;
+	u32 future_val;
+	smp_rmb();
+	current_val = (u32)atomic_read(&sp->min_val);
+	future_val = (u32)atomic_read(&sp->max_val);
+
+	/* Note the use of unsigned arithmetic here (mod 1<<32).
+	 *
+	 * c = current_val = min_val	= the current value of the syncpoint.
+	 * t = thresh			= the value we are checking
+	 * f = future_val  = max_val	= the value c will reach when all
+	 *				  outstanding increments have completed.
+	 *
+	 * Note that c always chases f until it reaches f.
+	 *
+	 * Dtf = (f - t)
+	 * Dtc = (c - t)
+	 *
+	 *  Consider all cases:
+	 *
+	 *	A) .....c..t..f.....	Dtf < Dtc	need to wait
+	 *	B) .....c.....f..t..	Dtf > Dtc	expired
+	 *	C) ..t..c.....f.....	Dtf > Dtc	expired	   (Dct very large)
+	 *
+	 *  Any case where f==c: always expired (for any t).	Dtf == Dcf
+	 *  Any case where t==c: always expired (for any f).	Dtf >= Dtc (because Dtc==0)
+	 *  Any case where t==f!=c: always wait.		Dtf <  Dtc (because Dtf==0,
+	 *							Dtc!=0)
+	 *
+	 *  Other cases:
+	 *
+	 *	A) .....t..f..c.....	Dtf < Dtc	need to wait
+	 *	A) .....f..c..t.....	Dtf < Dtc	need to wait
+	 *	A) .....f..t..c.....	Dtf > Dtc	expired
+	 *
+	 *   So:
+	 *	   Dtf >= Dtc implies EXPIRED	(return true)
+	 *	   Dtf <  Dtc implies WAIT	(return false)
+	 *
+	 * Note: If t is expired then we *cannot* wait on it. We would wait
+	 * forever (hang the system).
+	 *
+	 * Note: do NOT get clever and remove the -thresh from both sides. It
+	 * is NOT the same.
+	 *
+	 * If future valueis zero, we have a client managed sync point. In that
+	 * case we do a direct comparison.
+	 */
+	if (!host1x_syncpt_client_managed(sp))
+		return future_val - thresh >= current_val - thresh;
+	else
+		return (s32)(current_val - thresh) >= 0;
+}
+
 void host1x_syncpt_debug(struct host1x_syncpt *sp)
 {
 	sp->dev->syncpt_op.debug(sp);
diff --git a/drivers/gpu/host1x/syncpt.h b/drivers/gpu/host1x/syncpt.h
index d9b9b0a..b46d044 100644
--- a/drivers/gpu/host1x/syncpt.h
+++ b/drivers/gpu/host1x/syncpt.h
@@ -114,6 +114,7 @@  void host1x_syncpt_cpu_incr(struct host1x_syncpt *sp);
 
 /* Load current value from hardware to the shadow register. */
 u32 host1x_syncpt_load_min(struct host1x_syncpt *sp);
+bool host1x_syncpt_is_expired(struct host1x_syncpt *sp, u32 thresh);
 
 /* Save host1x sync point state into shadow registers. */
 void host1x_syncpt_save(struct host1x *dev);
@@ -133,6 +134,10 @@  u32 host1x_syncpt_incr_max(struct host1x_syncpt *sp, u32 incrs);
 /* Do a debug dump of sync point values. */
 void host1x_syncpt_debug(struct host1x_syncpt *sp);
 
+/* Wait until sync point reaches a threshold value, or a timeout. */
+int host1x_syncpt_wait(struct host1x_syncpt *sp, u32 thresh,
+			long timeout, u32 *value);
+
 /* Check if sync point id is valid. */
 static inline int host1x_syncpt_is_valid(struct host1x_syncpt *sp)
 {