diff mbox

[1/4] gpu: host1x: Enable Tegra186 syncpoint protection

Message ID 20170818161553.27597-2-mperttunen@nvidia.com (mailing list archive)
State New, archived
Headers show

Commit Message

Mikko Perttunen Aug. 18, 2017, 4:15 p.m. UTC
Since Tegra186 the Host1x hardware allows syncpoints to be assigned to
specific channels, preventing any other channels from incrementing
them.

Enable this feature where available and assign syncpoints to channels
when submitting a job. Syncpoints are currently never unassigned from
channels since that would require extra work and is unnecessary with
the current channel allocation model.

Signed-off-by: Mikko Perttunen <mperttunen@nvidia.com>
---
 drivers/gpu/host1x/dev.h           | 16 ++++++++++++++++
 drivers/gpu/host1x/hw/channel_hw.c |  3 +++
 drivers/gpu/host1x/hw/syncpt_hw.c  | 26 ++++++++++++++++++++++++++
 drivers/gpu/host1x/syncpt.c        |  3 +++
 4 files changed, 48 insertions(+)

Comments

Dmitry Osipenko Aug. 18, 2017, 10:36 p.m. UTC | #1
On 18.08.2017 19:15, Mikko Perttunen wrote:
> Since Tegra186 the Host1x hardware allows syncpoints to be assigned to
> specific channels, preventing any other channels from incrementing
> them.
> 
> Enable this feature where available and assign syncpoints to channels
> when submitting a job. Syncpoints are currently never unassigned from
> channels since that would require extra work and is unnecessary with
> the current channel allocation model.
> 
> Signed-off-by: Mikko Perttunen <mperttunen@nvidia.com>
> ---
>  drivers/gpu/host1x/dev.h           | 16 ++++++++++++++++
>  drivers/gpu/host1x/hw/channel_hw.c |  3 +++
>  drivers/gpu/host1x/hw/syncpt_hw.c  | 26 ++++++++++++++++++++++++++
>  drivers/gpu/host1x/syncpt.c        |  3 +++
>  4 files changed, 48 insertions(+)
> 
> diff --git a/drivers/gpu/host1x/dev.h b/drivers/gpu/host1x/dev.h
> index def802c0a6bf..2432a30ff6e2 100644
> --- a/drivers/gpu/host1x/dev.h
> +++ b/drivers/gpu/host1x/dev.h
> @@ -79,6 +79,9 @@ struct host1x_syncpt_ops {
>  	u32 (*load)(struct host1x_syncpt *syncpt);
>  	int (*cpu_incr)(struct host1x_syncpt *syncpt);
>  	int (*patch_wait)(struct host1x_syncpt *syncpt, void *patch_addr);
> +	void (*assign_channel)(struct host1x_syncpt *syncpt,
> +	                       struct host1x_channel *channel);
> +	void (*set_protection)(struct host1x *host, bool enabled);
>  };
>  
>  struct host1x_intr_ops {
> @@ -186,6 +189,19 @@ static inline int host1x_hw_syncpt_patch_wait(struct host1x *host,
>  	return host->syncpt_op->patch_wait(sp, patch_addr);
>  }
>  
> +static inline void host1x_hw_syncpt_assign_channel(struct host1x *host,
> +						   struct host1x_syncpt *sp,
> +						   struct host1x_channel *ch)
> +{
> +	return host->syncpt_op->assign_channel(sp, ch);
> +}
> +
> +static inline void host1x_hw_syncpt_set_protection(struct host1x *host,
> +						   bool enabled)
> +{
> +	return host->syncpt_op->set_protection(host, enabled);
> +}
> +
>  static inline int host1x_hw_intr_init_host_sync(struct host1x *host, u32 cpm,
>  			void (*syncpt_thresh_work)(struct work_struct *))
>  {
> diff --git a/drivers/gpu/host1x/hw/channel_hw.c b/drivers/gpu/host1x/hw/channel_hw.c
> index 8447a56c41ca..0161da331702 100644
> --- a/drivers/gpu/host1x/hw/channel_hw.c
> +++ b/drivers/gpu/host1x/hw/channel_hw.c
> @@ -147,6 +147,9 @@ static int channel_submit(struct host1x_job *job)
>  
>  	syncval = host1x_syncpt_incr_max(sp, user_syncpt_incrs);
>  
> +	/* assign syncpoint to channel */
> +	host1x_hw_syncpt_assign_channel(host, sp, ch);
> +
>  	job->syncpt_end = syncval;
>  
>  	/* add a setclass for modules that require it */
> diff --git a/drivers/gpu/host1x/hw/syncpt_hw.c b/drivers/gpu/host1x/hw/syncpt_hw.c
> index 7b0270d60742..5d117ab1699e 100644
> --- a/drivers/gpu/host1x/hw/syncpt_hw.c
> +++ b/drivers/gpu/host1x/hw/syncpt_hw.c
> @@ -106,6 +106,30 @@ static int syncpt_patch_wait(struct host1x_syncpt *sp, void *patch_addr)
>  	return 0;
>  }
>  
> +static void syncpt_assign_channel(struct host1x_syncpt *sp,
> +				  struct host1x_channel *ch)
> +{
> +#if HOST1X_HW >= 6
> +	struct host1x *host = sp->host;
> +
> +	if (!host->hv_regs)
> +		return;
> +
> +	host1x_sync_writel(host,
> +			   HOST1X_SYNC_SYNCPT_CH_APP_CH(ch ? ch->id : 0xff),
> +			   HOST1X_SYNC_SYNCPT_CH_APP(sp->id));
> +#endif
> +}
> +
> +static void syncpt_set_protection(struct host1x *host, bool enabled)
> +{
> +#if HOST1X_HW >= 6
> +	host1x_hypervisor_writel(host,
> +				 enabled ? HOST1X_HV_SYNCPT_PROT_EN_CH_EN : 0,
> +				 HOST1X_HV_SYNCPT_PROT_EN);
> +#endif
> +}
> +
>  static const struct host1x_syncpt_ops host1x_syncpt_ops = {
>  	.restore = syncpt_restore,
>  	.restore_wait_base = syncpt_restore_wait_base,
> @@ -113,4 +137,6 @@ static const struct host1x_syncpt_ops host1x_syncpt_ops = {
>  	.load = syncpt_load,
>  	.cpu_incr = syncpt_cpu_incr,
>  	.patch_wait = syncpt_patch_wait,
> +	.assign_channel = syncpt_assign_channel,
> +	.set_protection = syncpt_set_protection,
>  };
> diff --git a/drivers/gpu/host1x/syncpt.c b/drivers/gpu/host1x/syncpt.c
> index 048ac9e344ce..fe4d963b3e2a 100644
> --- a/drivers/gpu/host1x/syncpt.c
> +++ b/drivers/gpu/host1x/syncpt.c
> @@ -398,6 +398,8 @@ int host1x_syncpt_init(struct host1x *host)
>  	for (i = 0; i < host->info->nb_pts; i++) {
>  		syncpt[i].id = i;
>  		syncpt[i].host = host;
> +
> +		host1x_hw_syncpt_assign_channel(host, &syncpt[i], NULL);
>  	}
>  
>  	for (i = 0; i < host->info->nb_bases; i++)
> @@ -408,6 +410,7 @@ int host1x_syncpt_init(struct host1x *host)
>  	host->bases = bases;
>  
>  	host1x_syncpt_restore(host);
> +	host1x_hw_syncpt_set_protection(host, true);

Is it really okay to force the protection? Maybe protection should be enabled
with respect to CONFIG_TEGRA_HOST1X_FIREWALL? In that case we would have to
avoid software job validation for Tegra124+.

>  
>  	/* Allocate sync point to use for clearing waits for expired fences */
>  	host->nop_sp = host1x_syncpt_alloc(host, NULL, 0);
>
Mikko Perttunen Aug. 19, 2017, 8:10 a.m. UTC | #2
On 08/19/2017 01:36 AM, Dmitry Osipenko wrote:
> On 18.08.2017 19:15, Mikko Perttunen wrote:
>> Since Tegra186 the Host1x hardware allows syncpoints to be assigned to
>> specific channels, preventing any other channels from incrementing
>> them.
>>
>> Enable this feature where available and assign syncpoints to channels
>> when submitting a job. Syncpoints are currently never unassigned from
>> channels since that would require extra work and is unnecessary with
>> the current channel allocation model.
>>
>> Signed-off-by: Mikko Perttunen <mperttunen@nvidia.com>
>> ---
>>   drivers/gpu/host1x/dev.h           | 16 ++++++++++++++++
>>   drivers/gpu/host1x/hw/channel_hw.c |  3 +++
>>   drivers/gpu/host1x/hw/syncpt_hw.c  | 26 ++++++++++++++++++++++++++
>>   drivers/gpu/host1x/syncpt.c        |  3 +++
>>   4 files changed, 48 insertions(+)
>>
>> diff --git a/drivers/gpu/host1x/dev.h b/drivers/gpu/host1x/dev.h
>> index def802c0a6bf..2432a30ff6e2 100644
>> --- a/drivers/gpu/host1x/dev.h
>> +++ b/drivers/gpu/host1x/dev.h
>> @@ -79,6 +79,9 @@ struct host1x_syncpt_ops {
>>   	u32 (*load)(struct host1x_syncpt *syncpt);
>>   	int (*cpu_incr)(struct host1x_syncpt *syncpt);
>>   	int (*patch_wait)(struct host1x_syncpt *syncpt, void *patch_addr);
>> +	void (*assign_channel)(struct host1x_syncpt *syncpt,
>> +	                       struct host1x_channel *channel);
>> +	void (*set_protection)(struct host1x *host, bool enabled);
>>   };
>>   
>>   struct host1x_intr_ops {
>> @@ -186,6 +189,19 @@ static inline int host1x_hw_syncpt_patch_wait(struct host1x *host,
>>   	return host->syncpt_op->patch_wait(sp, patch_addr);
>>   }
>>   
>> +static inline void host1x_hw_syncpt_assign_channel(struct host1x *host,
>> +						   struct host1x_syncpt *sp,
>> +						   struct host1x_channel *ch)
>> +{
>> +	return host->syncpt_op->assign_channel(sp, ch);
>> +}
>> +
>> +static inline void host1x_hw_syncpt_set_protection(struct host1x *host,
>> +						   bool enabled)
>> +{
>> +	return host->syncpt_op->set_protection(host, enabled);
>> +}
>> +
>>   static inline int host1x_hw_intr_init_host_sync(struct host1x *host, u32 cpm,
>>   			void (*syncpt_thresh_work)(struct work_struct *))
>>   {
>> diff --git a/drivers/gpu/host1x/hw/channel_hw.c b/drivers/gpu/host1x/hw/channel_hw.c
>> index 8447a56c41ca..0161da331702 100644
>> --- a/drivers/gpu/host1x/hw/channel_hw.c
>> +++ b/drivers/gpu/host1x/hw/channel_hw.c
>> @@ -147,6 +147,9 @@ static int channel_submit(struct host1x_job *job)
>>   
>>   	syncval = host1x_syncpt_incr_max(sp, user_syncpt_incrs);
>>   
>> +	/* assign syncpoint to channel */
>> +	host1x_hw_syncpt_assign_channel(host, sp, ch);
>> +
>>   	job->syncpt_end = syncval;
>>   
>>   	/* add a setclass for modules that require it */
>> diff --git a/drivers/gpu/host1x/hw/syncpt_hw.c b/drivers/gpu/host1x/hw/syncpt_hw.c
>> index 7b0270d60742..5d117ab1699e 100644
>> --- a/drivers/gpu/host1x/hw/syncpt_hw.c
>> +++ b/drivers/gpu/host1x/hw/syncpt_hw.c
>> @@ -106,6 +106,30 @@ static int syncpt_patch_wait(struct host1x_syncpt *sp, void *patch_addr)
>>   	return 0;
>>   }
>>   
>> +static void syncpt_assign_channel(struct host1x_syncpt *sp,
>> +				  struct host1x_channel *ch)
>> +{
>> +#if HOST1X_HW >= 6
>> +	struct host1x *host = sp->host;
>> +
>> +	if (!host->hv_regs)
>> +		return;
>> +
>> +	host1x_sync_writel(host,
>> +			   HOST1X_SYNC_SYNCPT_CH_APP_CH(ch ? ch->id : 0xff),
>> +			   HOST1X_SYNC_SYNCPT_CH_APP(sp->id));
>> +#endif
>> +}
>> +
>> +static void syncpt_set_protection(struct host1x *host, bool enabled)
>> +{
>> +#if HOST1X_HW >= 6
>> +	host1x_hypervisor_writel(host,
>> +				 enabled ? HOST1X_HV_SYNCPT_PROT_EN_CH_EN : 0,
>> +				 HOST1X_HV_SYNCPT_PROT_EN);
>> +#endif
>> +}
>> +
>>   static const struct host1x_syncpt_ops host1x_syncpt_ops = {
>>   	.restore = syncpt_restore,
>>   	.restore_wait_base = syncpt_restore_wait_base,
>> @@ -113,4 +137,6 @@ static const struct host1x_syncpt_ops host1x_syncpt_ops = {
>>   	.load = syncpt_load,
>>   	.cpu_incr = syncpt_cpu_incr,
>>   	.patch_wait = syncpt_patch_wait,
>> +	.assign_channel = syncpt_assign_channel,
>> +	.set_protection = syncpt_set_protection,
>>   };
>> diff --git a/drivers/gpu/host1x/syncpt.c b/drivers/gpu/host1x/syncpt.c
>> index 048ac9e344ce..fe4d963b3e2a 100644
>> --- a/drivers/gpu/host1x/syncpt.c
>> +++ b/drivers/gpu/host1x/syncpt.c
>> @@ -398,6 +398,8 @@ int host1x_syncpt_init(struct host1x *host)
>>   	for (i = 0; i < host->info->nb_pts; i++) {
>>   		syncpt[i].id = i;
>>   		syncpt[i].host = host;
>> +
>> +		host1x_hw_syncpt_assign_channel(host, &syncpt[i], NULL);
>>   	}
>>   
>>   	for (i = 0; i < host->info->nb_bases; i++)
>> @@ -408,6 +410,7 @@ int host1x_syncpt_init(struct host1x *host)
>>   	host->bases = bases;
>>   
>>   	host1x_syncpt_restore(host);
>> +	host1x_hw_syncpt_set_protection(host, true);
> 
> Is it really okay to force the protection? Maybe protection should be enabled
> with a respect to CONFIG_TEGRA_HOST1X_FIREWALL? In that case we would have to
> avoid software jobs validation for Tegra124+.

I don't quite get your comment. The hardware syncpt protection layer 
being enabled should never hurt - it doesn't mess with any valid jobs. 
It's also only on Tegra186 so I'm not sure where the Tegra124 comes from.

Cheers,
Mikko

> 
>>   
>>   	/* Allocate sync point to use for clearing waits for expired fences */
>>   	host->nop_sp = host1x_syncpt_alloc(host, NULL, 0);
>>
> 
>
Dmitry Osipenko Aug. 19, 2017, 10:09 a.m. UTC | #3
On 19.08.2017 11:10, Mikko Perttunen wrote:
[snip]
>>> +    host1x_hw_syncpt_set_protection(host, true);
>>
>> Is it really okay to force the protection? Maybe protection should be enabled
>> with a respect to CONFIG_TEGRA_HOST1X_FIREWALL? In that case we would have to
>> avoid software jobs validation for Tegra124+.
> 
> I don't quite get your comment. The hardware syncpt protection layer being
> enabled should never hurt - it doesn't mess with any valid jobs. It's also only
> on Tegra186 so I'm not sure where the Tegra124 comes from.

Right, it's the gather filter on T124+, my bad. This raises several questions.

1) Why do we have CONFIG_TEGRA_HOST1X_FIREWALL? Should it always be enforced, or
do we actually want to be a bit more flexible and allow disabling it? Imagine that
you are making a custom application and want to utilize channels in a different way.

2) Since syncpoint protection is a T186 feature, what about previous
generations? Should we validate syncpoints in software for them? We have
'syncpoint validation' patch staged in grate's kernel
https://github.com/grate-driver/linux/commit/c8b6c82173f2ee9fead23380e8330b8099e7d5e7
(I'll start sending out this and other patches after a bit more thorough
testing.) Improperly used syncpoints potentially could allow one program to
damage others.

3) What exactly does the gather filter filter? Could you list all the commands
that it filters out, please?

4) What about T30/T114, which do not have a gather filter? Should we validate those
commands for them in a software firewall?

So maybe we should implement several layers of validation in the SW firewall.
Like all layers for T20 (memory boundaries validation etc), software gather
filter for T30/114 and software syncpoint validation for T30/114/124/210.
Mikko Perttunen Aug. 19, 2017, 10:35 a.m. UTC | #4
On 08/19/2017 01:09 PM, Dmitry Osipenko wrote:
> On 19.08.2017 11:10, Mikko Perttunen wrote:
> [snip]
>>>> +    host1x_hw_syncpt_set_protection(host, true);
>>>
>>> Is it really okay to force the protection? Maybe protection should be enabled
>>> with a respect to CONFIG_TEGRA_HOST1X_FIREWALL? In that case we would have to
>>> avoid software jobs validation for Tegra124+.
>>
>> I don't quite get your comment. The hardware syncpt protection layer being
>> enabled should never hurt - it doesn't mess with any valid jobs. It's also only
>> on Tegra186 so I'm not sure where the Tegra124 comes from.
> 
> Right, it's the gather filter on T124+, my bad. This raises several questions.
> 
> 1) Why we have CONFIG_TEGRA_HOST1X_FIREWALL? Should it be always enforced or we
> actually want to be a bit more flexible and allow to disable it. Imagine that
> you are making a custom application and want to utilize channels in a different way.

I think it should be up to the user to decide whether they want the 
firewall or not. It's clearly the most useful on the older chips - 
especially Tegra20 due to lack of IOMMU. The performance penalty is too 
great to force it on always.

The programming model should always be considered the same - the rules 
of what you are allowed to do are the same whether the firewall, or any 
hardware-implemented protection features, are on or not.

> 
> 2) Since syncpoint protection is a T186 feature, what about previous
> generations? Should we validate syncpoints in software for them? We have
> 'syncpoint validation' patch staged in grate's kernel
> https://github.com/grate-driver/linux/commit/c8b6c82173f2ee9fead23380e8330b8099e7d5e7
> (I'll start sending out this and other patches after a bit more thorough
> testing.) Improperly used syncpoints potentially could allow one program to
> damage others.

Yes, I think the firewall should have this feature for older 
generations. We could disable the check on Tegra186, as you point 
towards in question 4.

> 
> 3) What exactly does gather filter? Could you list all the commands that it
> filters out, please?

According to the Tegra186 TRM (section 16.8.32), SETCLASS, SETSTRMID and 
EXTEND are filtered.

> 
> 4) What about T30/T114 that do not have gather filter? Should we validate those
> commands for them in a software firewall?

Yes, the firewall should validate that.

> 
> So maybe we should implement several layers of validation in the SW firewall.
> Like all layers for T20 (memory boundaries validation etc), software gather
> filter for T30/114 and software syncpoint validation for T30/114/124/210.
> 

That seems like a good idea.

Thanks,
Mikko
Dmitry Osipenko Aug. 19, 2017, 11:11 a.m. UTC | #5
On 19.08.2017 13:35, Mikko Perttunen wrote:
> On 08/19/2017 01:09 PM, Dmitry Osipenko wrote:
>> On 19.08.2017 11:10, Mikko Perttunen wrote:
>> [snip]
>>>>> +    host1x_hw_syncpt_set_protection(host, true);
>>>>
>>>> Is it really okay to force the protection? Maybe protection should be enabled
>>>> with a respect to CONFIG_TEGRA_HOST1X_FIREWALL? In that case we would have to
>>>> avoid software jobs validation for Tegra124+.
>>>
>>> I don't quite get your comment. The hardware syncpt protection layer being
>>> enabled should never hurt - it doesn't mess with any valid jobs. It's also only
>>> on Tegra186 so I'm not sure where the Tegra124 comes from.
>>
>> Right, it's the gather filter on T124+, my bad. This raises several questions.
>>
>> 1) Why we have CONFIG_TEGRA_HOST1X_FIREWALL? Should it be always enforced or we
>> actually want to be a bit more flexible and allow to disable it. Imagine that
>> you are making a custom application and want to utilize channels in a
>> different way.
> 
> I think it should be up to the user to decide whether they want the firewall or
> not. It's clearly the most useful on the older chips - especially Tegra20 due to
> lack of IOMMU. The performance penalty is too great to force it on always.
> 

Of course there is some overhead, but it is not that great. Usually a command
buffer contains just a dozen commands. It should be an interesting challenge to
optimize its performance, though.

> The programming model should always be considered the same - the rules of what
> you are allowed to do are the same whether the firewall, or any
> hardware-implemented protection features, are on or not.
> 

Well, okay.

>>
>> 2) Since syncpoint protection is a T186 feature, what about previous
>> generations? Should we validate syncpoints in software for them? We have
>> 'syncpoint validation' patch staged in grate's kernel
>> https://github.com/grate-driver/linux/commit/c8b6c82173f2ee9fead23380e8330b8099e7d5e7
>>
>> (I'll start sending out this and other patches after a bit more thorough
>> testing.) Improperly used syncpoints potentially could allow one program to
>> damage others.
> 
> Yes, I think the firewall should have this feature for older generations. We
> could disable the check on Tegra186, as you point towards in question 4.
> 
>>
>> 3) What exactly does gather filter? Could you list all the commands that it
>> filters out, please?
> 
> According to the Tegra186 TRM (section 16.8.32), SETCLASS, SETSTRMID and EXTEND
> are filtered.
> 

Okay, then what about the SETSTRMID command? I don't see its disassembly in the
host1x gather debug dump. Was it accidentally missed?

>>
>> 4) What about T30/T114 that do not have gather filter? Should we validate those
>> commands for them in a software firewall?
> 
> Yes, the firewall should validate that.
> 
>>
>> So maybe we should implement several layers of validation in the SW firewall.
>> Like all layers for T20 (memory boundaries validation etc), software gather
>> filter for T30/114 and software syncpoint validation for T30/114/124/210.
>>
> 
> That seems like a good idea.

Alright, factoring the firewall out of job.c should probably be the first step.
Mikko Perttunen Aug. 19, 2017, 11:32 a.m. UTC | #6
On 08/19/2017 02:11 PM, Dmitry Osipenko wrote:
> On 19.08.2017 13:35, Mikko Perttunen wrote:
>> On 08/19/2017 01:09 PM, Dmitry Osipenko wrote:
>>> On 19.08.2017 11:10, Mikko Perttunen wrote:
>>> [snip]
>>>>>> +    host1x_hw_syncpt_set_protection(host, true);
>>>>>
>>>>> Is it really okay to force the protection? Maybe protection should be enabled
>>>>> with a respect to CONFIG_TEGRA_HOST1X_FIREWALL? In that case we would have to
>>>>> avoid software jobs validation for Tegra124+.
>>>>
>>>> I don't quite get your comment. The hardware syncpt protection layer being
>>>> enabled should never hurt - it doesn't mess with any valid jobs. It's also only
>>>> on Tegra186 so I'm not sure where the Tegra124 comes from.
>>>
>>> Right, it's the gather filter on T124+, my bad. This raises several questions.
>>>
>>> 1) Why we have CONFIG_TEGRA_HOST1X_FIREWALL? Should it be always enforced or we
>>> actually want to be a bit more flexible and allow to disable it. Imagine that
>>> you are making a custom application and want to utilize channels in a
>>> different way.
>>
>> I think it should be up to the user to decide whether they want the firewall or
>> not. It's clearly the most useful on the older chips - especially Tegra20 due to
>> lack of IOMMU. The performance penalty is too great to force it on always.
>>
> 
> Of course there is some overhead but is not that great. Usually command buffer
> contains just a dozen of commands. It should be an interesting challenge to
> optimize its performance though.
> 
>> The programming model should always be considered the same - the rules of what
>> you are allowed to do are the same whether the firewall, or any
>> hardware-implemented protection features, are on or not.
>>
> 
> Well, okay.
> 
>>>
>>> 2) Since syncpoint protection is a T186 feature, what about previous
>>> generations? Should we validate syncpoints in software for them? We have
>>> 'syncpoint validation' patch staged in grate's kernel
>>> https://github.com/grate-driver/linux/commit/c8b6c82173f2ee9fead23380e8330b8099e7d5e7
>>>
>>> (I'll start sending out this and other patches after a bit more thorough
>>> testing.) Improperly used syncpoints potentially could allow one program to
>>> damage others.
>>
>> Yes, I think the firewall should have this feature for older generations. We
>> could disable the check on Tegra186, as you point towards in question 4.
>>
>>>
>>> 3) What exactly does gather filter? Could you list all the commands that it
>>> filters out, please?
>>
>> According to the Tegra186 TRM (section 16.8.32), SETCLASS, SETSTRMID and EXTEND
>> are filtered.
>>
> 
> Okay, then what about SETSTRMID command, I don't see its disassembly in the
> host1x gather debug dump. Is it accidentally missed?
> 

True, it's a new command in Tegra186 and I missed adding it to the 
disassembler. It's probably fine to add it in another patch since it's 
only intended for kernel use and it's useless without IOMMU support 
anyway (which we don't have currently on Tegra186).

>>>
>>> 4) What about T30/T114 that do not have gather filter? Should we validate those
>>> commands for them in a software firewall?
>>
>> Yes, the firewall should validate that.
>>
>>>
>>> So maybe we should implement several layers of validation in the SW firewall.
>>> Like all layers for T20 (memory boundaries validation etc), software gather
>>> filter for T30/114 and software syncpoint validation for T30/114/124/210.
>>>
>>
>> That seems like a good idea.
> 
> Alright, factoring out firewall from job.c probably should be the first step.
>
Dmitry Osipenko Aug. 19, 2017, 11:51 a.m. UTC | #7
On 19.08.2017 14:32, Mikko Perttunen wrote:
> 
> 
> On 08/19/2017 02:11 PM, Dmitry Osipenko wrote:
>> On 19.08.2017 13:35, Mikko Perttunen wrote:
>>> On 08/19/2017 01:09 PM, Dmitry Osipenko wrote:
>>>> On 19.08.2017 11:10, Mikko Perttunen wrote:
>>>> [snip]
>>>>>>> +    host1x_hw_syncpt_set_protection(host, true);
>>>>>>
>>>>>> Is it really okay to force the protection? Maybe protection should be enabled
>>>>>> with a respect to CONFIG_TEGRA_HOST1X_FIREWALL? In that case we would have to
>>>>>> avoid software jobs validation for Tegra124+.
>>>>>
>>>>> I don't quite get your comment. The hardware syncpt protection layer being
>>>>> enabled should never hurt - it doesn't mess with any valid jobs. It's also
>>>>> only
>>>>> on Tegra186 so I'm not sure where the Tegra124 comes from.
>>>>
>>>> Right, it's the gather filter on T124+, my bad. This raises several questions.
>>>>
>>>> 1) Why we have CONFIG_TEGRA_HOST1X_FIREWALL? Should it be always enforced or we
>>>> actually want to be a bit more flexible and allow to disable it. Imagine that
>>>> you are making a custom application and want to utilize channels in a
>>>> different way.
>>>
>>> I think it should be up to the user to decide whether they want the firewall or
>>> not. It's clearly the most useful on the older chips - especially Tegra20 due to
>>> lack of IOMMU. The performance penalty is too great to force it on always.
>>>
>>
>> Of course there is some overhead but is not that great. Usually command buffer
>> contains just a dozen of commands. It should be an interesting challenge to
>> optimize its performance though.
>>
>>> The programming model should always be considered the same - the rules of what
>>> you are allowed to do are the same whether the firewall, or any
>>> hardware-implemented protection features, are on or not.
>>>
>>
>> Well, okay.
>>
>>>>
>>>> 2) Since syncpoint protection is a T186 feature, what about previous
>>>> generations? Should we validate syncpoints in software for them? We have
>>>> 'syncpoint validation' patch staged in grate's kernel
>>>> https://github.com/grate-driver/linux/commit/c8b6c82173f2ee9fead23380e8330b8099e7d5e7
>>>>
>>>>
>>>> (I'll start sending out this and other patches after a bit more thorough
>>>> testing.) Improperly used syncpoints potentially could allow one program to
>>>> damage others.
>>>
>>> Yes, I think the firewall should have this feature for older generations. We
>>> could disable the check on Tegra186, as you point towards in question 4.
>>>
>>>>
>>>> 3) What exactly does gather filter? Could you list all the commands that it
>>>> filters out, please?
>>>
>>> According to the Tegra186 TRM (section 16.8.32), SETCLASS, SETSTRMID and EXTEND
>>> are filtered.
>>>
>>
>> Okay, then what about SETSTRMID command, I don't see its disassembly in the
>> host1x gather debug dump. Is it accidentally missed?
>>
> 
> True, it's a new command in Tegra186 and I missed adding it to the disassembler.
> It's probably fine to add it in another patch since it's only intended for
> kernel use and it's useless without IOMMU support anyway (which we don't have
> currently on Tegra186).
> 

Yeah, but it would probably be preferable for that patch to precede the enabling
of the "gather filter".

>>>>
>>>> 4) What about T30/T114 that do not have gather filter? Should we validate those
>>>> commands for them in a software firewall?
>>>
>>> Yes, the firewall should validate that.
>>>
>>>>
>>>> So maybe we should implement several layers of validation in the SW firewall.
>>>> Like all layers for T20 (memory boundaries validation etc), software gather
>>>> filter for T30/114 and software syncpoint validation for T30/114/124/210.
>>>>
>>>
>>> That seems like a good idea.
>>
>> Alright, factoring out firewall from job.c probably should be the first step.
>>
Dmitry Osipenko Aug. 19, 2017, 12:02 p.m. UTC | #8
On 18.08.2017 19:15, Mikko Perttunen wrote:
> Since Tegra186 the Host1x hardware allows syncpoints to be assigned to
> specific channels, preventing any other channels from incrementing
> them.
> 
> Enable this feature where available and assign syncpoints to channels
> when submitting a job. Syncpoints are currently never unassigned from
> channels since that would require extra work and is unnecessary with
> the current channel allocation model.
> 
> Signed-off-by: Mikko Perttunen <mperttunen@nvidia.com>
> ---
[snip]
> diff --git a/drivers/gpu/host1x/syncpt.c b/drivers/gpu/host1x/syncpt.c
> index 048ac9e344ce..fe4d963b3e2a 100644
> --- a/drivers/gpu/host1x/syncpt.c
> +++ b/drivers/gpu/host1x/syncpt.c
> @@ -398,6 +398,8 @@ int host1x_syncpt_init(struct host1x *host)
>  	for (i = 0; i < host->info->nb_pts; i++) {
>  		syncpt[i].id = i;
>  		syncpt[i].host = host;
> +
> +		host1x_hw_syncpt_assign_channel(host, &syncpt[i], NULL);
>  	}

What about factoring out that assignment and adding a comment, something like this:

/* clear syncpoint-channel assignments on Tegra186+ */
for (i = 0; i < host->info->nb_pts; i++)
	host1x_hw_syncpt_assign_channel(host, &syncpt[i], NULL);

And maybe even add an inline function for clarity, like:

/*
 * Drop the channel assignment of syncpoint @sp by calling the
 * assign_channel op of @host with a NULL channel.
 */
static inline void host1x_hw_syncpt_deassign_channel(struct host1x *host,
						     struct host1x_syncpt *sp)
{
	/* plain call: a void function must not "return" an expression */
	host->syncpt_op->assign_channel(sp, NULL);
}
Dmitry Osipenko Aug. 20, 2017, 4:18 p.m. UTC | #9
On 18.08.2017 19:15, Mikko Perttunen wrote:
> Since Tegra186 the Host1x hardware allows syncpoints to be assigned to
> specific channels, preventing any other channels from incrementing
> them.
> 
> Enable this feature where available and assign syncpoints to channels
> when submitting a job. Syncpoints are currently never unassigned from
> channels since that would require extra work and is unnecessary with
> the current channel allocation model.
> 
> Signed-off-by: Mikko Perttunen <mperttunen@nvidia.com>
> ---
>  drivers/gpu/host1x/dev.h           | 16 ++++++++++++++++
>  drivers/gpu/host1x/hw/channel_hw.c |  3 +++
>  drivers/gpu/host1x/hw/syncpt_hw.c  | 26 ++++++++++++++++++++++++++
>  drivers/gpu/host1x/syncpt.c        |  3 +++
>  4 files changed, 48 insertions(+)
> 
> diff --git a/drivers/gpu/host1x/dev.h b/drivers/gpu/host1x/dev.h
> index def802c0a6bf..2432a30ff6e2 100644
> --- a/drivers/gpu/host1x/dev.h
> +++ b/drivers/gpu/host1x/dev.h
> @@ -79,6 +79,9 @@ struct host1x_syncpt_ops {
>  	u32 (*load)(struct host1x_syncpt *syncpt);
>  	int (*cpu_incr)(struct host1x_syncpt *syncpt);
>  	int (*patch_wait)(struct host1x_syncpt *syncpt, void *patch_addr);
> +	void (*assign_channel)(struct host1x_syncpt *syncpt,
> +	                       struct host1x_channel *channel);
> +	void (*set_protection)(struct host1x *host, bool enabled);
>  };
>  
>  struct host1x_intr_ops {
> @@ -186,6 +189,19 @@ static inline int host1x_hw_syncpt_patch_wait(struct host1x *host,
>  	return host->syncpt_op->patch_wait(sp, patch_addr);
>  }
>  
> +static inline void host1x_hw_syncpt_assign_channel(struct host1x *host,
> +						   struct host1x_syncpt *sp,
> +						   struct host1x_channel *ch)
> +{
> +	return host->syncpt_op->assign_channel(sp, ch);
> +}
> +
> +static inline void host1x_hw_syncpt_set_protection(struct host1x *host,
> +						   bool enabled)
> +{
> +	return host->syncpt_op->set_protection(host, enabled);
> +}
> +
>  static inline int host1x_hw_intr_init_host_sync(struct host1x *host, u32 cpm,
>  			void (*syncpt_thresh_work)(struct work_struct *))
>  {
> diff --git a/drivers/gpu/host1x/hw/channel_hw.c b/drivers/gpu/host1x/hw/channel_hw.c
> index 8447a56c41ca..0161da331702 100644
> --- a/drivers/gpu/host1x/hw/channel_hw.c
> +++ b/drivers/gpu/host1x/hw/channel_hw.c
> @@ -147,6 +147,9 @@ static int channel_submit(struct host1x_job *job)
>  
>  	syncval = host1x_syncpt_incr_max(sp, user_syncpt_incrs);
>  
> +	/* assign syncpoint to channel */
> +	host1x_hw_syncpt_assign_channel(host, sp, ch);
> +
>  	job->syncpt_end = syncval;
>  
>  	/* add a setclass for modules that require it */
> diff --git a/drivers/gpu/host1x/hw/syncpt_hw.c b/drivers/gpu/host1x/hw/syncpt_hw.c
> index 7b0270d60742..5d117ab1699e 100644
> --- a/drivers/gpu/host1x/hw/syncpt_hw.c
> +++ b/drivers/gpu/host1x/hw/syncpt_hw.c
> @@ -106,6 +106,30 @@ static int syncpt_patch_wait(struct host1x_syncpt *sp, void *patch_addr)
>  	return 0;
>  }
>  
> +static void syncpt_assign_channel(struct host1x_syncpt *sp,
> +				  struct host1x_channel *ch)
> +{
> +#if HOST1X_HW >= 6
> +	struct host1x *host = sp->host;
> +
> +	if (!host->hv_regs)
> +		return;
> +
> +	host1x_sync_writel(host,
> +			   HOST1X_SYNC_SYNCPT_CH_APP_CH(ch ? ch->id : 0xff),
> +			   HOST1X_SYNC_SYNCPT_CH_APP(sp->id));
> +#endif
> +}
> +
> +static void syncpt_set_protection(struct host1x *host, bool enabled)
> +{
> +#if HOST1X_HW >= 6
> +	host1x_hypervisor_writel(host,
> +				 enabled ? HOST1X_HV_SYNCPT_PROT_EN_CH_EN : 0,
> +				 HOST1X_HV_SYNCPT_PROT_EN);
> +#endif
> +}
> +
>  static const struct host1x_syncpt_ops host1x_syncpt_ops = {
>  	.restore = syncpt_restore,
>  	.restore_wait_base = syncpt_restore_wait_base,
> @@ -113,4 +137,6 @@ static const struct host1x_syncpt_ops host1x_syncpt_ops = {
>  	.load = syncpt_load,
>  	.cpu_incr = syncpt_cpu_incr,
>  	.patch_wait = syncpt_patch_wait,
> +	.assign_channel = syncpt_assign_channel,
> +	.set_protection = syncpt_set_protection,
>  };
> diff --git a/drivers/gpu/host1x/syncpt.c b/drivers/gpu/host1x/syncpt.c
> index 048ac9e344ce..fe4d963b3e2a 100644
> --- a/drivers/gpu/host1x/syncpt.c
> +++ b/drivers/gpu/host1x/syncpt.c
> @@ -398,6 +398,8 @@ int host1x_syncpt_init(struct host1x *host)
>  	for (i = 0; i < host->info->nb_pts; i++) {
>  		syncpt[i].id = i;
>  		syncpt[i].host = host;
> +
> +		host1x_hw_syncpt_assign_channel(host, &syncpt[i], NULL);
>  	}
>  
>  	for (i = 0; i < host->info->nb_bases; i++)
> @@ -408,6 +410,7 @@ int host1x_syncpt_init(struct host1x *host)
>  	host->bases = bases;
>  
>  	host1x_syncpt_restore(host);
> +	host1x_hw_syncpt_set_protection(host, true);

Since protection is never disabled, maybe something like
host1x_hw_syncpt_enable_protection() would fit a bit better.

>  
>  	/* Allocate sync point to use for clearing waits for expired fences */
>  	host->nop_sp = host1x_syncpt_alloc(host, NULL, 0);
>
Dmitry Osipenko Aug. 20, 2017, 4:59 p.m. UTC | #10
On 18.08.2017 19:15, Mikko Perttunen wrote:
> Since Tegra186 the Host1x hardware allows syncpoints to be assigned to
> specific channels, preventing any other channels from incrementing
> them.
> 
> Enable this feature where available and assign syncpoints to channels
> when submitting a job. Syncpoints are currently never unassigned from
> channels since that would require extra work and is unnecessary with
> the current channel allocation model.
> 
> Signed-off-by: Mikko Perttunen <mperttunen@nvidia.com>
> ---
>  drivers/gpu/host1x/dev.h           | 16 ++++++++++++++++
>  drivers/gpu/host1x/hw/channel_hw.c |  3 +++
>  drivers/gpu/host1x/hw/syncpt_hw.c  | 26 ++++++++++++++++++++++++++
>  drivers/gpu/host1x/syncpt.c        |  3 +++
>  4 files changed, 48 insertions(+)
> 
> diff --git a/drivers/gpu/host1x/dev.h b/drivers/gpu/host1x/dev.h
> index def802c0a6bf..2432a30ff6e2 100644
> --- a/drivers/gpu/host1x/dev.h
> +++ b/drivers/gpu/host1x/dev.h
> @@ -79,6 +79,9 @@ struct host1x_syncpt_ops {
>  	u32 (*load)(struct host1x_syncpt *syncpt);
>  	int (*cpu_incr)(struct host1x_syncpt *syncpt);
>  	int (*patch_wait)(struct host1x_syncpt *syncpt, void *patch_addr);
> +	void (*assign_channel)(struct host1x_syncpt *syncpt,
> +	                       struct host1x_channel *channel);
> +	void (*set_protection)(struct host1x *host, bool enabled);
>  };
>  
>  struct host1x_intr_ops {
> @@ -186,6 +189,19 @@ static inline int host1x_hw_syncpt_patch_wait(struct host1x *host,
>  	return host->syncpt_op->patch_wait(sp, patch_addr);
>  }
>  
> +static inline void host1x_hw_syncpt_assign_channel(struct host1x *host,
> +						   struct host1x_syncpt *sp,
> +						   struct host1x_channel *ch)
> +{
> +	return host->syncpt_op->assign_channel(sp, ch);
> +}
> +
> +static inline void host1x_hw_syncpt_set_protection(struct host1x *host,
> +						   bool enabled)
> +{
> +	return host->syncpt_op->set_protection(host, enabled);
> +}
> +
>  static inline int host1x_hw_intr_init_host_sync(struct host1x *host, u32 cpm,
>  			void (*syncpt_thresh_work)(struct work_struct *))
>  {
> diff --git a/drivers/gpu/host1x/hw/channel_hw.c b/drivers/gpu/host1x/hw/channel_hw.c
> index 8447a56c41ca..0161da331702 100644
> --- a/drivers/gpu/host1x/hw/channel_hw.c
> +++ b/drivers/gpu/host1x/hw/channel_hw.c
> @@ -147,6 +147,9 @@ static int channel_submit(struct host1x_job *job)
>  
>  	syncval = host1x_syncpt_incr_max(sp, user_syncpt_incrs);
>  
> +	/* assign syncpoint to channel */
> +	host1x_hw_syncpt_assign_channel(host, sp, ch);
> +
>  	job->syncpt_end = syncval;
>  
>  	/* add a setclass for modules that require it */
> diff --git a/drivers/gpu/host1x/hw/syncpt_hw.c b/drivers/gpu/host1x/hw/syncpt_hw.c
> index 7b0270d60742..5d117ab1699e 100644
> --- a/drivers/gpu/host1x/hw/syncpt_hw.c
> +++ b/drivers/gpu/host1x/hw/syncpt_hw.c
> @@ -106,6 +106,30 @@ static int syncpt_patch_wait(struct host1x_syncpt *sp, void *patch_addr)
>  	return 0;
>  }
>  
> +static void syncpt_assign_channel(struct host1x_syncpt *sp,
> +				  struct host1x_channel *ch)
> +{
> +#if HOST1X_HW >= 6
> +	struct host1x *host = sp->host;
> +
> +	if (!host->hv_regs)
> +		return;

This check should be placed in syncpt_set_protection().

> +
> +	host1x_sync_writel(host,
> +			   HOST1X_SYNC_SYNCPT_CH_APP_CH(ch ? ch->id : 0xff),
> +			   HOST1X_SYNC_SYNCPT_CH_APP(sp->id));
> +#endif
> +}
> +
> +static void syncpt_set_protection(struct host1x *host, bool enabled)
> +{
> +#if HOST1X_HW >= 6
> +	host1x_hypervisor_writel(host,
> +				 enabled ? HOST1X_HV_SYNCPT_PROT_EN_CH_EN : 0,
> +				 HOST1X_HV_SYNCPT_PROT_EN);
> +#endif
> +}
> +
>  static const struct host1x_syncpt_ops host1x_syncpt_ops = {
>  	.restore = syncpt_restore,
>  	.restore_wait_base = syncpt_restore_wait_base,
> @@ -113,4 +137,6 @@ static const struct host1x_syncpt_ops host1x_syncpt_ops = {
>  	.load = syncpt_load,
>  	.cpu_incr = syncpt_cpu_incr,
>  	.patch_wait = syncpt_patch_wait,
> +	.assign_channel = syncpt_assign_channel,
> +	.set_protection = syncpt_set_protection,
>  };
> diff --git a/drivers/gpu/host1x/syncpt.c b/drivers/gpu/host1x/syncpt.c
> index 048ac9e344ce..fe4d963b3e2a 100644
> --- a/drivers/gpu/host1x/syncpt.c
> +++ b/drivers/gpu/host1x/syncpt.c
> @@ -398,6 +398,8 @@ int host1x_syncpt_init(struct host1x *host)
>  	for (i = 0; i < host->info->nb_pts; i++) {
>  		syncpt[i].id = i;
>  		syncpt[i].host = host;
> +
> +		host1x_hw_syncpt_assign_channel(host, &syncpt[i], NULL);
>  	}
>  
>  	for (i = 0; i < host->info->nb_bases; i++)
> @@ -408,6 +410,7 @@ int host1x_syncpt_init(struct host1x *host)
>  	host->bases = bases;
>  
>  	host1x_syncpt_restore(host);
> +	host1x_hw_syncpt_set_protection(host, true);
>  
>  	/* Allocate sync point to use for clearing waits for expired fences */
>  	host->nop_sp = host1x_syncpt_alloc(host, NULL, 0);
>
Dmitry Osipenko Aug. 20, 2017, 6:13 p.m. UTC | #11
On 18.08.2017 19:15, Mikko Perttunen wrote:
> Since Tegra186 the Host1x hardware allows syncpoints to be assigned to
> specific channels, preventing any other channels from incrementing
> them.
> 
> Enable this feature where available and assign syncpoints to channels
> when submitting a job. Syncpoints are currently never unassigned from
> channels since that would require extra work and is unnecessary with
> the current channel allocation model.
> 
> Signed-off-by: Mikko Perttunen <mperttunen@nvidia.com>
> ---
>  drivers/gpu/host1x/dev.h           | 16 ++++++++++++++++
>  drivers/gpu/host1x/hw/channel_hw.c |  3 +++
>  drivers/gpu/host1x/hw/syncpt_hw.c  | 26 ++++++++++++++++++++++++++
>  drivers/gpu/host1x/syncpt.c        |  3 +++
>  4 files changed, 48 insertions(+)
> 
> diff --git a/drivers/gpu/host1x/dev.h b/drivers/gpu/host1x/dev.h
> index def802c0a6bf..2432a30ff6e2 100644
> --- a/drivers/gpu/host1x/dev.h
> +++ b/drivers/gpu/host1x/dev.h
> @@ -79,6 +79,9 @@ struct host1x_syncpt_ops {
>  	u32 (*load)(struct host1x_syncpt *syncpt);
>  	int (*cpu_incr)(struct host1x_syncpt *syncpt);
>  	int (*patch_wait)(struct host1x_syncpt *syncpt, void *patch_addr);
> +	void (*assign_channel)(struct host1x_syncpt *syncpt,
> +	                       struct host1x_channel *channel);
> +	void (*set_protection)(struct host1x *host, bool enabled);
>  };
>  
>  struct host1x_intr_ops {
> @@ -186,6 +189,19 @@ static inline int host1x_hw_syncpt_patch_wait(struct host1x *host,
>  	return host->syncpt_op->patch_wait(sp, patch_addr);
>  }
>  
> +static inline void host1x_hw_syncpt_assign_channel(struct host1x *host,
> +						   struct host1x_syncpt *sp,
> +						   struct host1x_channel *ch)
> +{
> +	return host->syncpt_op->assign_channel(sp, ch);
> +}
> +
> +static inline void host1x_hw_syncpt_set_protection(struct host1x *host,
> +						   bool enabled)
> +{
> +	return host->syncpt_op->set_protection(host, enabled);
> +}
> +
>  static inline int host1x_hw_intr_init_host_sync(struct host1x *host, u32 cpm,
>  			void (*syncpt_thresh_work)(struct work_struct *))
>  {
> diff --git a/drivers/gpu/host1x/hw/channel_hw.c b/drivers/gpu/host1x/hw/channel_hw.c
> index 8447a56c41ca..0161da331702 100644
> --- a/drivers/gpu/host1x/hw/channel_hw.c
> +++ b/drivers/gpu/host1x/hw/channel_hw.c
> @@ -147,6 +147,9 @@ static int channel_submit(struct host1x_job *job)
>  
>  	syncval = host1x_syncpt_incr_max(sp, user_syncpt_incrs);
>  
> +	/* assign syncpoint to channel */
> +	host1x_hw_syncpt_assign_channel(host, sp, ch);
> +

Since there is one client per channel, it would probably make sense to assign
client syncpoints in host1x_channel_request().

>  	job->syncpt_end = syncval;
>  
>  	/* add a setclass for modules that require it */
> diff --git a/drivers/gpu/host1x/hw/syncpt_hw.c b/drivers/gpu/host1x/hw/syncpt_hw.c
> index 7b0270d60742..5d117ab1699e 100644
> --- a/drivers/gpu/host1x/hw/syncpt_hw.c
> +++ b/drivers/gpu/host1x/hw/syncpt_hw.c
> @@ -106,6 +106,30 @@ static int syncpt_patch_wait(struct host1x_syncpt *sp, void *patch_addr)
>  	return 0;
>  }
>  
> +static void syncpt_assign_channel(struct host1x_syncpt *sp,
> +				  struct host1x_channel *ch)
> +{
> +#if HOST1X_HW >= 6
> +	struct host1x *host = sp->host;
> +
> +	if (!host->hv_regs)
> +		return;
> +
> +	host1x_sync_writel(host,
> +			   HOST1X_SYNC_SYNCPT_CH_APP_CH(ch ? ch->id : 0xff),
> +			   HOST1X_SYNC_SYNCPT_CH_APP(sp->id));
> +#endif
> +}
> +
> +static void syncpt_set_protection(struct host1x *host, bool enabled)
> +{
> +#if HOST1X_HW >= 6
> +	host1x_hypervisor_writel(host,
> +				 enabled ? HOST1X_HV_SYNCPT_PROT_EN_CH_EN : 0,
> +				 HOST1X_HV_SYNCPT_PROT_EN);
> +#endif
> +}
> +
>  static const struct host1x_syncpt_ops host1x_syncpt_ops = {
>  	.restore = syncpt_restore,
>  	.restore_wait_base = syncpt_restore_wait_base,
> @@ -113,4 +137,6 @@ static const struct host1x_syncpt_ops host1x_syncpt_ops = {
>  	.load = syncpt_load,
>  	.cpu_incr = syncpt_cpu_incr,
>  	.patch_wait = syncpt_patch_wait,
> +	.assign_channel = syncpt_assign_channel,
> +	.set_protection = syncpt_set_protection,
>  };
> diff --git a/drivers/gpu/host1x/syncpt.c b/drivers/gpu/host1x/syncpt.c
> index 048ac9e344ce..fe4d963b3e2a 100644
> --- a/drivers/gpu/host1x/syncpt.c
> +++ b/drivers/gpu/host1x/syncpt.c
> @@ -398,6 +398,8 @@ int host1x_syncpt_init(struct host1x *host)
>  	for (i = 0; i < host->info->nb_pts; i++) {
>  		syncpt[i].id = i;
>  		syncpt[i].host = host;
> +
> +		host1x_hw_syncpt_assign_channel(host, &syncpt[i], NULL);
>  	}
>  
>  	for (i = 0; i < host->info->nb_bases; i++)
> @@ -408,6 +410,7 @@ int host1x_syncpt_init(struct host1x *host)
>  	host->bases = bases;
>  
>  	host1x_syncpt_restore(host);
> +	host1x_hw_syncpt_set_protection(host, true);
>  
>  	/* Allocate sync point to use for clearing waits for expired fences */
>  	host->nop_sp = host1x_syncpt_alloc(host, NULL, 0);
>
diff mbox

Patch

diff --git a/drivers/gpu/host1x/dev.h b/drivers/gpu/host1x/dev.h
index def802c0a6bf..2432a30ff6e2 100644
--- a/drivers/gpu/host1x/dev.h
+++ b/drivers/gpu/host1x/dev.h
@@ -79,6 +79,9 @@  struct host1x_syncpt_ops {
 	u32 (*load)(struct host1x_syncpt *syncpt);
 	int (*cpu_incr)(struct host1x_syncpt *syncpt);
 	int (*patch_wait)(struct host1x_syncpt *syncpt, void *patch_addr);
+	void (*assign_channel)(struct host1x_syncpt *syncpt,
+	                       struct host1x_channel *channel);
+	void (*set_protection)(struct host1x *host, bool enabled);
 };
 
 struct host1x_intr_ops {
@@ -186,6 +189,19 @@  static inline int host1x_hw_syncpt_patch_wait(struct host1x *host,
 	return host->syncpt_op->patch_wait(sp, patch_addr);
 }
 
+static inline void host1x_hw_syncpt_assign_channel(struct host1x *host,
+						   struct host1x_syncpt *sp,
+						   struct host1x_channel *ch)
+{
+	return host->syncpt_op->assign_channel(sp, ch);
+}
+
+static inline void host1x_hw_syncpt_set_protection(struct host1x *host,
+						   bool enabled)
+{
+	return host->syncpt_op->set_protection(host, enabled);
+}
+
 static inline int host1x_hw_intr_init_host_sync(struct host1x *host, u32 cpm,
 			void (*syncpt_thresh_work)(struct work_struct *))
 {
diff --git a/drivers/gpu/host1x/hw/channel_hw.c b/drivers/gpu/host1x/hw/channel_hw.c
index 8447a56c41ca..0161da331702 100644
--- a/drivers/gpu/host1x/hw/channel_hw.c
+++ b/drivers/gpu/host1x/hw/channel_hw.c
@@ -147,6 +147,9 @@  static int channel_submit(struct host1x_job *job)
 
 	syncval = host1x_syncpt_incr_max(sp, user_syncpt_incrs);
 
+	/* assign syncpoint to channel */
+	host1x_hw_syncpt_assign_channel(host, sp, ch);
+
 	job->syncpt_end = syncval;
 
 	/* add a setclass for modules that require it */
diff --git a/drivers/gpu/host1x/hw/syncpt_hw.c b/drivers/gpu/host1x/hw/syncpt_hw.c
index 7b0270d60742..5d117ab1699e 100644
--- a/drivers/gpu/host1x/hw/syncpt_hw.c
+++ b/drivers/gpu/host1x/hw/syncpt_hw.c
@@ -106,6 +106,30 @@  static int syncpt_patch_wait(struct host1x_syncpt *sp, void *patch_addr)
 	return 0;
 }
 
+static void syncpt_assign_channel(struct host1x_syncpt *sp,
+				  struct host1x_channel *ch)
+{
+#if HOST1X_HW >= 6
+	struct host1x *host = sp->host;
+
+	if (!host->hv_regs)
+		return;
+
+	host1x_sync_writel(host,
+			   HOST1X_SYNC_SYNCPT_CH_APP_CH(ch ? ch->id : 0xff),
+			   HOST1X_SYNC_SYNCPT_CH_APP(sp->id));
+#endif
+}
+
+static void syncpt_set_protection(struct host1x *host, bool enabled)
+{
+#if HOST1X_HW >= 6
+	host1x_hypervisor_writel(host,
+				 enabled ? HOST1X_HV_SYNCPT_PROT_EN_CH_EN : 0,
+				 HOST1X_HV_SYNCPT_PROT_EN);
+#endif
+}
+
 static const struct host1x_syncpt_ops host1x_syncpt_ops = {
 	.restore = syncpt_restore,
 	.restore_wait_base = syncpt_restore_wait_base,
@@ -113,4 +137,6 @@  static const struct host1x_syncpt_ops host1x_syncpt_ops = {
 	.load = syncpt_load,
 	.cpu_incr = syncpt_cpu_incr,
 	.patch_wait = syncpt_patch_wait,
+	.assign_channel = syncpt_assign_channel,
+	.set_protection = syncpt_set_protection,
 };
diff --git a/drivers/gpu/host1x/syncpt.c b/drivers/gpu/host1x/syncpt.c
index 048ac9e344ce..fe4d963b3e2a 100644
--- a/drivers/gpu/host1x/syncpt.c
+++ b/drivers/gpu/host1x/syncpt.c
@@ -398,6 +398,8 @@  int host1x_syncpt_init(struct host1x *host)
 	for (i = 0; i < host->info->nb_pts; i++) {
 		syncpt[i].id = i;
 		syncpt[i].host = host;
+
+		host1x_hw_syncpt_assign_channel(host, &syncpt[i], NULL);
 	}
 
 	for (i = 0; i < host->info->nb_bases; i++)
@@ -408,6 +410,7 @@  int host1x_syncpt_init(struct host1x *host)
 	host->bases = bases;
 
 	host1x_syncpt_restore(host);
+	host1x_hw_syncpt_set_protection(host, true);
 
 	/* Allocate sync point to use for clearing waits for expired fences */
 	host->nop_sp = host1x_syncpt_alloc(host, NULL, 0);