diff mbox

[5/8] gpu: host1x: Add IOMMU support

Message ID 20161110182345.31777-6-mperttunen@nvidia.com (mailing list archive)
State New, archived
Headers show

Commit Message

Mikko Perttunen Nov. 10, 2016, 6:23 p.m. UTC
Add support for the Host1x unit to be located behind
an IOMMU. This is required when gather buffers may be
allocated non-contiguously in physical memory, as can
be the case when TegraDRM is also using the IOMMU.

Signed-off-by: Mikko Perttunen <mperttunen@nvidia.com>
---
 drivers/gpu/host1x/cdma.c       | 65 +++++++++++++++++++++++++++++------
 drivers/gpu/host1x/cdma.h       |  4 ++-
 drivers/gpu/host1x/dev.c        | 39 +++++++++++++++++++--
 drivers/gpu/host1x/dev.h        |  5 +++
 drivers/gpu/host1x/hw/cdma_hw.c | 10 +++---
 drivers/gpu/host1x/job.c        | 76 +++++++++++++++++++++++++++++++++++------
 drivers/gpu/host1x/job.h        |  1 +
 7 files changed, 171 insertions(+), 29 deletions(-)

Comments

Thierry Reding Dec. 5, 2016, 7:49 p.m. UTC | #1
On Thu, Nov 10, 2016 at 08:23:42PM +0200, Mikko Perttunen wrote:
> Add support for the Host1x unit to be located behind
> an IOMMU. This is required when gather buffers may be
> allocated non-contiguously in physical memory, as can
> be the case when TegraDRM is also using the IOMMU.
> 
> Signed-off-by: Mikko Perttunen <mperttunen@nvidia.com>
> ---
>  drivers/gpu/host1x/cdma.c       | 65 +++++++++++++++++++++++++++++------
>  drivers/gpu/host1x/cdma.h       |  4 ++-
>  drivers/gpu/host1x/dev.c        | 39 +++++++++++++++++++--
>  drivers/gpu/host1x/dev.h        |  5 +++
>  drivers/gpu/host1x/hw/cdma_hw.c | 10 +++---
>  drivers/gpu/host1x/job.c        | 76 +++++++++++++++++++++++++++++++++++------
>  drivers/gpu/host1x/job.h        |  1 +
>  7 files changed, 171 insertions(+), 29 deletions(-)
> 
> diff --git a/drivers/gpu/host1x/cdma.c b/drivers/gpu/host1x/cdma.c
> index c5d82a8..14692446e 100644
> --- a/drivers/gpu/host1x/cdma.c
> +++ b/drivers/gpu/host1x/cdma.c
> @@ -51,9 +51,15 @@ static void host1x_pushbuffer_destroy(struct push_buffer *pb)
>  	struct host1x_cdma *cdma = pb_to_cdma(pb);
>  	struct host1x *host1x = cdma_to_host1x(cdma);
>  
> -	if (pb->phys != 0)
> -		dma_free_wc(host1x->dev, pb->size_bytes + 4, pb->mapped,
> -			    pb->phys);
> +	if (!pb->phys)
> +		return;
> +
> +	if (host1x->domain) {
> +		iommu_unmap(host1x->domain, pb->dma, pb->alloc_size_bytes);
> +		free_iova(&host1x->iova, iova_pfn(&host1x->iova, pb->dma));
> +	}
> +
> +	dma_free_wc(host1x->dev, pb->alloc_size_bytes, pb->mapped, pb->phys);
>  
>  	pb->mapped = NULL;
>  	pb->phys = 0;
> @@ -66,28 +72,65 @@ static int host1x_pushbuffer_init(struct push_buffer *pb)
>  {
>  	struct host1x_cdma *cdma = pb_to_cdma(pb);
>  	struct host1x *host1x = cdma_to_host1x(cdma);
> +	struct iova *alloc;
> +	int err;
>  
>  	pb->mapped = NULL;
>  	pb->phys = 0;
>  	pb->size_bytes = HOST1X_PUSHBUFFER_SLOTS * 8;
> +	pb->alloc_size_bytes = pb->size_bytes + 4;
>  
>  	/* initialize buffer pointers */
>  	pb->fence = pb->size_bytes - 8;
>  	pb->pos = 0;
>  
> -	/* allocate and map pushbuffer memory */
> -	pb->mapped = dma_alloc_wc(host1x->dev, pb->size_bytes + 4, &pb->phys,
> -				  GFP_KERNEL);
> -	if (!pb->mapped)
> -		goto fail;
> +	if (host1x->domain) {
> +		unsigned long shift;
> +
> +		pb->alloc_size_bytes = iova_align(&host1x->iova,
> +						  pb->alloc_size_bytes);
> +
> +		pb->mapped = dma_alloc_wc(host1x->dev, pb->alloc_size_bytes,
> +					  &pb->phys, GFP_KERNEL);
> +		if (!pb->mapped)
> +			return -ENOMEM;
> +
> +		shift = iova_shift(&host1x->iova);
> +		alloc = alloc_iova(
> +			&host1x->iova, pb->alloc_size_bytes >> shift,
> +			host1x->domain->geometry.aperture_end >> shift, true);

Let's precompute some of these values to make this expression more
compact. For example pb->alloc_size_bytes is used a couple of times, so
it could be stored temporarily in a local variable with a nice and short
name such as "size".

If you store the aperture end, or limit_pfn, as I commented in an
earlier patch, then this also becomes much more compact.

> +		if (!alloc) {
> +			err = -ENOMEM;
> +			goto iommu_free_mem;
> +		}
> +
> +		err = iommu_map(host1x->domain,
> +				iova_dma_addr(&host1x->iova, alloc),
> +				pb->phys, pb->alloc_size_bytes,
> +				IOMMU_READ);

Same here.

> +		if (err)
> +			goto iommu_free_iova;
> +
> +		pb->dma = iova_dma_addr(&host1x->iova, alloc);
> +	} else {
> +		pb->mapped = dma_alloc_wc(host1x->dev, pb->alloc_size_bytes,
> +					  &pb->phys, GFP_KERNEL);
> +		if (!pb->mapped)
> +			return -ENOMEM;
> +
> +		pb->dma = pb->phys;
> +	}
>  
>  	host1x_hw_pushbuffer_init(host1x, pb);
>  
>  	return 0;
>  
> -fail:
> -	host1x_pushbuffer_destroy(pb);
> -	return -ENOMEM;
> +iommu_free_iova:
> +	__free_iova(&host1x->iova, alloc);
> +iommu_free_mem:
> +	dma_free_wc(host1x->dev, pb->alloc_size_bytes, pb->mapped, pb->phys);
> +
> +	return err;
>  }
>  
>  /*
> diff --git a/drivers/gpu/host1x/cdma.h b/drivers/gpu/host1x/cdma.h
> index 470087a..4b3645f 100644
> --- a/drivers/gpu/host1x/cdma.h
> +++ b/drivers/gpu/host1x/cdma.h
> @@ -43,10 +43,12 @@ struct host1x_job;
>  
>  struct push_buffer {
>  	void *mapped;			/* mapped pushbuffer memory */
> -	dma_addr_t phys;		/* physical address of pushbuffer */
> +	dma_addr_t dma;			/* device address of pushbuffer */
> +	phys_addr_t phys;		/* physical address of pushbuffer */
>  	u32 fence;			/* index we've written */
>  	u32 pos;			/* index to write to */
>  	u32 size_bytes;
> +	u32 alloc_size_bytes;

Maybe just "alloc_size"?

>  };
>  
>  struct buffer_timeout {
> diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c
> index a62317a..ca4527e 100644
> --- a/drivers/gpu/host1x/dev.c
> +++ b/drivers/gpu/host1x/dev.c
> @@ -168,16 +168,36 @@ static int host1x_probe(struct platform_device *pdev)
>  		return err;
>  	}
>  
> +	if (iommu_present(&platform_bus_type)) {
> +		struct iommu_domain_geometry *geometry;
> +		unsigned long order;
> +
> +		host->domain = iommu_domain_alloc(&platform_bus_type);
> +		if (!host->domain)
> +			return -ENOMEM;
> +
> +		err = iommu_attach_device(host->domain, &pdev->dev);
> +		if (err)
> +			goto fail_free_domain;
> +
> +		geometry = &host->domain->geometry;
> +
> +		order = __ffs(host->domain->pgsize_bitmap);
> +		init_iova_domain(&host->iova, 1UL << order,
> +				 geometry->aperture_start >> order,
> +				 geometry->aperture_end >> order);
> +	}
> +
>  	err = host1x_channel_list_init(host);
>  	if (err) {
>  		dev_err(&pdev->dev, "failed to initialize channel list\n");
> -		return err;
> +		goto fail_detach_device;
>  	}
>  
>  	err = clk_prepare_enable(host->clk);
>  	if (err < 0) {
>  		dev_err(&pdev->dev, "failed to enable clock\n");
> -		return err;
> +		goto fail_detach_device;
>  	}
>  
>  	err = host1x_syncpt_init(host);
> @@ -206,6 +226,15 @@ static int host1x_probe(struct platform_device *pdev)
>  	host1x_syncpt_deinit(host);
>  fail_unprepare_disable:
>  	clk_disable_unprepare(host->clk);
> +fail_detach_device:
> +	if (host->domain) {
> +		put_iova_domain(&host->iova);
> +		iommu_detach_device(host->domain, &pdev->dev);
> +	}
> +fail_free_domain:
> +	if (host->domain)
> +		iommu_domain_free(host->domain);
> +
>  	return err;
>  }
>  
> @@ -218,6 +247,12 @@ static int host1x_remove(struct platform_device *pdev)
>  	host1x_syncpt_deinit(host);
>  	clk_disable_unprepare(host->clk);
>  
> +	if (host->domain) {
> +		put_iova_domain(&host->iova);
> +		iommu_detach_device(host->domain, &pdev->dev);
> +		iommu_domain_free(host->domain);
> +	}
> +
>  	return 0;
>  }
>  
> diff --git a/drivers/gpu/host1x/dev.h b/drivers/gpu/host1x/dev.h
> index 5220510..6189825 100644
> --- a/drivers/gpu/host1x/dev.h
> +++ b/drivers/gpu/host1x/dev.h
> @@ -19,6 +19,8 @@
>  
>  #include <linux/platform_device.h>
>  #include <linux/device.h>
> +#include <linux/iommu.h>
> +#include <linux/iova.h>
>  
>  #include "channel.h"
>  #include "syncpt.h"
> @@ -108,6 +110,9 @@ struct host1x {
>  	struct device *dev;
>  	struct clk *clk;
>  
> +	struct iommu_domain *domain;
> +	struct iova_domain iova;
> +
>  	struct mutex intr_mutex;
>  	int intr_syncpt_irq;
>  
> diff --git a/drivers/gpu/host1x/hw/cdma_hw.c b/drivers/gpu/host1x/hw/cdma_hw.c
> index 659c1bb..b8c0348 100644
> --- a/drivers/gpu/host1x/hw/cdma_hw.c
> +++ b/drivers/gpu/host1x/hw/cdma_hw.c
> @@ -55,7 +55,7 @@ static void cdma_timeout_cpu_incr(struct host1x_cdma *cdma, u32 getptr,
>  		*(p++) = HOST1X_OPCODE_NOP;
>  		*(p++) = HOST1X_OPCODE_NOP;
>  		dev_dbg(host1x->dev, "%s: NOP at %pad+%#x\n", __func__,
> -			&pb->phys, getptr);
> +			&pb->dma, getptr);
>  		getptr = (getptr + 8) & (pb->size_bytes - 1);
>  	}
>  
> @@ -78,9 +78,9 @@ static void cdma_start(struct host1x_cdma *cdma)
>  			 HOST1X_CHANNEL_DMACTRL);
>  
>  	/* set base, put and end pointer */
> -	host1x_ch_writel(ch, cdma->push_buffer.phys, HOST1X_CHANNEL_DMASTART);
> +	host1x_ch_writel(ch, cdma->push_buffer.dma, HOST1X_CHANNEL_DMASTART);
>  	host1x_ch_writel(ch, cdma->push_buffer.pos, HOST1X_CHANNEL_DMAPUT);
> -	host1x_ch_writel(ch, cdma->push_buffer.phys +
> +	host1x_ch_writel(ch, cdma->push_buffer.dma +
>  			 cdma->push_buffer.size_bytes + 4,
>  			 HOST1X_CHANNEL_DMAEND);
>  
> @@ -115,8 +115,8 @@ static void cdma_timeout_restart(struct host1x_cdma *cdma, u32 getptr)
>  			 HOST1X_CHANNEL_DMACTRL);
>  
>  	/* set base, end pointer (all of memory) */
> -	host1x_ch_writel(ch, cdma->push_buffer.phys, HOST1X_CHANNEL_DMASTART);
> -	host1x_ch_writel(ch, cdma->push_buffer.phys +
> +	host1x_ch_writel(ch, cdma->push_buffer.dma, HOST1X_CHANNEL_DMASTART);
> +	host1x_ch_writel(ch, cdma->push_buffer.dma +
>  			 cdma->push_buffer.size_bytes,
>  			 HOST1X_CHANNEL_DMAEND);
>  
> diff --git a/drivers/gpu/host1x/job.c b/drivers/gpu/host1x/job.c
> index a91b7c4..222d930 100644
> --- a/drivers/gpu/host1x/job.c
> +++ b/drivers/gpu/host1x/job.c
> @@ -174,8 +174,9 @@ static int do_waitchks(struct host1x_job *job, struct host1x *host,
>  	return 0;
>  }
>  
> -static unsigned int pin_job(struct host1x_job *job)
> +static unsigned int pin_job(struct host1x *host, struct host1x_job *job)
>  {
> +	int err;
>  	unsigned int i;

Maybe use an inverse christmas tree for variables as in other parts of
this driver? That is, add the new int err belowe the existing unsined
int i.

>  
>  	job->num_unpins = 0;
> @@ -186,12 +187,16 @@ static unsigned int pin_job(struct host1x_job *job)
>  		dma_addr_t phys_addr;
>  
>  		reloc->target.bo = host1x_bo_get(reloc->target.bo);
> -		if (!reloc->target.bo)
> +		if (!reloc->target.bo) {
> +			err = -EINVAL;
>  			goto unpin;
> +		}
>  
>  		phys_addr = host1x_bo_pin(reloc->target.bo, &sgt);
> -		if (!phys_addr)
> +		if (!phys_addr) {
> +			err = -EINVAL;
>  			goto unpin;
> +		}
>  
>  		job->addr_phys[job->num_unpins] = phys_addr;
>  		job->unpins[job->num_unpins].bo = reloc->target.bo;
> @@ -204,25 +209,68 @@ static unsigned int pin_job(struct host1x_job *job)
>  		struct sg_table *sgt;
>  		dma_addr_t phys_addr;
>  
> +		size_t gather_size = 0;
> +		struct scatterlist *sg;
> +		struct iova *alloc;
> +		unsigned long shift;
> +		int j;

There should be no blank line between blocks of variable declarations.
Also, make j an unsigned int.

> +
>  		g->bo = host1x_bo_get(g->bo);
> -		if (!g->bo)
> +		if (!g->bo) {
> +			err = -EINVAL;
>  			goto unpin;
> +		}
>  
>  		phys_addr = host1x_bo_pin(g->bo, &sgt);
> -		if (!phys_addr)
> +		if (!phys_addr) {
> +			err = -EINVAL;
>  			goto unpin;
> +		}
> +
> +		if (!IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL) && host->domain) {
> +			for_each_sg(sgt->sgl, sg, sgt->nents, j) {
> +				gather_size += sg->length;
> +			}

No need for curly brackets here.

> +			gather_size = iova_align(&host->iova, gather_size);
> +
> +			shift = iova_shift(&host->iova);
> +			alloc = alloc_iova(
> +				&host->iova, gather_size >> shift,
> +				host->domain->geometry.aperture_end >> shift,
> +				true);

This could be made a little more compact as well.

Thierry
Mikko Perttunen Dec. 14, 2016, 8:50 a.m. UTC | #2
On 05.12.2016 21:49, Thierry Reding wrote:
> On Thu, Nov 10, 2016 at 08:23:42PM +0200, Mikko Perttunen wrote:
>> Add support for the Host1x unit to be located behind
>> an IOMMU. This is required when gather buffers may be
>> allocated non-contiguously in physical memory, as can
>> be the case when TegraDRM is also using the IOMMU.
>>
>> Signed-off-by: Mikko Perttunen <mperttunen@nvidia.com>
>> ---
>>  drivers/gpu/host1x/cdma.c       | 65 +++++++++++++++++++++++++++++------
>>  drivers/gpu/host1x/cdma.h       |  4 ++-
>>  drivers/gpu/host1x/dev.c        | 39 +++++++++++++++++++--
>>  drivers/gpu/host1x/dev.h        |  5 +++
>>  drivers/gpu/host1x/hw/cdma_hw.c | 10 +++---
>>  drivers/gpu/host1x/job.c        | 76 +++++++++++++++++++++++++++++++++++------
>>  drivers/gpu/host1x/job.h        |  1 +
>>  7 files changed, 171 insertions(+), 29 deletions(-)
>>
>> diff --git a/drivers/gpu/host1x/cdma.c b/drivers/gpu/host1x/cdma.c
>> index c5d82a8..14692446e 100644
>> --- a/drivers/gpu/host1x/cdma.c
>> +++ b/drivers/gpu/host1x/cdma.c
>> @@ -51,9 +51,15 @@ static void host1x_pushbuffer_destroy(struct push_buffer *pb)
>>  	struct host1x_cdma *cdma = pb_to_cdma(pb);
>>  	struct host1x *host1x = cdma_to_host1x(cdma);
>>
>> -	if (pb->phys != 0)
>> -		dma_free_wc(host1x->dev, pb->size_bytes + 4, pb->mapped,
>> -			    pb->phys);
>> +	if (!pb->phys)
>> +		return;
>> +
>> +	if (host1x->domain) {
>> +		iommu_unmap(host1x->domain, pb->dma, pb->alloc_size_bytes);
>> +		free_iova(&host1x->iova, iova_pfn(&host1x->iova, pb->dma));
>> +	}
>> +
>> +	dma_free_wc(host1x->dev, pb->alloc_size_bytes, pb->mapped, pb->phys);
>>
>>  	pb->mapped = NULL;
>>  	pb->phys = 0;
>> @@ -66,28 +72,65 @@ static int host1x_pushbuffer_init(struct push_buffer *pb)
>>  {
>>  	struct host1x_cdma *cdma = pb_to_cdma(pb);
>>  	struct host1x *host1x = cdma_to_host1x(cdma);
>> +	struct iova *alloc;
>> +	int err;
>>
>>  	pb->mapped = NULL;
>>  	pb->phys = 0;
>>  	pb->size_bytes = HOST1X_PUSHBUFFER_SLOTS * 8;
>> +	pb->alloc_size_bytes = pb->size_bytes + 4;
>>
>>  	/* initialize buffer pointers */
>>  	pb->fence = pb->size_bytes - 8;
>>  	pb->pos = 0;
>>
>> -	/* allocate and map pushbuffer memory */
>> -	pb->mapped = dma_alloc_wc(host1x->dev, pb->size_bytes + 4, &pb->phys,
>> -				  GFP_KERNEL);
>> -	if (!pb->mapped)
>> -		goto fail;
>> +	if (host1x->domain) {
>> +		unsigned long shift;
>> +
>> +		pb->alloc_size_bytes = iova_align(&host1x->iova,
>> +						  pb->alloc_size_bytes);
>> +
>> +		pb->mapped = dma_alloc_wc(host1x->dev, pb->alloc_size_bytes,
>> +					  &pb->phys, GFP_KERNEL);
>> +		if (!pb->mapped)
>> +			return -ENOMEM;
>> +
>> +		shift = iova_shift(&host1x->iova);
>> +		alloc = alloc_iova(
>> +			&host1x->iova, pb->alloc_size_bytes >> shift,
>> +			host1x->domain->geometry.aperture_end >> shift, true);
>
> Let's precompute some of these values to make this expression more
> compact. For example pb->alloc_size_bytes is used a couple of times, so
> it could be stored temporarily in a local variable with a nice and short
> name such as "size".
>
> If you store the aperture end, or limit_pfn, as I commented in an
> earlier patch, then this also becomes much more compact.

Made a local "size" variable and stored the aperture end and did some 
other cleanup as well.

>
>> +		if (!alloc) {
>> +			err = -ENOMEM;
>> +			goto iommu_free_mem;
>> +		}
>> +
>> +		err = iommu_map(host1x->domain,
>> +				iova_dma_addr(&host1x->iova, alloc),
>> +				pb->phys, pb->alloc_size_bytes,
>> +				IOMMU_READ);
>
> Same here.

Cleaned up.

>
>> +		if (err)
>> +			goto iommu_free_iova;
>> +
>> +		pb->dma = iova_dma_addr(&host1x->iova, alloc);
>> +	} else {
>> +		pb->mapped = dma_alloc_wc(host1x->dev, pb->alloc_size_bytes,
>> +					  &pb->phys, GFP_KERNEL);
>> +		if (!pb->mapped)
>> +			return -ENOMEM;
>> +
>> +		pb->dma = pb->phys;
>> +	}
>>
>>  	host1x_hw_pushbuffer_init(host1x, pb);
>>
>>  	return 0;
>>
>> -fail:
>> -	host1x_pushbuffer_destroy(pb);
>> -	return -ENOMEM;
>> +iommu_free_iova:
>> +	__free_iova(&host1x->iova, alloc);
>> +iommu_free_mem:
>> +	dma_free_wc(host1x->dev, pb->alloc_size_bytes, pb->mapped, pb->phys);
>> +
>> +	return err;
>>  }
>>
>>  /*
>> diff --git a/drivers/gpu/host1x/cdma.h b/drivers/gpu/host1x/cdma.h
>> index 470087a..4b3645f 100644
>> --- a/drivers/gpu/host1x/cdma.h
>> +++ b/drivers/gpu/host1x/cdma.h
>> @@ -43,10 +43,12 @@ struct host1x_job;
>>
>>  struct push_buffer {
>>  	void *mapped;			/* mapped pushbuffer memory */
>> -	dma_addr_t phys;		/* physical address of pushbuffer */
>> +	dma_addr_t dma;			/* device address of pushbuffer */
>> +	phys_addr_t phys;		/* physical address of pushbuffer */
>>  	u32 fence;			/* index we've written */
>>  	u32 pos;			/* index to write to */
>>  	u32 size_bytes;
>> +	u32 alloc_size_bytes;
>
> Maybe just "alloc_size"?

Renamed. Also renamed size_bytes to size for consistency.

>
>>  };
>>
>>  struct buffer_timeout {
>> diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c
>> index a62317a..ca4527e 100644
>> --- a/drivers/gpu/host1x/dev.c
>> +++ b/drivers/gpu/host1x/dev.c
>> @@ -168,16 +168,36 @@ static int host1x_probe(struct platform_device *pdev)
>>  		return err;
>>  	}
>>
>> +	if (iommu_present(&platform_bus_type)) {
>> +		struct iommu_domain_geometry *geometry;
>> +		unsigned long order;
>> +
>> +		host->domain = iommu_domain_alloc(&platform_bus_type);
>> +		if (!host->domain)
>> +			return -ENOMEM;
>> +
>> +		err = iommu_attach_device(host->domain, &pdev->dev);
>> +		if (err)
>> +			goto fail_free_domain;
>> +
>> +		geometry = &host->domain->geometry;
>> +
>> +		order = __ffs(host->domain->pgsize_bitmap);
>> +		init_iova_domain(&host->iova, 1UL << order,
>> +				 geometry->aperture_start >> order,
>> +				 geometry->aperture_end >> order);
>> +	}
>> +
>>  	err = host1x_channel_list_init(host);
>>  	if (err) {
>>  		dev_err(&pdev->dev, "failed to initialize channel list\n");
>> -		return err;
>> +		goto fail_detach_device;
>>  	}
>>
>>  	err = clk_prepare_enable(host->clk);
>>  	if (err < 0) {
>>  		dev_err(&pdev->dev, "failed to enable clock\n");
>> -		return err;
>> +		goto fail_detach_device;
>>  	}
>>
>>  	err = host1x_syncpt_init(host);
>> @@ -206,6 +226,15 @@ static int host1x_probe(struct platform_device *pdev)
>>  	host1x_syncpt_deinit(host);
>>  fail_unprepare_disable:
>>  	clk_disable_unprepare(host->clk);
>> +fail_detach_device:
>> +	if (host->domain) {
>> +		put_iova_domain(&host->iova);
>> +		iommu_detach_device(host->domain, &pdev->dev);
>> +	}
>> +fail_free_domain:
>> +	if (host->domain)
>> +		iommu_domain_free(host->domain);
>> +
>>  	return err;
>>  }
>>
>> @@ -218,6 +247,12 @@ static int host1x_remove(struct platform_device *pdev)
>>  	host1x_syncpt_deinit(host);
>>  	clk_disable_unprepare(host->clk);
>>
>> +	if (host->domain) {
>> +		put_iova_domain(&host->iova);
>> +		iommu_detach_device(host->domain, &pdev->dev);
>> +		iommu_domain_free(host->domain);
>> +	}
>> +
>>  	return 0;
>>  }
>>
>> diff --git a/drivers/gpu/host1x/dev.h b/drivers/gpu/host1x/dev.h
>> index 5220510..6189825 100644
>> --- a/drivers/gpu/host1x/dev.h
>> +++ b/drivers/gpu/host1x/dev.h
>> @@ -19,6 +19,8 @@
>>
>>  #include <linux/platform_device.h>
>>  #include <linux/device.h>
>> +#include <linux/iommu.h>
>> +#include <linux/iova.h>
>>
>>  #include "channel.h"
>>  #include "syncpt.h"
>> @@ -108,6 +110,9 @@ struct host1x {
>>  	struct device *dev;
>>  	struct clk *clk;
>>
>> +	struct iommu_domain *domain;
>> +	struct iova_domain iova;
>> +
>>  	struct mutex intr_mutex;
>>  	int intr_syncpt_irq;
>>
>> diff --git a/drivers/gpu/host1x/hw/cdma_hw.c b/drivers/gpu/host1x/hw/cdma_hw.c
>> index 659c1bb..b8c0348 100644
>> --- a/drivers/gpu/host1x/hw/cdma_hw.c
>> +++ b/drivers/gpu/host1x/hw/cdma_hw.c
>> @@ -55,7 +55,7 @@ static void cdma_timeout_cpu_incr(struct host1x_cdma *cdma, u32 getptr,
>>  		*(p++) = HOST1X_OPCODE_NOP;
>>  		*(p++) = HOST1X_OPCODE_NOP;
>>  		dev_dbg(host1x->dev, "%s: NOP at %pad+%#x\n", __func__,
>> -			&pb->phys, getptr);
>> +			&pb->dma, getptr);
>>  		getptr = (getptr + 8) & (pb->size_bytes - 1);
>>  	}
>>
>> @@ -78,9 +78,9 @@ static void cdma_start(struct host1x_cdma *cdma)
>>  			 HOST1X_CHANNEL_DMACTRL);
>>
>>  	/* set base, put and end pointer */
>> -	host1x_ch_writel(ch, cdma->push_buffer.phys, HOST1X_CHANNEL_DMASTART);
>> +	host1x_ch_writel(ch, cdma->push_buffer.dma, HOST1X_CHANNEL_DMASTART);
>>  	host1x_ch_writel(ch, cdma->push_buffer.pos, HOST1X_CHANNEL_DMAPUT);
>> -	host1x_ch_writel(ch, cdma->push_buffer.phys +
>> +	host1x_ch_writel(ch, cdma->push_buffer.dma +
>>  			 cdma->push_buffer.size_bytes + 4,
>>  			 HOST1X_CHANNEL_DMAEND);
>>
>> @@ -115,8 +115,8 @@ static void cdma_timeout_restart(struct host1x_cdma *cdma, u32 getptr)
>>  			 HOST1X_CHANNEL_DMACTRL);
>>
>>  	/* set base, end pointer (all of memory) */
>> -	host1x_ch_writel(ch, cdma->push_buffer.phys, HOST1X_CHANNEL_DMASTART);
>> -	host1x_ch_writel(ch, cdma->push_buffer.phys +
>> +	host1x_ch_writel(ch, cdma->push_buffer.dma, HOST1X_CHANNEL_DMASTART);
>> +	host1x_ch_writel(ch, cdma->push_buffer.dma +
>>  			 cdma->push_buffer.size_bytes,
>>  			 HOST1X_CHANNEL_DMAEND);
>>
>> diff --git a/drivers/gpu/host1x/job.c b/drivers/gpu/host1x/job.c
>> index a91b7c4..222d930 100644
>> --- a/drivers/gpu/host1x/job.c
>> +++ b/drivers/gpu/host1x/job.c
>> @@ -174,8 +174,9 @@ static int do_waitchks(struct host1x_job *job, struct host1x *host,
>>  	return 0;
>>  }
>>
>> -static unsigned int pin_job(struct host1x_job *job)
>> +static unsigned int pin_job(struct host1x *host, struct host1x_job *job)
>>  {
>> +	int err;
>>  	unsigned int i;
>
> Maybe use an inverse christmas tree for variables as in other parts of
> this driver? That is, add the new int err belowe the existing unsined
> int i.

Fixed.

>
>>
>>  	job->num_unpins = 0;
>> @@ -186,12 +187,16 @@ static unsigned int pin_job(struct host1x_job *job)
>>  		dma_addr_t phys_addr;
>>
>>  		reloc->target.bo = host1x_bo_get(reloc->target.bo);
>> -		if (!reloc->target.bo)
>> +		if (!reloc->target.bo) {
>> +			err = -EINVAL;
>>  			goto unpin;
>> +		}
>>
>>  		phys_addr = host1x_bo_pin(reloc->target.bo, &sgt);
>> -		if (!phys_addr)
>> +		if (!phys_addr) {
>> +			err = -EINVAL;
>>  			goto unpin;
>> +		}
>>
>>  		job->addr_phys[job->num_unpins] = phys_addr;
>>  		job->unpins[job->num_unpins].bo = reloc->target.bo;
>> @@ -204,25 +209,68 @@ static unsigned int pin_job(struct host1x_job *job)
>>  		struct sg_table *sgt;
>>  		dma_addr_t phys_addr;
>>
>> +		size_t gather_size = 0;
>> +		struct scatterlist *sg;
>> +		struct iova *alloc;
>> +		unsigned long shift;
>> +		int j;
>
> There should be no blank line between blocks of variable declarations.
> Also, make j an unsigned int.

Fixed.

>
>> +
>>  		g->bo = host1x_bo_get(g->bo);
>> -		if (!g->bo)
>> +		if (!g->bo) {
>> +			err = -EINVAL;
>>  			goto unpin;
>> +		}
>>
>>  		phys_addr = host1x_bo_pin(g->bo, &sgt);
>> -		if (!phys_addr)
>> +		if (!phys_addr) {
>> +			err = -EINVAL;
>>  			goto unpin;
>> +		}
>> +
>> +		if (!IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL) && host->domain) {
>> +			for_each_sg(sgt->sgl, sg, sgt->nents, j) {
>> +				gather_size += sg->length;
>> +			}
>
> No need for curly brackets here.

Fixed.

>
>> +			gather_size = iova_align(&host->iova, gather_size);
>> +
>> +			shift = iova_shift(&host->iova);
>> +			alloc = alloc_iova(
>> +				&host->iova, gather_size >> shift,
>> +				host->domain->geometry.aperture_end >> shift,
>> +				true);
>
> This could be made a little more compact as well.

Fixed.

>
> Thierry
>

Thanks,
Mikko.
diff mbox

Patch

diff --git a/drivers/gpu/host1x/cdma.c b/drivers/gpu/host1x/cdma.c
index c5d82a8..14692446e 100644
--- a/drivers/gpu/host1x/cdma.c
+++ b/drivers/gpu/host1x/cdma.c
@@ -51,9 +51,15 @@  static void host1x_pushbuffer_destroy(struct push_buffer *pb)
 	struct host1x_cdma *cdma = pb_to_cdma(pb);
 	struct host1x *host1x = cdma_to_host1x(cdma);
 
-	if (pb->phys != 0)
-		dma_free_wc(host1x->dev, pb->size_bytes + 4, pb->mapped,
-			    pb->phys);
+	if (!pb->phys)
+		return;
+
+	if (host1x->domain) {
+		iommu_unmap(host1x->domain, pb->dma, pb->alloc_size_bytes);
+		free_iova(&host1x->iova, iova_pfn(&host1x->iova, pb->dma));
+	}
+
+	dma_free_wc(host1x->dev, pb->alloc_size_bytes, pb->mapped, pb->phys);
 
 	pb->mapped = NULL;
 	pb->phys = 0;
@@ -66,28 +72,65 @@  static int host1x_pushbuffer_init(struct push_buffer *pb)
 {
 	struct host1x_cdma *cdma = pb_to_cdma(pb);
 	struct host1x *host1x = cdma_to_host1x(cdma);
+	struct iova *alloc;
+	int err;
 
 	pb->mapped = NULL;
 	pb->phys = 0;
 	pb->size_bytes = HOST1X_PUSHBUFFER_SLOTS * 8;
+	pb->alloc_size_bytes = pb->size_bytes + 4;
 
 	/* initialize buffer pointers */
 	pb->fence = pb->size_bytes - 8;
 	pb->pos = 0;
 
-	/* allocate and map pushbuffer memory */
-	pb->mapped = dma_alloc_wc(host1x->dev, pb->size_bytes + 4, &pb->phys,
-				  GFP_KERNEL);
-	if (!pb->mapped)
-		goto fail;
+	if (host1x->domain) {
+		unsigned long shift;
+
+		pb->alloc_size_bytes = iova_align(&host1x->iova,
+						  pb->alloc_size_bytes);
+
+		pb->mapped = dma_alloc_wc(host1x->dev, pb->alloc_size_bytes,
+					  &pb->phys, GFP_KERNEL);
+		if (!pb->mapped)
+			return -ENOMEM;
+
+		shift = iova_shift(&host1x->iova);
+		alloc = alloc_iova(
+			&host1x->iova, pb->alloc_size_bytes >> shift,
+			host1x->domain->geometry.aperture_end >> shift, true);
+		if (!alloc) {
+			err = -ENOMEM;
+			goto iommu_free_mem;
+		}
+
+		err = iommu_map(host1x->domain,
+				iova_dma_addr(&host1x->iova, alloc),
+				pb->phys, pb->alloc_size_bytes,
+				IOMMU_READ);
+		if (err)
+			goto iommu_free_iova;
+
+		pb->dma = iova_dma_addr(&host1x->iova, alloc);
+	} else {
+		pb->mapped = dma_alloc_wc(host1x->dev, pb->alloc_size_bytes,
+					  &pb->phys, GFP_KERNEL);
+		if (!pb->mapped)
+			return -ENOMEM;
+
+		pb->dma = pb->phys;
+	}
 
 	host1x_hw_pushbuffer_init(host1x, pb);
 
 	return 0;
 
-fail:
-	host1x_pushbuffer_destroy(pb);
-	return -ENOMEM;
+iommu_free_iova:
+	__free_iova(&host1x->iova, alloc);
+iommu_free_mem:
+	dma_free_wc(host1x->dev, pb->alloc_size_bytes, pb->mapped, pb->phys);
+
+	return err;
 }
 
 /*
diff --git a/drivers/gpu/host1x/cdma.h b/drivers/gpu/host1x/cdma.h
index 470087a..4b3645f 100644
--- a/drivers/gpu/host1x/cdma.h
+++ b/drivers/gpu/host1x/cdma.h
@@ -43,10 +43,12 @@  struct host1x_job;
 
 struct push_buffer {
 	void *mapped;			/* mapped pushbuffer memory */
-	dma_addr_t phys;		/* physical address of pushbuffer */
+	dma_addr_t dma;			/* device address of pushbuffer */
+	phys_addr_t phys;		/* physical address of pushbuffer */
 	u32 fence;			/* index we've written */
 	u32 pos;			/* index to write to */
 	u32 size_bytes;
+	u32 alloc_size_bytes;
 };
 
 struct buffer_timeout {
diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c
index a62317a..ca4527e 100644
--- a/drivers/gpu/host1x/dev.c
+++ b/drivers/gpu/host1x/dev.c
@@ -168,16 +168,36 @@  static int host1x_probe(struct platform_device *pdev)
 		return err;
 	}
 
+	if (iommu_present(&platform_bus_type)) {
+		struct iommu_domain_geometry *geometry;
+		unsigned long order;
+
+		host->domain = iommu_domain_alloc(&platform_bus_type);
+		if (!host->domain)
+			return -ENOMEM;
+
+		err = iommu_attach_device(host->domain, &pdev->dev);
+		if (err)
+			goto fail_free_domain;
+
+		geometry = &host->domain->geometry;
+
+		order = __ffs(host->domain->pgsize_bitmap);
+		init_iova_domain(&host->iova, 1UL << order,
+				 geometry->aperture_start >> order,
+				 geometry->aperture_end >> order);
+	}
+
 	err = host1x_channel_list_init(host);
 	if (err) {
 		dev_err(&pdev->dev, "failed to initialize channel list\n");
-		return err;
+		goto fail_detach_device;
 	}
 
 	err = clk_prepare_enable(host->clk);
 	if (err < 0) {
 		dev_err(&pdev->dev, "failed to enable clock\n");
-		return err;
+		goto fail_detach_device;
 	}
 
 	err = host1x_syncpt_init(host);
@@ -206,6 +226,15 @@  static int host1x_probe(struct platform_device *pdev)
 	host1x_syncpt_deinit(host);
 fail_unprepare_disable:
 	clk_disable_unprepare(host->clk);
+fail_detach_device:
+	if (host->domain) {
+		put_iova_domain(&host->iova);
+		iommu_detach_device(host->domain, &pdev->dev);
+	}
+fail_free_domain:
+	if (host->domain)
+		iommu_domain_free(host->domain);
+
 	return err;
 }
 
@@ -218,6 +247,12 @@  static int host1x_remove(struct platform_device *pdev)
 	host1x_syncpt_deinit(host);
 	clk_disable_unprepare(host->clk);
 
+	if (host->domain) {
+		put_iova_domain(&host->iova);
+		iommu_detach_device(host->domain, &pdev->dev);
+		iommu_domain_free(host->domain);
+	}
+
 	return 0;
 }
 
diff --git a/drivers/gpu/host1x/dev.h b/drivers/gpu/host1x/dev.h
index 5220510..6189825 100644
--- a/drivers/gpu/host1x/dev.h
+++ b/drivers/gpu/host1x/dev.h
@@ -19,6 +19,8 @@ 
 
 #include <linux/platform_device.h>
 #include <linux/device.h>
+#include <linux/iommu.h>
+#include <linux/iova.h>
 
 #include "channel.h"
 #include "syncpt.h"
@@ -108,6 +110,9 @@  struct host1x {
 	struct device *dev;
 	struct clk *clk;
 
+	struct iommu_domain *domain;
+	struct iova_domain iova;
+
 	struct mutex intr_mutex;
 	int intr_syncpt_irq;
 
diff --git a/drivers/gpu/host1x/hw/cdma_hw.c b/drivers/gpu/host1x/hw/cdma_hw.c
index 659c1bb..b8c0348 100644
--- a/drivers/gpu/host1x/hw/cdma_hw.c
+++ b/drivers/gpu/host1x/hw/cdma_hw.c
@@ -55,7 +55,7 @@  static void cdma_timeout_cpu_incr(struct host1x_cdma *cdma, u32 getptr,
 		*(p++) = HOST1X_OPCODE_NOP;
 		*(p++) = HOST1X_OPCODE_NOP;
 		dev_dbg(host1x->dev, "%s: NOP at %pad+%#x\n", __func__,
-			&pb->phys, getptr);
+			&pb->dma, getptr);
 		getptr = (getptr + 8) & (pb->size_bytes - 1);
 	}
 
@@ -78,9 +78,9 @@  static void cdma_start(struct host1x_cdma *cdma)
 			 HOST1X_CHANNEL_DMACTRL);
 
 	/* set base, put and end pointer */
-	host1x_ch_writel(ch, cdma->push_buffer.phys, HOST1X_CHANNEL_DMASTART);
+	host1x_ch_writel(ch, cdma->push_buffer.dma, HOST1X_CHANNEL_DMASTART);
 	host1x_ch_writel(ch, cdma->push_buffer.pos, HOST1X_CHANNEL_DMAPUT);
-	host1x_ch_writel(ch, cdma->push_buffer.phys +
+	host1x_ch_writel(ch, cdma->push_buffer.dma +
 			 cdma->push_buffer.size_bytes + 4,
 			 HOST1X_CHANNEL_DMAEND);
 
@@ -115,8 +115,8 @@  static void cdma_timeout_restart(struct host1x_cdma *cdma, u32 getptr)
 			 HOST1X_CHANNEL_DMACTRL);
 
 	/* set base, end pointer (all of memory) */
-	host1x_ch_writel(ch, cdma->push_buffer.phys, HOST1X_CHANNEL_DMASTART);
-	host1x_ch_writel(ch, cdma->push_buffer.phys +
+	host1x_ch_writel(ch, cdma->push_buffer.dma, HOST1X_CHANNEL_DMASTART);
+	host1x_ch_writel(ch, cdma->push_buffer.dma +
 			 cdma->push_buffer.size_bytes,
 			 HOST1X_CHANNEL_DMAEND);
 
diff --git a/drivers/gpu/host1x/job.c b/drivers/gpu/host1x/job.c
index a91b7c4..222d930 100644
--- a/drivers/gpu/host1x/job.c
+++ b/drivers/gpu/host1x/job.c
@@ -174,8 +174,9 @@  static int do_waitchks(struct host1x_job *job, struct host1x *host,
 	return 0;
 }
 
-static unsigned int pin_job(struct host1x_job *job)
+static unsigned int pin_job(struct host1x *host, struct host1x_job *job)
 {
+	int err;
 	unsigned int i;
 
 	job->num_unpins = 0;
@@ -186,12 +187,16 @@  static unsigned int pin_job(struct host1x_job *job)
 		dma_addr_t phys_addr;
 
 		reloc->target.bo = host1x_bo_get(reloc->target.bo);
-		if (!reloc->target.bo)
+		if (!reloc->target.bo) {
+			err = -EINVAL;
 			goto unpin;
+		}
 
 		phys_addr = host1x_bo_pin(reloc->target.bo, &sgt);
-		if (!phys_addr)
+		if (!phys_addr) {
+			err = -EINVAL;
 			goto unpin;
+		}
 
 		job->addr_phys[job->num_unpins] = phys_addr;
 		job->unpins[job->num_unpins].bo = reloc->target.bo;
@@ -204,25 +209,68 @@  static unsigned int pin_job(struct host1x_job *job)
 		struct sg_table *sgt;
 		dma_addr_t phys_addr;
 
+		size_t gather_size = 0;
+		struct scatterlist *sg;
+		struct iova *alloc;
+		unsigned long shift;
+		int j;
+
 		g->bo = host1x_bo_get(g->bo);
-		if (!g->bo)
+		if (!g->bo) {
+			err = -EINVAL;
 			goto unpin;
+		}
 
 		phys_addr = host1x_bo_pin(g->bo, &sgt);
-		if (!phys_addr)
+		if (!phys_addr) {
+			err = -EINVAL;
 			goto unpin;
+		}
+
+		if (!IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL) && host->domain) {
+			for_each_sg(sgt->sgl, sg, sgt->nents, j) {
+				gather_size += sg->length;
+			}
+			gather_size = iova_align(&host->iova, gather_size);
+
+			shift = iova_shift(&host->iova);
+			alloc = alloc_iova(
+				&host->iova, gather_size >> shift,
+				host->domain->geometry.aperture_end >> shift,
+				true);
+			if (!alloc) {
+				err = -ENOMEM;
+				goto unpin;
+			}
+
+			err = iommu_map_sg(host->domain,
+					iova_dma_addr(&host->iova, alloc),
+					sgt->sgl, sgt->nents, IOMMU_READ);
+			if (err == 0) {
+				__free_iova(&host->iova, alloc);
+				err = -EINVAL;
+				goto unpin;
+			}
+
+			job->addr_phys[job->num_unpins] =
+				iova_dma_addr(&host->iova, alloc);
+			job->unpins[job->num_unpins].size = gather_size;
+		} else {
+			job->addr_phys[job->num_unpins] = phys_addr;
+		}
+
+		job->gather_addr_phys[i] = job->addr_phys[job->num_unpins];
 
-		job->addr_phys[job->num_unpins] = phys_addr;
 		job->unpins[job->num_unpins].bo = g->bo;
 		job->unpins[job->num_unpins].sgt = sgt;
 		job->num_unpins++;
 	}
 
-	return job->num_unpins;
+	return 0;
 
 unpin:
 	host1x_job_unpin(job);
-	return 0;
+	return err;
 }
 
 static int do_relocs(struct host1x_job *job, struct host1x_bo *cmdbuf)
@@ -525,8 +573,8 @@  int host1x_job_pin(struct host1x_job *job, struct device *dev)
 		host1x_syncpt_load(host->syncpt + i);
 
 	/* pin memory */
-	err = pin_job(job);
-	if (!err)
+	err = pin_job(host, job);
+	if (err)
 		goto out;
 
 	/* patch gathers */
@@ -569,11 +617,19 @@  EXPORT_SYMBOL(host1x_job_pin);
 
 void host1x_job_unpin(struct host1x_job *job)
 {
+	struct host1x *host = dev_get_drvdata(job->channel->dev->parent);
 	unsigned int i;
 
 	for (i = 0; i < job->num_unpins; i++) {
 		struct host1x_job_unpin_data *unpin = &job->unpins[i];
 
+		if (!IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL) && host->domain) {
+			iommu_unmap(host->domain, job->addr_phys[i],
+				    unpin->size);
+			free_iova(&host->iova,
+				iova_pfn(&host->iova, job->addr_phys[i]));
+		}
+
 		host1x_bo_unpin(unpin->bo, unpin->sgt);
 		host1x_bo_put(unpin->bo);
 	}
diff --git a/drivers/gpu/host1x/job.h b/drivers/gpu/host1x/job.h
index 8b3c15d..878239c4 100644
--- a/drivers/gpu/host1x/job.h
+++ b/drivers/gpu/host1x/job.h
@@ -44,6 +44,7 @@  struct host1x_waitchk {
 struct host1x_job_unpin_data {
 	struct host1x_bo *bo;
 	struct sg_table *sgt;
+	size_t size;
 };
 
 /*