Message ID | 20161110182345.31777-6-mperttunen@nvidia.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Thu, Nov 10, 2016 at 08:23:42PM +0200, Mikko Perttunen wrote: > Add support for the Host1x unit to be located behind > an IOMMU. This is required when gather buffers may be > allocated non-contiguously in physical memory, as can > be the case when TegraDRM is also using the IOMMU. > > Signed-off-by: Mikko Perttunen <mperttunen@nvidia.com> > --- > drivers/gpu/host1x/cdma.c | 65 +++++++++++++++++++++++++++++------ > drivers/gpu/host1x/cdma.h | 4 ++- > drivers/gpu/host1x/dev.c | 39 +++++++++++++++++++-- > drivers/gpu/host1x/dev.h | 5 +++ > drivers/gpu/host1x/hw/cdma_hw.c | 10 +++--- > drivers/gpu/host1x/job.c | 76 +++++++++++++++++++++++++++++++++++------ > drivers/gpu/host1x/job.h | 1 + > 7 files changed, 171 insertions(+), 29 deletions(-) > > diff --git a/drivers/gpu/host1x/cdma.c b/drivers/gpu/host1x/cdma.c > index c5d82a8..14692446e 100644 > --- a/drivers/gpu/host1x/cdma.c > +++ b/drivers/gpu/host1x/cdma.c > @@ -51,9 +51,15 @@ static void host1x_pushbuffer_destroy(struct push_buffer *pb) > struct host1x_cdma *cdma = pb_to_cdma(pb); > struct host1x *host1x = cdma_to_host1x(cdma); > > - if (pb->phys != 0) > - dma_free_wc(host1x->dev, pb->size_bytes + 4, pb->mapped, > - pb->phys); > + if (!pb->phys) > + return; > + > + if (host1x->domain) { > + iommu_unmap(host1x->domain, pb->dma, pb->alloc_size_bytes); > + free_iova(&host1x->iova, iova_pfn(&host1x->iova, pb->dma)); > + } > + > + dma_free_wc(host1x->dev, pb->alloc_size_bytes, pb->mapped, pb->phys); > > pb->mapped = NULL; > pb->phys = 0; > @@ -66,28 +72,65 @@ static int host1x_pushbuffer_init(struct push_buffer *pb) > { > struct host1x_cdma *cdma = pb_to_cdma(pb); > struct host1x *host1x = cdma_to_host1x(cdma); > + struct iova *alloc; > + int err; > > pb->mapped = NULL; > pb->phys = 0; > pb->size_bytes = HOST1X_PUSHBUFFER_SLOTS * 8; > + pb->alloc_size_bytes = pb->size_bytes + 4; > > /* initialize buffer pointers */ > pb->fence = pb->size_bytes - 8; > pb->pos = 0; > > - /* allocate and map pushbuffer memory */ > - pb->mapped = dma_alloc_wc(host1x->dev, pb->size_bytes + 4, &pb->phys, > - GFP_KERNEL); > - if (!pb->mapped) > - goto fail; > + if (host1x->domain) { > + unsigned long shift; > + > + pb->alloc_size_bytes = iova_align(&host1x->iova, > + pb->alloc_size_bytes); > + > + pb->mapped = dma_alloc_wc(host1x->dev, pb->alloc_size_bytes, > + &pb->phys, GFP_KERNEL); > + if (!pb->mapped) > + return -ENOMEM; > + > + shift = iova_shift(&host1x->iova); > + alloc = alloc_iova( > + &host1x->iova, pb->alloc_size_bytes >> shift, > + host1x->domain->geometry.aperture_end >> shift, true); Let's precompute some of these values to make this expression more compact. For example pb->alloc_size_bytes is used a couple of times, so it could be stored temporarily in a local variable with a nice and short name such as "size". If you store the aperture end, or limit_pfn, as I commented in an earlier patch, then this also becomes much more compact. > + if (!alloc) { > + err = -ENOMEM; > + goto iommu_free_mem; > + } > + > + err = iommu_map(host1x->domain, > + iova_dma_addr(&host1x->iova, alloc), > + pb->phys, pb->alloc_size_bytes, > + IOMMU_READ); Same here. > + if (err) > + goto iommu_free_iova; > + > + pb->dma = iova_dma_addr(&host1x->iova, alloc); > + } else { > + pb->mapped = dma_alloc_wc(host1x->dev, pb->alloc_size_bytes, > + &pb->phys, GFP_KERNEL); > + if (!pb->mapped) > + return -ENOMEM; > + > + pb->dma = pb->phys; > + } > > host1x_hw_pushbuffer_init(host1x, pb); > > return 0; > > -fail: > - host1x_pushbuffer_destroy(pb); > - return -ENOMEM; > +iommu_free_iova: > + __free_iova(&host1x->iova, alloc); > +iommu_free_mem: > + dma_free_wc(host1x->dev, pb->alloc_size_bytes, pb->mapped, pb->phys); > + > + return err; > } > > /* > diff --git a/drivers/gpu/host1x/cdma.h b/drivers/gpu/host1x/cdma.h > index 470087a..4b3645f 100644 > --- a/drivers/gpu/host1x/cdma.h > +++ b/drivers/gpu/host1x/cdma.h > @@ -43,10 +43,12 @@ struct host1x_job; > > struct push_buffer { > void *mapped; /* mapped pushbuffer memory */ > - dma_addr_t phys; /* physical address of pushbuffer */ > + dma_addr_t dma; /* device address of pushbuffer */ > + phys_addr_t phys; /* physical address of pushbuffer */ > u32 fence; /* index we've written */ > u32 pos; /* index to write to */ > u32 size_bytes; > + u32 alloc_size_bytes; Maybe just "alloc_size"? > }; > > struct buffer_timeout { > diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c > index a62317a..ca4527e 100644 > --- a/drivers/gpu/host1x/dev.c > +++ b/drivers/gpu/host1x/dev.c > @@ -168,16 +168,36 @@ static int host1x_probe(struct platform_device *pdev) > return err; > } > > + if (iommu_present(&platform_bus_type)) { > + struct iommu_domain_geometry *geometry; > + unsigned long order; > + > + host->domain = iommu_domain_alloc(&platform_bus_type); > + if (!host->domain) > + return -ENOMEM; > + > + err = iommu_attach_device(host->domain, &pdev->dev); > + if (err) > + goto fail_free_domain; > + > + geometry = &host->domain->geometry; > + > + order = __ffs(host->domain->pgsize_bitmap); > + init_iova_domain(&host->iova, 1UL << order, > + geometry->aperture_start >> order, > + geometry->aperture_end >> order); > + } > + > err = host1x_channel_list_init(host); > if (err) { > dev_err(&pdev->dev, "failed to initialize channel list\n"); > - return err; > + goto fail_detach_device; > } > > err = clk_prepare_enable(host->clk); > if (err < 0) { > dev_err(&pdev->dev, "failed to enable clock\n"); > - return err; > + goto fail_detach_device; > } > > err = host1x_syncpt_init(host); > @@ -206,6 +226,15 @@ static int host1x_probe(struct platform_device *pdev) > host1x_syncpt_deinit(host); > fail_unprepare_disable: > clk_disable_unprepare(host->clk); > +fail_detach_device: > + if (host->domain) { > + put_iova_domain(&host->iova); > + iommu_detach_device(host->domain, &pdev->dev); > + } > +fail_free_domain: > + if (host->domain) > + iommu_domain_free(host->domain); > + > return err; > } > > @@ -218,6 +247,12 @@ static int host1x_remove(struct platform_device *pdev) > host1x_syncpt_deinit(host); > clk_disable_unprepare(host->clk); > > + if (host->domain) { > + put_iova_domain(&host->iova); > + iommu_detach_device(host->domain, &pdev->dev); > + iommu_domain_free(host->domain); > + } > + > return 0; > } > > diff --git a/drivers/gpu/host1x/dev.h b/drivers/gpu/host1x/dev.h > index 5220510..6189825 100644 > --- a/drivers/gpu/host1x/dev.h > +++ b/drivers/gpu/host1x/dev.h > @@ -19,6 +19,8 @@ > > #include <linux/platform_device.h> > #include <linux/device.h> > +#include <linux/iommu.h> > +#include <linux/iova.h> > > #include "channel.h" > #include "syncpt.h" > @@ -108,6 +110,9 @@ struct host1x { > struct device *dev; > struct clk *clk; > > + struct iommu_domain *domain; > + struct iova_domain iova; > + > struct mutex intr_mutex; > int intr_syncpt_irq; > > diff --git a/drivers/gpu/host1x/hw/cdma_hw.c b/drivers/gpu/host1x/hw/cdma_hw.c > index 659c1bb..b8c0348 100644 > --- a/drivers/gpu/host1x/hw/cdma_hw.c > +++ b/drivers/gpu/host1x/hw/cdma_hw.c > @@ -55,7 +55,7 @@ static void cdma_timeout_cpu_incr(struct host1x_cdma *cdma, u32 getptr, > *(p++) = HOST1X_OPCODE_NOP; > *(p++) = HOST1X_OPCODE_NOP; > dev_dbg(host1x->dev, "%s: NOP at %pad+%#x\n", __func__, > - &pb->phys, getptr); > + &pb->dma, getptr); > getptr = (getptr + 8) & (pb->size_bytes - 1); > } > > @@ -78,9 +78,9 @@ static void cdma_start(struct host1x_cdma *cdma) > HOST1X_CHANNEL_DMACTRL); > > /* set base, put and end pointer */ > - host1x_ch_writel(ch, cdma->push_buffer.phys, HOST1X_CHANNEL_DMASTART); > + host1x_ch_writel(ch, cdma->push_buffer.dma, HOST1X_CHANNEL_DMASTART); > host1x_ch_writel(ch, cdma->push_buffer.pos, HOST1X_CHANNEL_DMAPUT); > - host1x_ch_writel(ch, cdma->push_buffer.phys + > + host1x_ch_writel(ch, cdma->push_buffer.dma + > cdma->push_buffer.size_bytes + 4, > HOST1X_CHANNEL_DMAEND); > > @@ -115,8 +115,8 @@ static void cdma_timeout_restart(struct host1x_cdma *cdma, u32 getptr) > HOST1X_CHANNEL_DMACTRL); > > /* set base, end pointer (all of memory) */ > - host1x_ch_writel(ch, cdma->push_buffer.phys, HOST1X_CHANNEL_DMASTART); > - host1x_ch_writel(ch, cdma->push_buffer.phys + > + host1x_ch_writel(ch, cdma->push_buffer.dma, HOST1X_CHANNEL_DMASTART); > + host1x_ch_writel(ch, cdma->push_buffer.dma + > cdma->push_buffer.size_bytes, > HOST1X_CHANNEL_DMAEND); > > diff --git a/drivers/gpu/host1x/job.c b/drivers/gpu/host1x/job.c > index a91b7c4..222d930 100644 > --- a/drivers/gpu/host1x/job.c > +++ b/drivers/gpu/host1x/job.c > @@ -174,8 +174,9 @@ static int do_waitchks(struct host1x_job *job, struct host1x *host, > return 0; > } > > -static unsigned int pin_job(struct host1x_job *job) > +static unsigned int pin_job(struct host1x *host, struct host1x_job *job) > { > + int err; > unsigned int i; Maybe use an inverse christmas tree for variables as in other parts of this driver? That is, add the new int err belowe the existing unsined int i. > > job->num_unpins = 0; > @@ -186,12 +187,16 @@ static unsigned int pin_job(struct host1x_job *job) > dma_addr_t phys_addr; > > reloc->target.bo = host1x_bo_get(reloc->target.bo); > - if (!reloc->target.bo) > + if (!reloc->target.bo) { > + err = -EINVAL; > goto unpin; > + } > > phys_addr = host1x_bo_pin(reloc->target.bo, &sgt); > - if (!phys_addr) > + if (!phys_addr) { > + err = -EINVAL; > goto unpin; > + } > > job->addr_phys[job->num_unpins] = phys_addr; > job->unpins[job->num_unpins].bo = reloc->target.bo; > @@ -204,25 +209,68 @@ static unsigned int pin_job(struct host1x_job *job) > struct sg_table *sgt; > dma_addr_t phys_addr; > > + size_t gather_size = 0; > + struct scatterlist *sg; > + struct iova *alloc; > + unsigned long shift; > + int j; There should be no blank line between blocks of variable declarations. Also, make j an unsigned int. > + > g->bo = host1x_bo_get(g->bo); > - if (!g->bo) > + if (!g->bo) { > + err = -EINVAL; > goto unpin; > + } > > phys_addr = host1x_bo_pin(g->bo, &sgt); > - if (!phys_addr) > + if (!phys_addr) { > + err = -EINVAL; > goto unpin; > + } > + > + if (!IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL) && host->domain) { > + for_each_sg(sgt->sgl, sg, sgt->nents, j) { > + gather_size += sg->length; > + } No need for curly brackets here. > + gather_size = iova_align(&host->iova, gather_size); > + > + shift = iova_shift(&host->iova); > + alloc = alloc_iova( > + &host->iova, gather_size >> shift, > + host->domain->geometry.aperture_end >> shift, > + true); This could be made a little more compact as well. Thierry
On 05.12.2016 21:49, Thierry Reding wrote: > On Thu, Nov 10, 2016 at 08:23:42PM +0200, Mikko Perttunen wrote: >> Add support for the Host1x unit to be located behind >> an IOMMU. This is required when gather buffers may be >> allocated non-contiguously in physical memory, as can >> be the case when TegraDRM is also using the IOMMU. >> >> Signed-off-by: Mikko Perttunen <mperttunen@nvidia.com> >> --- >> drivers/gpu/host1x/cdma.c | 65 +++++++++++++++++++++++++++++------ >> drivers/gpu/host1x/cdma.h | 4 ++- >> drivers/gpu/host1x/dev.c | 39 +++++++++++++++++++-- >> drivers/gpu/host1x/dev.h | 5 +++ >> drivers/gpu/host1x/hw/cdma_hw.c | 10 +++--- >> drivers/gpu/host1x/job.c | 76 +++++++++++++++++++++++++++++++++++------ >> drivers/gpu/host1x/job.h | 1 + >> 7 files changed, 171 insertions(+), 29 deletions(-) >> >> diff --git a/drivers/gpu/host1x/cdma.c b/drivers/gpu/host1x/cdma.c >> index c5d82a8..14692446e 100644 >> --- a/drivers/gpu/host1x/cdma.c >> +++ b/drivers/gpu/host1x/cdma.c >> @@ -51,9 +51,15 @@ static void host1x_pushbuffer_destroy(struct push_buffer *pb) >> struct host1x_cdma *cdma = pb_to_cdma(pb); >> struct host1x *host1x = cdma_to_host1x(cdma); >> >> - if (pb->phys != 0) >> - dma_free_wc(host1x->dev, pb->size_bytes + 4, pb->mapped, >> - pb->phys); >> + if (!pb->phys) >> + return; >> + >> + if (host1x->domain) { >> + iommu_unmap(host1x->domain, pb->dma, pb->alloc_size_bytes); >> + free_iova(&host1x->iova, iova_pfn(&host1x->iova, pb->dma)); >> + } >> + >> + dma_free_wc(host1x->dev, pb->alloc_size_bytes, pb->mapped, pb->phys); >> >> pb->mapped = NULL; >> pb->phys = 0; >> @@ -66,28 +72,65 @@ static int host1x_pushbuffer_init(struct push_buffer *pb) >> { >> struct host1x_cdma *cdma = pb_to_cdma(pb); >> struct host1x *host1x = cdma_to_host1x(cdma); >> + struct iova *alloc; >> + int err; >> >> pb->mapped = NULL; >> pb->phys = 0; >> pb->size_bytes = HOST1X_PUSHBUFFER_SLOTS * 8; >> + pb->alloc_size_bytes = pb->size_bytes + 4; >> >> /* initialize buffer pointers */ >> pb->fence = pb->size_bytes - 8; >> pb->pos = 0; >> >> - /* allocate and map pushbuffer memory */ >> - pb->mapped = dma_alloc_wc(host1x->dev, pb->size_bytes + 4, &pb->phys, >> - GFP_KERNEL); >> - if (!pb->mapped) >> - goto fail; >> + if (host1x->domain) { >> + unsigned long shift; >> + >> + pb->alloc_size_bytes = iova_align(&host1x->iova, >> + pb->alloc_size_bytes); >> + >> + pb->mapped = dma_alloc_wc(host1x->dev, pb->alloc_size_bytes, >> + &pb->phys, GFP_KERNEL); >> + if (!pb->mapped) >> + return -ENOMEM; >> + >> + shift = iova_shift(&host1x->iova); >> + alloc = alloc_iova( >> + &host1x->iova, pb->alloc_size_bytes >> shift, >> + host1x->domain->geometry.aperture_end >> shift, true); > > Let's precompute some of these values to make this expression more > compact. For example pb->alloc_size_bytes is used a couple of times, so > it could be stored temporarily in a local variable with a nice and short > name such as "size". > > If you store the aperture end, or limit_pfn, as I commented in an > earlier patch, then this also becomes much more compact. Made a local "size" variable and stored the aperture end and did some other cleanup as well. > >> + if (!alloc) { >> + err = -ENOMEM; >> + goto iommu_free_mem; >> + } >> + >> + err = iommu_map(host1x->domain, >> + iova_dma_addr(&host1x->iova, alloc), >> + pb->phys, pb->alloc_size_bytes, >> + IOMMU_READ); > > Same here. Cleaned up. > >> + if (err) >> + goto iommu_free_iova; >> + >> + pb->dma = iova_dma_addr(&host1x->iova, alloc); >> + } else { >> + pb->mapped = dma_alloc_wc(host1x->dev, pb->alloc_size_bytes, >> + &pb->phys, GFP_KERNEL); >> + if (!pb->mapped) >> + return -ENOMEM; >> + >> + pb->dma = pb->phys; >> + } >> >> host1x_hw_pushbuffer_init(host1x, pb); >> >> return 0; >> >> -fail: >> - host1x_pushbuffer_destroy(pb); >> - return -ENOMEM; >> +iommu_free_iova: >> + __free_iova(&host1x->iova, alloc); >> +iommu_free_mem: >> + dma_free_wc(host1x->dev, pb->alloc_size_bytes, pb->mapped, pb->phys); >> + >> + return err; >> } >> >> /* >> diff --git a/drivers/gpu/host1x/cdma.h b/drivers/gpu/host1x/cdma.h >> index 470087a..4b3645f 100644 >> --- a/drivers/gpu/host1x/cdma.h >> +++ b/drivers/gpu/host1x/cdma.h >> @@ -43,10 +43,12 @@ struct host1x_job; >> >> struct push_buffer { >> void *mapped; /* mapped pushbuffer memory */ >> - dma_addr_t phys; /* physical address of pushbuffer */ >> + dma_addr_t dma; /* device address of pushbuffer */ >> + phys_addr_t phys; /* physical address of pushbuffer */ >> u32 fence; /* index we've written */ >> u32 pos; /* index to write to */ >> u32 size_bytes; >> + u32 alloc_size_bytes; > > Maybe just "alloc_size"? Renamed. Also renamed size_bytes to size for consistency. > >> }; >> >> struct buffer_timeout { >> diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c >> index a62317a..ca4527e 100644 >> --- a/drivers/gpu/host1x/dev.c >> +++ b/drivers/gpu/host1x/dev.c >> @@ -168,16 +168,36 @@ static int host1x_probe(struct platform_device *pdev) >> return err; >> } >> >> + if (iommu_present(&platform_bus_type)) { >> + struct iommu_domain_geometry *geometry; >> + unsigned long order; >> + >> + host->domain = iommu_domain_alloc(&platform_bus_type); >> + if (!host->domain) >> + return -ENOMEM; >> + >> + err = iommu_attach_device(host->domain, &pdev->dev); >> + if (err) >> + goto fail_free_domain; >> + >> + geometry = &host->domain->geometry; >> + >> + order = __ffs(host->domain->pgsize_bitmap); >> + init_iova_domain(&host->iova, 1UL << order, >> + geometry->aperture_start >> order, >> + geometry->aperture_end >> order); >> + } >> + >> err = host1x_channel_list_init(host); >> if (err) { >> dev_err(&pdev->dev, "failed to initialize channel list\n"); >> - return err; >> + goto fail_detach_device; >> } >> >> err = clk_prepare_enable(host->clk); >> if (err < 0) { >> dev_err(&pdev->dev, "failed to enable clock\n"); >> - return err; >> + goto fail_detach_device; >> } >> >> err = host1x_syncpt_init(host); >> @@ -206,6 +226,15 @@ static int host1x_probe(struct platform_device *pdev) >> host1x_syncpt_deinit(host); >> fail_unprepare_disable: >> clk_disable_unprepare(host->clk); >> +fail_detach_device: >> + if (host->domain) { >> + put_iova_domain(&host->iova); >> + iommu_detach_device(host->domain, &pdev->dev); >> + } >> +fail_free_domain: >> + if (host->domain) >> + iommu_domain_free(host->domain); >> + >> return err; >> } >> >> @@ -218,6 +247,12 @@ static int host1x_remove(struct platform_device *pdev) >> host1x_syncpt_deinit(host); >> clk_disable_unprepare(host->clk); >> >> + if (host->domain) { >> + put_iova_domain(&host->iova); >> + iommu_detach_device(host->domain, &pdev->dev); >> + iommu_domain_free(host->domain); >> + } >> + >> return 0; >> } >> >> diff --git a/drivers/gpu/host1x/dev.h b/drivers/gpu/host1x/dev.h >> index 5220510..6189825 100644 >> --- a/drivers/gpu/host1x/dev.h >> +++ b/drivers/gpu/host1x/dev.h >> @@ -19,6 +19,8 @@ >> >> #include <linux/platform_device.h> >> #include <linux/device.h> >> +#include <linux/iommu.h> >> +#include <linux/iova.h> >> >> #include "channel.h" >> #include "syncpt.h" >> @@ -108,6 +110,9 @@ struct host1x { >> struct device *dev; >> struct clk *clk; >> >> + struct iommu_domain *domain; >> + struct iova_domain iova; >> + >> struct mutex intr_mutex; >> int intr_syncpt_irq; >> >> diff --git a/drivers/gpu/host1x/hw/cdma_hw.c b/drivers/gpu/host1x/hw/cdma_hw.c >> index 659c1bb..b8c0348 100644 >> --- a/drivers/gpu/host1x/hw/cdma_hw.c >> +++ b/drivers/gpu/host1x/hw/cdma_hw.c >> @@ -55,7 +55,7 @@ static void cdma_timeout_cpu_incr(struct host1x_cdma *cdma, u32 getptr, >> *(p++) = HOST1X_OPCODE_NOP; >> *(p++) = HOST1X_OPCODE_NOP; >> dev_dbg(host1x->dev, "%s: NOP at %pad+%#x\n", __func__, >> - &pb->phys, getptr); >> + &pb->dma, getptr); >> getptr = (getptr + 8) & (pb->size_bytes - 1); >> } >> >> @@ -78,9 +78,9 @@ static void cdma_start(struct host1x_cdma *cdma) >> HOST1X_CHANNEL_DMACTRL); >> >> /* set base, put and end pointer */ >> - host1x_ch_writel(ch, cdma->push_buffer.phys, HOST1X_CHANNEL_DMASTART); >> + host1x_ch_writel(ch, cdma->push_buffer.dma, HOST1X_CHANNEL_DMASTART); >> host1x_ch_writel(ch, cdma->push_buffer.pos, HOST1X_CHANNEL_DMAPUT); >> - host1x_ch_writel(ch, cdma->push_buffer.phys + >> + host1x_ch_writel(ch, cdma->push_buffer.dma + >> cdma->push_buffer.size_bytes + 4, >> HOST1X_CHANNEL_DMAEND); >> >> @@ -115,8 +115,8 @@ static void cdma_timeout_restart(struct host1x_cdma *cdma, u32 getptr) >> HOST1X_CHANNEL_DMACTRL); >> >> /* set base, end pointer (all of memory) */ >> - host1x_ch_writel(ch, cdma->push_buffer.phys, HOST1X_CHANNEL_DMASTART); >> - host1x_ch_writel(ch, cdma->push_buffer.phys + >> + host1x_ch_writel(ch, cdma->push_buffer.dma, HOST1X_CHANNEL_DMASTART); >> + host1x_ch_writel(ch, cdma->push_buffer.dma + >> cdma->push_buffer.size_bytes, >> HOST1X_CHANNEL_DMAEND); >> >> diff --git a/drivers/gpu/host1x/job.c b/drivers/gpu/host1x/job.c >> index a91b7c4..222d930 100644 >> --- a/drivers/gpu/host1x/job.c >> +++ b/drivers/gpu/host1x/job.c >> @@ -174,8 +174,9 @@ static int do_waitchks(struct host1x_job *job, struct host1x *host, >> return 0; >> } >> >> -static unsigned int pin_job(struct host1x_job *job) >> +static unsigned int pin_job(struct host1x *host, struct host1x_job *job) >> { >> + int err; >> unsigned int i; > > Maybe use an inverse christmas tree for variables as in other parts of > this driver? That is, add the new int err belowe the existing unsined > int i. Fixed. > >> >> job->num_unpins = 0; >> @@ -186,12 +187,16 @@ static unsigned int pin_job(struct host1x_job *job) >> dma_addr_t phys_addr; >> >> reloc->target.bo = host1x_bo_get(reloc->target.bo); >> - if (!reloc->target.bo) >> + if (!reloc->target.bo) { >> + err = -EINVAL; >> goto unpin; >> + } >> >> phys_addr = host1x_bo_pin(reloc->target.bo, &sgt); >> - if (!phys_addr) >> + if (!phys_addr) { >> + err = -EINVAL; >> goto unpin; >> + } >> >> job->addr_phys[job->num_unpins] = phys_addr; >> job->unpins[job->num_unpins].bo = reloc->target.bo; >> @@ -204,25 +209,68 @@ static unsigned int pin_job(struct host1x_job *job) >> struct sg_table *sgt; >> dma_addr_t phys_addr; >> >> + size_t gather_size = 0; >> + struct scatterlist *sg; >> + struct iova *alloc; >> + unsigned long shift; >> + int j; > > There should be no blank line between blocks of variable declarations. > Also, make j an unsigned int. Fixed. > >> + >> g->bo = host1x_bo_get(g->bo); >> - if (!g->bo) >> + if (!g->bo) { >> + err = -EINVAL; >> goto unpin; >> + } >> >> phys_addr = host1x_bo_pin(g->bo, &sgt); >> - if (!phys_addr) >> + if (!phys_addr) { >> + err = -EINVAL; >> goto unpin; >> + } >> + >> + if (!IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL) && host->domain) { >> + for_each_sg(sgt->sgl, sg, sgt->nents, j) { >> + gather_size += sg->length; >> + } > > No need for curly brackets here. Fixed. > >> + gather_size = iova_align(&host->iova, gather_size); >> + >> + shift = iova_shift(&host->iova); >> + alloc = alloc_iova( >> + &host->iova, gather_size >> shift, >> + host->domain->geometry.aperture_end >> shift, >> + true); > > This could be made a little more compact as well. Fixed. > > Thierry > Thanks, Mikko.
diff --git a/drivers/gpu/host1x/cdma.c b/drivers/gpu/host1x/cdma.c index c5d82a8..14692446e 100644 --- a/drivers/gpu/host1x/cdma.c +++ b/drivers/gpu/host1x/cdma.c @@ -51,9 +51,15 @@ static void host1x_pushbuffer_destroy(struct push_buffer *pb) struct host1x_cdma *cdma = pb_to_cdma(pb); struct host1x *host1x = cdma_to_host1x(cdma); - if (pb->phys != 0) - dma_free_wc(host1x->dev, pb->size_bytes + 4, pb->mapped, - pb->phys); + if (!pb->phys) + return; + + if (host1x->domain) { + iommu_unmap(host1x->domain, pb->dma, pb->alloc_size_bytes); + free_iova(&host1x->iova, iova_pfn(&host1x->iova, pb->dma)); + } + + dma_free_wc(host1x->dev, pb->alloc_size_bytes, pb->mapped, pb->phys); pb->mapped = NULL; pb->phys = 0; @@ -66,28 +72,65 @@ static int host1x_pushbuffer_init(struct push_buffer *pb) { struct host1x_cdma *cdma = pb_to_cdma(pb); struct host1x *host1x = cdma_to_host1x(cdma); + struct iova *alloc; + int err; pb->mapped = NULL; pb->phys = 0; pb->size_bytes = HOST1X_PUSHBUFFER_SLOTS * 8; + pb->alloc_size_bytes = pb->size_bytes + 4; /* initialize buffer pointers */ pb->fence = pb->size_bytes - 8; pb->pos = 0; - /* allocate and map pushbuffer memory */ - pb->mapped = dma_alloc_wc(host1x->dev, pb->size_bytes + 4, &pb->phys, - GFP_KERNEL); - if (!pb->mapped) - goto fail; + if (host1x->domain) { + unsigned long shift; + + pb->alloc_size_bytes = iova_align(&host1x->iova, + pb->alloc_size_bytes); + + pb->mapped = dma_alloc_wc(host1x->dev, pb->alloc_size_bytes, + &pb->phys, GFP_KERNEL); + if (!pb->mapped) + return -ENOMEM; + + shift = iova_shift(&host1x->iova); + alloc = alloc_iova( + &host1x->iova, pb->alloc_size_bytes >> shift, + host1x->domain->geometry.aperture_end >> shift, true); + if (!alloc) { + err = -ENOMEM; + goto iommu_free_mem; + } + + err = iommu_map(host1x->domain, + iova_dma_addr(&host1x->iova, alloc), + pb->phys, pb->alloc_size_bytes, + IOMMU_READ); + if (err) + goto iommu_free_iova; + + pb->dma = iova_dma_addr(&host1x->iova, alloc); + } else { + pb->mapped = dma_alloc_wc(host1x->dev, pb->alloc_size_bytes, + &pb->phys, GFP_KERNEL); + if (!pb->mapped) + return -ENOMEM; + + pb->dma = pb->phys; + } host1x_hw_pushbuffer_init(host1x, pb); return 0; -fail: - host1x_pushbuffer_destroy(pb); - return -ENOMEM; +iommu_free_iova: + __free_iova(&host1x->iova, alloc); +iommu_free_mem: + dma_free_wc(host1x->dev, pb->alloc_size_bytes, pb->mapped, pb->phys); + + return err; } /* diff --git a/drivers/gpu/host1x/cdma.h b/drivers/gpu/host1x/cdma.h index 470087a..4b3645f 100644 --- a/drivers/gpu/host1x/cdma.h +++ b/drivers/gpu/host1x/cdma.h @@ -43,10 +43,12 @@ struct host1x_job; struct push_buffer { void *mapped; /* mapped pushbuffer memory */ - dma_addr_t phys; /* physical address of pushbuffer */ + dma_addr_t dma; /* device address of pushbuffer */ + phys_addr_t phys; /* physical address of pushbuffer */ u32 fence; /* index we've written */ u32 pos; /* index to write to */ u32 size_bytes; + u32 alloc_size_bytes; }; struct buffer_timeout { diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c index a62317a..ca4527e 100644 --- a/drivers/gpu/host1x/dev.c +++ b/drivers/gpu/host1x/dev.c @@ -168,16 +168,36 @@ static int host1x_probe(struct platform_device *pdev) return err; } + if (iommu_present(&platform_bus_type)) { + struct iommu_domain_geometry *geometry; + unsigned long order; + + host->domain = iommu_domain_alloc(&platform_bus_type); + if (!host->domain) + return -ENOMEM; + + err = iommu_attach_device(host->domain, &pdev->dev); + if (err) + goto fail_free_domain; + + geometry = &host->domain->geometry; + + order = __ffs(host->domain->pgsize_bitmap); + init_iova_domain(&host->iova, 1UL << order, + geometry->aperture_start >> order, + geometry->aperture_end >> order); + } + err = host1x_channel_list_init(host); if (err) { dev_err(&pdev->dev, "failed to initialize channel list\n"); - return err; + goto fail_detach_device; } err = clk_prepare_enable(host->clk); if (err < 0) { dev_err(&pdev->dev, "failed to enable clock\n"); - return err; + goto fail_detach_device; } err = host1x_syncpt_init(host); @@ -206,6 +226,15 @@ static int host1x_probe(struct platform_device *pdev) host1x_syncpt_deinit(host); fail_unprepare_disable: clk_disable_unprepare(host->clk); +fail_detach_device: + if (host->domain) { + put_iova_domain(&host->iova); + iommu_detach_device(host->domain, &pdev->dev); + } +fail_free_domain: + if (host->domain) + iommu_domain_free(host->domain); + return err; } @@ -218,6 +247,12 @@ static int host1x_remove(struct platform_device *pdev) host1x_syncpt_deinit(host); clk_disable_unprepare(host->clk); + if (host->domain) { + put_iova_domain(&host->iova); + iommu_detach_device(host->domain, &pdev->dev); + iommu_domain_free(host->domain); + } + return 0; } diff --git a/drivers/gpu/host1x/dev.h b/drivers/gpu/host1x/dev.h index 5220510..6189825 100644 --- a/drivers/gpu/host1x/dev.h +++ b/drivers/gpu/host1x/dev.h @@ -19,6 +19,8 @@ #include <linux/platform_device.h> #include <linux/device.h> +#include <linux/iommu.h> +#include <linux/iova.h> #include "channel.h" #include "syncpt.h" @@ -108,6 +110,9 @@ struct host1x { struct device *dev; struct clk *clk; + struct iommu_domain *domain; + struct iova_domain iova; + struct mutex intr_mutex; int intr_syncpt_irq; diff --git a/drivers/gpu/host1x/hw/cdma_hw.c b/drivers/gpu/host1x/hw/cdma_hw.c index 659c1bb..b8c0348 100644 --- a/drivers/gpu/host1x/hw/cdma_hw.c +++ b/drivers/gpu/host1x/hw/cdma_hw.c @@ -55,7 +55,7 @@ static void cdma_timeout_cpu_incr(struct host1x_cdma *cdma, u32 getptr, *(p++) = HOST1X_OPCODE_NOP; *(p++) = HOST1X_OPCODE_NOP; dev_dbg(host1x->dev, "%s: NOP at %pad+%#x\n", __func__, - &pb->phys, getptr); + &pb->dma, getptr); getptr = (getptr + 8) & (pb->size_bytes - 1); } @@ -78,9 +78,9 @@ static void cdma_start(struct host1x_cdma *cdma) HOST1X_CHANNEL_DMACTRL); /* set base, put and end pointer */ - host1x_ch_writel(ch, cdma->push_buffer.phys, HOST1X_CHANNEL_DMASTART); + host1x_ch_writel(ch, cdma->push_buffer.dma, HOST1X_CHANNEL_DMASTART); host1x_ch_writel(ch, cdma->push_buffer.pos, HOST1X_CHANNEL_DMAPUT); - host1x_ch_writel(ch, cdma->push_buffer.phys + + host1x_ch_writel(ch, cdma->push_buffer.dma + cdma->push_buffer.size_bytes + 4, HOST1X_CHANNEL_DMAEND); @@ -115,8 +115,8 @@ static void cdma_timeout_restart(struct host1x_cdma *cdma, u32 getptr) HOST1X_CHANNEL_DMACTRL); /* set base, end pointer (all of memory) */ - host1x_ch_writel(ch, cdma->push_buffer.phys, HOST1X_CHANNEL_DMASTART); - host1x_ch_writel(ch, cdma->push_buffer.phys + + host1x_ch_writel(ch, cdma->push_buffer.dma, HOST1X_CHANNEL_DMASTART); + host1x_ch_writel(ch, cdma->push_buffer.dma + cdma->push_buffer.size_bytes, HOST1X_CHANNEL_DMAEND); diff --git a/drivers/gpu/host1x/job.c b/drivers/gpu/host1x/job.c index a91b7c4..222d930 100644 --- a/drivers/gpu/host1x/job.c +++ b/drivers/gpu/host1x/job.c @@ -174,8 +174,9 @@ static int do_waitchks(struct host1x_job *job, struct host1x *host, return 0; } -static unsigned int pin_job(struct host1x_job *job) +static unsigned int pin_job(struct host1x *host, struct host1x_job *job) { + int err; unsigned int i; job->num_unpins = 0; @@ -186,12 +187,16 @@ static unsigned int pin_job(struct host1x_job *job) dma_addr_t phys_addr; reloc->target.bo = host1x_bo_get(reloc->target.bo); - if (!reloc->target.bo) + if (!reloc->target.bo) { + err = -EINVAL; goto unpin; + } phys_addr = host1x_bo_pin(reloc->target.bo, &sgt); - if (!phys_addr) + if (!phys_addr) { + err = -EINVAL; goto unpin; + } job->addr_phys[job->num_unpins] = phys_addr; job->unpins[job->num_unpins].bo = reloc->target.bo; @@ -204,25 +209,68 @@ static unsigned int pin_job(struct host1x_job *job) struct sg_table *sgt; dma_addr_t phys_addr; + size_t gather_size = 0; + struct scatterlist *sg; + struct iova *alloc; + unsigned long shift; + int j; + g->bo = host1x_bo_get(g->bo); - if (!g->bo) + if (!g->bo) { + err = -EINVAL; goto unpin; + } phys_addr = host1x_bo_pin(g->bo, &sgt); - if (!phys_addr) + if (!phys_addr) { + err = -EINVAL; goto unpin; + } + + if (!IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL) && host->domain) { + for_each_sg(sgt->sgl, sg, sgt->nents, j) { + gather_size += sg->length; + } + gather_size = iova_align(&host->iova, gather_size); + + shift = iova_shift(&host->iova); + alloc = alloc_iova( + &host->iova, gather_size >> shift, + host->domain->geometry.aperture_end >> shift, + true); + if (!alloc) { + err = -ENOMEM; + goto unpin; + } + + err = iommu_map_sg(host->domain, + iova_dma_addr(&host->iova, alloc), + sgt->sgl, sgt->nents, IOMMU_READ); + if (err == 0) { + __free_iova(&host->iova, alloc); + err = -EINVAL; + goto unpin; + } + + job->addr_phys[job->num_unpins] = + iova_dma_addr(&host->iova, alloc); + job->unpins[job->num_unpins].size = gather_size; + } else { + job->addr_phys[job->num_unpins] = phys_addr; + } + + job->gather_addr_phys[i] = job->addr_phys[job->num_unpins]; - job->addr_phys[job->num_unpins] = phys_addr; job->unpins[job->num_unpins].bo = g->bo; job->unpins[job->num_unpins].sgt = sgt; job->num_unpins++; } - return job->num_unpins; + return 0; unpin: host1x_job_unpin(job); - return 0; + return err; } static int do_relocs(struct host1x_job *job, struct host1x_bo *cmdbuf) @@ -525,8 +573,8 @@ int host1x_job_pin(struct host1x_job *job, struct device *dev) host1x_syncpt_load(host->syncpt + i); /* pin memory */ - err = pin_job(job); - if (!err) + err = pin_job(host, job); + if (err) goto out; /* patch gathers */ @@ -569,11 +617,19 @@ EXPORT_SYMBOL(host1x_job_pin); void host1x_job_unpin(struct host1x_job *job) { + struct host1x *host = dev_get_drvdata(job->channel->dev->parent); unsigned int i; for (i = 0; i < job->num_unpins; i++) { struct host1x_job_unpin_data *unpin = &job->unpins[i]; + if (!IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL) && host->domain) { + iommu_unmap(host->domain, job->addr_phys[i], + unpin->size); + free_iova(&host->iova, + iova_pfn(&host->iova, job->addr_phys[i])); + } + host1x_bo_unpin(unpin->bo, unpin->sgt); host1x_bo_put(unpin->bo); } diff --git a/drivers/gpu/host1x/job.h b/drivers/gpu/host1x/job.h index 8b3c15d..878239c4 100644 --- a/drivers/gpu/host1x/job.h +++ b/drivers/gpu/host1x/job.h @@ -44,6 +44,7 @@ struct host1x_waitchk { struct host1x_job_unpin_data { struct host1x_bo *bo; struct sg_table *sgt; + size_t size; }; /*
Add support for the Host1x unit to be located behind an IOMMU. This is required when gather buffers may be allocated non-contiguously in physical memory, as can be the case when TegraDRM is also using the IOMMU. Signed-off-by: Mikko Perttunen <mperttunen@nvidia.com> --- drivers/gpu/host1x/cdma.c | 65 +++++++++++++++++++++++++++++------ drivers/gpu/host1x/cdma.h | 4 ++- drivers/gpu/host1x/dev.c | 39 +++++++++++++++++++-- drivers/gpu/host1x/dev.h | 5 +++ drivers/gpu/host1x/hw/cdma_hw.c | 10 +++--- drivers/gpu/host1x/job.c | 76 +++++++++++++++++++++++++++++++++++------ drivers/gpu/host1x/job.h | 1 + 7 files changed, 171 insertions(+), 29 deletions(-)