[RFC,v1] iommu/arm-smmu: Allow domains to choose a context bank
diff mbox series

Message ID 1580250823-30739-1-git-send-email-jcrouse@codeaurora.org
State New, archived
Headers show
Series
  • [RFC,v1] iommu/arm-smmu: Allow domains to choose a context bank
Related show

Commit Message

Jordan Crouse Jan. 28, 2020, 10:33 p.m. UTC
Domains which are being set up for split pagetables usually want to be
on a specific context bank for hardware reasons. Force the context
bank for domains with the split-pagetable quirk to context bank 0.
If context bank 0 is taken, move that context bank to another unused
bank and rewrite the stream matching registers accordingly.

This will be used by [1] and [2] to leave context bank 0 open so that
the Adreno GPU can program it.

[1] https://lists.linuxfoundation.org/pipermail/iommu/2020-January/041438.html
[2] https://lists.linuxfoundation.org/pipermail/iommu/2020-January/041444.html

Signed-off-by: Jordan Crouse <jcrouse@codeaurora.org>
---

 drivers/iommu/arm-smmu.c | 63 +++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 59 insertions(+), 4 deletions(-)

Comments

Rob Clark Feb. 18, 2020, 6:19 p.m. UTC | #1
On Tue, Jan 28, 2020 at 2:34 PM Jordan Crouse <jcrouse@codeaurora.org> wrote:
>
> Domains which are being set up for split pagetables usually want to be
> on a specific context bank for hardware reasons. Force the context
> bank for domains with the split-pagetable quirk to context bank 0.
> If context bank 0 is taken, move that context bank to another unused
> bank and rewrite the stream matching registers accordingly.

Is the only reason for dealing with the case where bank 0 is already in
use the DMA domain that gets set up before the driver probes?

I'm kinda thinking that we need to invent a way to unwind/detach the
DMA domain, and unhook the iommu-dmaops, since this seems to already
be causing problems with dma-bufs imported from other drivers
(who expect that dma_map_*(), with the importing device's dev ptr,
will do something sane).

BR,
-R

>
> This will be used by [1] and [2] to leave context bank 0 open so that
> the Adreno GPU can program it.
>
> [1] https://lists.linuxfoundation.org/pipermail/iommu/2020-January/041438.html
> [2] https://lists.linuxfoundation.org/pipermail/iommu/2020-January/041444.html
>
> Signed-off-by: Jordan Crouse <jcrouse@codeaurora.org>
> ---
>
>  drivers/iommu/arm-smmu.c | 63 +++++++++++++++++++++++++++++++++++++++++++++---
>  1 file changed, 59 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
> index 85a6773..799a254 100644
> --- a/drivers/iommu/arm-smmu.c
> +++ b/drivers/iommu/arm-smmu.c
> @@ -254,6 +254,43 @@ static int __arm_smmu_alloc_bitmap(unsigned long *map, int start, int end)
>         return idx;
>  }
>
> +static void arm_smmu_write_s2cr(struct arm_smmu_device *smmu, int idx);
> +
> +static int __arm_smmu_alloc_cb(struct arm_smmu_device *smmu, int start,
> +               int target)
> +{
> +       int new, i;
> +
> +       /* Allocate a new context bank id */
> +       new = __arm_smmu_alloc_bitmap(smmu->context_map, start,
> +               smmu->num_context_banks);
> +
> +       if (new < 0)
> +               return new;
> +
> +       /* If no target is set or we actually got the bank index we wanted */
> +       if (target == -1 || new == target)
> +               return new;
> +
> +       /* Copy the context configuration to the new index */
> +       memcpy(&smmu->cbs[new], &smmu->cbs[target], sizeof(*smmu->cbs));
> +       smmu->cbs[new].cfg->cbndx = new;
> +
> +       /* FIXME: Do we need locking here? */
> +       for (i = 0; i < smmu->num_mapping_groups; i++) {
> +               if (smmu->s2crs[i].cbndx == target) {
> +                       smmu->s2crs[i].cbndx = new;
> +                       arm_smmu_write_s2cr(smmu, i);
> +               }
> +       }
> +
> +       /*
> +        * FIXME: Does getting here imply that 'target' is already set in the
> +        * context_map?
> +        */
> +       return target;
> +}
> +
>  static void __arm_smmu_free_bitmap(unsigned long *map, int idx)
>  {
>         clear_bit(idx, map);
> @@ -770,6 +807,7 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
>         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
>         struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
>         unsigned long quirks = 0;
> +       int forcecb = -1;
>
>         mutex_lock(&smmu_domain->init_mutex);
>         if (smmu_domain->smmu)
> @@ -844,8 +882,25 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
>                          * SEP_UPSTREAM so we don't need to reduce the size of
>                          * the ias to account for the sign extension bit
>                          */
> -                       if (smmu_domain->split_pagetables)
> -                               quirks |= IO_PGTABLE_QUIRK_ARM_TTBR1;
> +                       if (smmu_domain->split_pagetables) {
> +                               /*
> +                                * If split pagetables are enabled, assume that
> +                                * the user's intent is to use per-instance
> +                                * pagetables which, at least on a QCOM target,
> +                                * means that this domain should be on context
> +                                * bank 0.
> +                                */
> +
> +                               /*
> +                                * If we can't force to context bank 0 then
> +                                * don't bother enabling split pagetables which
> +                                * then would not allow aux domains
> +                                */
> +                               if (start == 0) {
> +                                       forcecb = 0;
> +                                       quirks |= IO_PGTABLE_QUIRK_ARM_TTBR1;
> +                               }
> +                       }
>                 } else if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_L) {
>                         fmt = ARM_32_LPAE_S1;
>                         ias = min(ias, 32UL);
> @@ -883,8 +938,8 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
>                 ret = -EINVAL;
>                 goto out_unlock;
>         }
> -       ret = __arm_smmu_alloc_bitmap(smmu->context_map, start,
> -                                     smmu->num_context_banks);
> +
> +       ret = __arm_smmu_alloc_cb(smmu, start, forcecb);
>         if (ret < 0)
>                 goto out_unlock;
>
> --
> 2.7.4
Jordan Crouse Feb. 18, 2020, 6:41 p.m. UTC | #2
On Tue, Feb 18, 2020 at 10:19:53AM -0800, Rob Clark wrote:
> On Tue, Jan 28, 2020 at 2:34 PM Jordan Crouse <jcrouse@codeaurora.org> wrote:
> >
> > Domains which are being set up for split pagetables usually want to be
> > on a specific context bank for hardware reasons. Force the context
> > bank for domains with the split-pagetable quirk to context bank 0.
> > If context bank 0 is taken, move that context bank to another unused
> > bank and rewrite the stream matching registers accordingly.
> 
> Is the only reason for dealing with the case where bank 0 is already in
> use the DMA domain that gets set up before the driver probes?

Right. On Adreno GPUs only one context bank at a time is accessible from the
GPU through an aperture which defaults to context bank 0 and as you might
expect, the aperture controls are protected by the secure world on AC enabled
targets.

Some of the newer targets have a SCM call to switch the aperture but for all the
currently merged platforms we are forced to use context bank 0.

> I'm kinda thinking that we need to invent a way to unwind/detach the
> DMA domain, and unhook the iommu-dmaops, since this seems to already
> be causing problems with dma-bufs imported from other drivers
> (who expect that dma_map_*(), with the importing device's dev ptr,
> will do something sane).

That could work, assuming that we could guarantee that our new replacement
domain got the context bank we wanted.

Jordan

> >
> > This will be used by [1] and [2] to leave context bank 0 open so that
> > the Adreno GPU can program it.
> >
> > [1] https://lists.linuxfoundation.org/pipermail/iommu/2020-January/041438.html
> > [2] https://lists.linuxfoundation.org/pipermail/iommu/2020-January/041444.html
> >
> > Signed-off-by: Jordan Crouse <jcrouse@codeaurora.org>
> > ---
> >
> >  drivers/iommu/arm-smmu.c | 63 +++++++++++++++++++++++++++++++++++++++++++++---
> >  1 file changed, 59 insertions(+), 4 deletions(-)
> >
> > diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
> > index 85a6773..799a254 100644
> > --- a/drivers/iommu/arm-smmu.c
> > +++ b/drivers/iommu/arm-smmu.c
> > @@ -254,6 +254,43 @@ static int __arm_smmu_alloc_bitmap(unsigned long *map, int start, int end)
> >         return idx;
> >  }
> >
> > +static void arm_smmu_write_s2cr(struct arm_smmu_device *smmu, int idx);
> > +
> > +static int __arm_smmu_alloc_cb(struct arm_smmu_device *smmu, int start,
> > +               int target)
> > +{
> > +       int new, i;
> > +
> > +       /* Allocate a new context bank id */
> > +       new = __arm_smmu_alloc_bitmap(smmu->context_map, start,
> > +               smmu->num_context_banks);
> > +
> > +       if (new < 0)
> > +               return new;
> > +
> > +       /* If no target is set or we actually got the bank index we wanted */
> > +       if (target == -1 || new == target)
> > +               return new;
> > +
> > +       /* Copy the context configuration to the new index */
> > +       memcpy(&smmu->cbs[new], &smmu->cbs[target], sizeof(*smmu->cbs));
> > +       smmu->cbs[new].cfg->cbndx = new;
> > +
> > +       /* FIXME: Do we need locking here? */
> > +       for (i = 0; i < smmu->num_mapping_groups; i++) {
> > +               if (smmu->s2crs[i].cbndx == target) {
> > +                       smmu->s2crs[i].cbndx = new;
> > +                       arm_smmu_write_s2cr(smmu, i);
> > +               }
> > +       }
> > +
> > +       /*
> > +        * FIXME: Does getting here imply that 'target' is already set in the
> > +        * context_map?
> > +        */
> > +       return target;
> > +}
> > +
> >  static void __arm_smmu_free_bitmap(unsigned long *map, int idx)
> >  {
> >         clear_bit(idx, map);
> > @@ -770,6 +807,7 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
> >         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
> >         struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
> >         unsigned long quirks = 0;
> > +       int forcecb = -1;
> >
> >         mutex_lock(&smmu_domain->init_mutex);
> >         if (smmu_domain->smmu)
> > @@ -844,8 +882,25 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
> >                          * SEP_UPSTREAM so we don't need to reduce the size of
> >                          * the ias to account for the sign extension bit
> >                          */
> > -                       if (smmu_domain->split_pagetables)
> > -                               quirks |= IO_PGTABLE_QUIRK_ARM_TTBR1;
> > +                       if (smmu_domain->split_pagetables) {
> > +                               /*
> > +                                * If split pagetables are enabled, assume that
> > +                                * the user's intent is to use per-instance
> > +                                * pagetables which, at least on a QCOM target,
> > +                                * means that this domain should be on context
> > +                                * bank 0.
> > +                                */
> > +
> > +                               /*
> > +                                * If we can't force to context bank 0 then
> > +                                * don't bother enabling split pagetables which
> > +                                * then would not allow aux domains
> > +                                */
> > +                               if (start == 0) {
> > +                                       forcecb = 0;
> > +                                       quirks |= IO_PGTABLE_QUIRK_ARM_TTBR1;
> > +                               }
> > +                       }
> >                 } else if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_L) {
> >                         fmt = ARM_32_LPAE_S1;
> >                         ias = min(ias, 32UL);
> > @@ -883,8 +938,8 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
> >                 ret = -EINVAL;
> >                 goto out_unlock;
> >         }
> > -       ret = __arm_smmu_alloc_bitmap(smmu->context_map, start,
> > -                                     smmu->num_context_banks);
> > +
> > +       ret = __arm_smmu_alloc_cb(smmu, start, forcecb);
> >         if (ret < 0)
> >                 goto out_unlock;
> >
> > --
> > 2.7.4

Patch
diff mbox series

diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index 85a6773..799a254 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -254,6 +254,43 @@  static int __arm_smmu_alloc_bitmap(unsigned long *map, int start, int end)
 	return idx;
 }
 
+static void arm_smmu_write_s2cr(struct arm_smmu_device *smmu, int idx);
+
+static int __arm_smmu_alloc_cb(struct arm_smmu_device *smmu, int start,
+		int target)
+{
+	int new, i;
+
+       /* Allocate a new context bank id */
+	new = __arm_smmu_alloc_bitmap(smmu->context_map, start,
+		smmu->num_context_banks);
+
+	if (new < 0)
+		return new;
+
+	/* If no target is set or we actually got the bank index we wanted */
+	if (target == -1 || new == target)
+		return new;
+
+	/* Copy the context configuration to the new index */
+	memcpy(&smmu->cbs[new], &smmu->cbs[target], sizeof(*smmu->cbs));
+	smmu->cbs[new].cfg->cbndx = new;
+
+	/* FIXME: Do we need locking here? */
+	for (i = 0; i < smmu->num_mapping_groups; i++) {
+		if (smmu->s2crs[i].cbndx == target) {
+			smmu->s2crs[i].cbndx = new;
+			arm_smmu_write_s2cr(smmu, i);
+		}
+	}
+
+	/*
+	 * FIXME: Does getting here imply that 'target' is already set in the
+	 * context_map?
+	 */
+	return target;
+}
+
 static void __arm_smmu_free_bitmap(unsigned long *map, int idx)
 {
 	clear_bit(idx, map);
@@ -770,6 +807,7 @@  static int arm_smmu_init_domain_context(struct iommu_domain *domain,
 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
 	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
 	unsigned long quirks = 0;
+	int forcecb = -1;
 
 	mutex_lock(&smmu_domain->init_mutex);
 	if (smmu_domain->smmu)
@@ -844,8 +882,25 @@  static int arm_smmu_init_domain_context(struct iommu_domain *domain,
 			 * SEP_UPSTREAM so we don't need to reduce the size of
 			 * the ias to account for the sign extension bit
 			 */
-			if (smmu_domain->split_pagetables)
-				quirks |= IO_PGTABLE_QUIRK_ARM_TTBR1;
+			if (smmu_domain->split_pagetables) {
+				/*
+				 * If split pagetables are enabled, assume that
+				 * the user's intent is to use per-instance
+				 * pagetables which, at least on a QCOM target,
+				 * means that this domain should be on context
+				 * bank 0.
+				 */
+
+				/*
+				 * If we can't force to context bank 0 then
+				 * don't bother enabling split pagetables which
+				 * then would not allow aux domains
+				 */
+				if (start == 0) {
+					forcecb = 0;
+					quirks |= IO_PGTABLE_QUIRK_ARM_TTBR1;
+				}
+			}
 		} else if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_L) {
 			fmt = ARM_32_LPAE_S1;
 			ias = min(ias, 32UL);
@@ -883,8 +938,8 @@  static int arm_smmu_init_domain_context(struct iommu_domain *domain,
 		ret = -EINVAL;
 		goto out_unlock;
 	}
-	ret = __arm_smmu_alloc_bitmap(smmu->context_map, start,
-				      smmu->num_context_banks);
+
+	ret = __arm_smmu_alloc_cb(smmu, start, forcecb);
 	if (ret < 0)
 		goto out_unlock;