diff mbox series

[v8,07/21] iommu/io-pgtable-arm-v7s: Extend MediaTek 4GB Mode

Message ID 1561774167-24141-8-git-send-email-yong.wu@mediatek.com (mailing list archive)
State New, archived
Headers show
Series MT8183 IOMMU SUPPORT | expand

Commit Message

Yong Wu (吴勇) June 29, 2019, 2:09 a.m. UTC
MediaTek extend the arm v7s descriptor to support the dram over 4GB.

In the mt2712 and mt8173, it's called "4GB mode", the physical address
is from 0x4000_0000 to 0x1_3fff_ffff, but from EMI point of view, it
is remapped to high address from 0x1_0000_0000 to 0x1_ffff_ffff, the
bit32 is always enabled. thus, in the M4U, we always enable the bit9
for all PTEs which means to enable bit32 of physical address.

but in mt8183, M4U support the dram from 0x4000_0000 to 0x3_ffff_ffff
which isn't remaped. We extend the PTEs: the bit9 represent bit32 of
PA and the bit4 represent bit33 of PA. Meanwhile the iova still is
32bits.

In order to unify code, in the "4GB mode", we add the bit32 for the
physical address manually in our driver.

Correspondingly, Adding bit32 and bit33 for the PA in the iova_to_phys
has to been moved into v7s.

Regarding whether the pagetable address could be over 4GB, the mt8183
support it while the previous mt8173 don't. thus keep it as is.

Signed-off-by: Yong Wu <yong.wu@mediatek.com>
Reviewed-by: Robin Murphy <robin.murphy@arm.com>
Reviewed-by: Evan Green <evgreen@chromium.org>
---
Comparing with the previous one:
1). Add a new patch "iommu/mediatek: Fix iova_to_phys PA start for 4GB
mode" before this one. Thus rebase it.
A little difference: in the 4gb mode, we add bit32 for PA. and the PA got
from iova_to_phys always have bit32 here, thus we should adjust it to locate
the valid pa.
2). Add this code suggested from Evan.
 if (!data->plat_data->has_4gb_mode)
	       data->enable_4GB = false;
---
 drivers/iommu/io-pgtable-arm-v7s.c | 31 ++++++++++++++++++++++++-------
 drivers/iommu/mtk_iommu.c          | 29 ++++++++++++++++++-----------
 drivers/iommu/mtk_iommu.h          |  1 +
 3 files changed, 43 insertions(+), 18 deletions(-)

Comments

Will Deacon July 10, 2019, 2:36 p.m. UTC | #1
On Sat, Jun 29, 2019 at 10:09:13AM +0800, Yong Wu wrote:
> MediaTek extend the arm v7s descriptor to support the dram over 4GB.
> 
> In the mt2712 and mt8173, it's called "4GB mode", the physical address
> is from 0x4000_0000 to 0x1_3fff_ffff, but from EMI point of view, it
> is remapped to high address from 0x1_0000_0000 to 0x1_ffff_ffff, the
> bit32 is always enabled. thus, in the M4U, we always enable the bit9
> for all PTEs which means to enable bit32 of physical address.
> 
> but in mt8183, M4U support the dram from 0x4000_0000 to 0x3_ffff_ffff
> which isn't remaped. We extend the PTEs: the bit9 represent bit32 of
> PA and the bit4 represent bit33 of PA. Meanwhile the iova still is
> 32bits.

What happens if bit4 is set in the pte for mt2712 or mt8173? Perhaps the
io-pgtable backend should be allowing oas > 32 when
IO_PGTABLE_QUIRK_ARM_MTK_4GB is set, and then enforcing that itself.

> In order to unify code, in the "4GB mode", we add the bit32 for the
> physical address manually in our driver.
> 
> Correspondingly, Adding bit32 and bit33 for the PA in the iova_to_phys
> has to been moved into v7s.
> 
> Regarding whether the pagetable address could be over 4GB, the mt8183
> support it while the previous mt8173 don't. thus keep it as is.
> 
> Signed-off-by: Yong Wu <yong.wu@mediatek.com>
> Reviewed-by: Robin Murphy <robin.murphy@arm.com>
> Reviewed-by: Evan Green <evgreen@chromium.org>
> ---
> Comparing with the previous one:
> 1). Add a new patch "iommu/mediatek: Fix iova_to_phys PA start for 4GB
> mode" before this one. Thus rebase it.
> A little difference: in the 4gb mode, we add bit32 for PA. and the PA got
> from iova_to_phys always have bit32 here, thus we should adjust it to locate
> the valid pa.
> 2). Add this code suggested from Evan.
>  if (!data->plat_data->has_4gb_mode)
> 	       data->enable_4GB = false;
> ---
>  drivers/iommu/io-pgtable-arm-v7s.c | 31 ++++++++++++++++++++++++-------
>  drivers/iommu/mtk_iommu.c          | 29 ++++++++++++++++++-----------
>  drivers/iommu/mtk_iommu.h          |  1 +
>  3 files changed, 43 insertions(+), 18 deletions(-)
> 
> diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c
> index 94c38db..4077822 100644
> --- a/drivers/iommu/io-pgtable-arm-v7s.c
> +++ b/drivers/iommu/io-pgtable-arm-v7s.c
> @@ -123,7 +123,9 @@
>  #define ARM_V7S_TEX_MASK		0x7
>  #define ARM_V7S_ATTR_TEX(val)		(((val) & ARM_V7S_TEX_MASK) << ARM_V7S_TEX_SHIFT)
>  
> -#define ARM_V7S_ATTR_MTK_4GB		BIT(9) /* MTK extend it for 4GB mode */
> +/* MediaTek extend the two bits below for over 4GB mode */
> +#define ARM_V7S_ATTR_MTK_PA_BIT32	BIT(9)
> +#define ARM_V7S_ATTR_MTK_PA_BIT33	BIT(4)
>  
>  /* *well, except for TEX on level 2 large pages, of course :( */
>  #define ARM_V7S_CONT_PAGE_TEX_SHIFT	6
> @@ -190,13 +192,22 @@ static dma_addr_t __arm_v7s_dma_addr(void *pages)
>  static arm_v7s_iopte paddr_to_iopte(phys_addr_t paddr, int lvl,
>  				    struct io_pgtable_cfg *cfg)
>  {
> -	return paddr & ARM_V7S_LVL_MASK(lvl);
> +	arm_v7s_iopte pte = paddr & ARM_V7S_LVL_MASK(lvl);
> +
> +	if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_MTK_4GB) {
> +		if (paddr & BIT_ULL(32))
> +			pte |= ARM_V7S_ATTR_MTK_PA_BIT32;
> +		if (paddr & BIT_ULL(33))
> +			pte |= ARM_V7S_ATTR_MTK_PA_BIT33;
> +	}
> +	return pte;
>  }
>  
>  static phys_addr_t iopte_to_paddr(arm_v7s_iopte pte, int lvl,
>  				  struct io_pgtable_cfg *cfg)
>  {
>  	arm_v7s_iopte mask;
> +	phys_addr_t paddr;
>  
>  	if (ARM_V7S_PTE_IS_TABLE(pte, lvl))
>  		mask = ARM_V7S_TABLE_MASK;
> @@ -205,7 +216,14 @@ static phys_addr_t iopte_to_paddr(arm_v7s_iopte pte, int lvl,
>  	else
>  		mask = ARM_V7S_LVL_MASK(lvl);
>  
> -	return pte & mask;
> +	paddr = pte & mask;
> +	if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_MTK_4GB) {
> +		if (pte & ARM_V7S_ATTR_MTK_PA_BIT32)
> +			paddr |= BIT_ULL(32);
> +		if (pte & ARM_V7S_ATTR_MTK_PA_BIT33)
> +			paddr |= BIT_ULL(33);
> +	}
> +	return paddr;

I think this relies on CONFIG_PHYS_ADDR_T_64BIT, which isn't always set on
32-bit ARM.

Will
Yong Wu (吴勇) July 11, 2019, 11:53 a.m. UTC | #2
On Wed, 2019-07-10 at 15:36 +0100, Will Deacon wrote:
> On Sat, Jun 29, 2019 at 10:09:13AM +0800, Yong Wu wrote:
> > MediaTek extend the arm v7s descriptor to support the dram over 4GB.
> > 
> > In the mt2712 and mt8173, it's called "4GB mode", the physical address
> > is from 0x4000_0000 to 0x1_3fff_ffff, but from EMI point of view, it
> > is remapped to high address from 0x1_0000_0000 to 0x1_ffff_ffff, the
> > bit32 is always enabled. thus, in the M4U, we always enable the bit9
> > for all PTEs which means to enable bit32 of physical address.
> > 
> > but in mt8183, M4U support the dram from 0x4000_0000 to 0x3_ffff_ffff
> > which isn't remaped. We extend the PTEs: the bit9 represent bit32 of
> > PA and the bit4 represent bit33 of PA. Meanwhile the iova still is
> > 32bits.
> 
> What happens if bit4 is set in the pte for mt2712 or mt8173? Perhaps the

bit4 is ignored in mt2712 and mt8173(No effect).

> io-pgtable backend should be allowing oas > 32 when
> IO_PGTABLE_QUIRK_ARM_MTK_4GB is set, and then enforcing that itself.

About oas, It looks the oas doesn't work in current the v7s. 

How about I add a new simple preparing patch like this(copy from
io-pgtable-arm.c)?

==========================================
--- a/drivers/iommu/io-pgtable-arm-v7s.c
+++ b/drivers/iommu/io-pgtable-arm-v7s.c
@@ -495,7 +495,8 @@ static int arm_v7s_map(struct io_pgtable_ops *ops,
unsigned long iova,
        if (!(prot & (IOMMU_READ | IOMMU_WRITE)))
                return 0;

-       if (WARN_ON(upper_32_bits(iova) || upper_32_bits(paddr)))
+       if (WARN_ON(iova >= (1ULL << data->iop.cfg.ias) ||
+                   paddr >= (1ULL << data->iop.cfg.oas)))
                return -ERANGE;

===============================================

Then, change the oas in MTK 4GB mode, like this:

================================================
--- a/drivers/iommu/io-pgtable-arm-v7s.c
+++ b/drivers/iommu/io-pgtable-arm-v7s.c
@@ -721,7 +721,9 @@ static struct io_pgtable
*arm_v7s_alloc_pgtable(struct io_pgtable_cfg *cfg,
 {
        struct arm_v7s_io_pgtable *data;

-       if (cfg->ias > ARM_V7S_ADDR_BITS || cfg->oas >
ARM_V7S_ADDR_BITS)
+       if (cfg->ias > ARM_V7S_ADDR_BITS ||
+           (cfg->oas > ARM_V7S_ADDR_BITS &&
+            !(cfg->quirks & IO_PGTABLE_QUIRK_ARM_MTK_4GB)))
                return NULL;

        if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_NS |

--- a/drivers/iommu/mtk_iommu.c
+++ b/drivers/iommu/mtk_iommu.c
@@ -274,7 +274,7 @@ static int mtk_iommu_domain_finalise(struct
mtk_iommu_domain *dom)
                        IO_PGTABLE_QUIRK_TLBI_ON_MAP,
                .pgsize_bitmap = mtk_iommu_ops.pgsize_bitmap,
                .ias = 32,
-               .oas = 32,
+               .oas = 34,
                .tlb = &mtk_iommu_gather_ops,
                .iommu_dev = data->dev,
        };
================================================


> 
> > In order to unify code, in the "4GB mode", we add the bit32 for the
> > physical address manually in our driver.
> > 
> > Correspondingly, Adding bit32 and bit33 for the PA in the iova_to_phys
> > has to been moved into v7s.
> > 
> > Regarding whether the pagetable address could be over 4GB, the mt8183
> > support it while the previous mt8173 don't. thus keep it as is.
> > 
> > Signed-off-by: Yong Wu <yong.wu@mediatek.com>
> > Reviewed-by: Robin Murphy <robin.murphy@arm.com>
> > Reviewed-by: Evan Green <evgreen@chromium.org>
> > ---
> > Comparing with the previous one:
> > 1). Add a new patch "iommu/mediatek: Fix iova_to_phys PA start for 4GB
> > mode" before this one. Thus rebase it.
> > A little difference: in the 4gb mode, we add bit32 for PA. and the PA got
> > from iova_to_phys always have bit32 here, thus we should adjust it to locate
> > the valid pa.
> > 2). Add this code suggested from Evan.
> >  if (!data->plat_data->has_4gb_mode)
> > 	       data->enable_4GB = false;
> > ---
> >  drivers/iommu/io-pgtable-arm-v7s.c | 31 ++++++++++++++++++++++++-------
> >  drivers/iommu/mtk_iommu.c          | 29 ++++++++++++++++++-----------
> >  drivers/iommu/mtk_iommu.h          |  1 +
> >  3 files changed, 43 insertions(+), 18 deletions(-)
> > 
> > diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c
> > index 94c38db..4077822 100644
> > --- a/drivers/iommu/io-pgtable-arm-v7s.c
> > +++ b/drivers/iommu/io-pgtable-arm-v7s.c
> > @@ -123,7 +123,9 @@
> >  #define ARM_V7S_TEX_MASK		0x7
> >  #define ARM_V7S_ATTR_TEX(val)		(((val) & ARM_V7S_TEX_MASK) << ARM_V7S_TEX_SHIFT)
> >  
> > -#define ARM_V7S_ATTR_MTK_4GB		BIT(9) /* MTK extend it for 4GB mode */
> > +/* MediaTek extend the two bits below for over 4GB mode */
> > +#define ARM_V7S_ATTR_MTK_PA_BIT32	BIT(9)
> > +#define ARM_V7S_ATTR_MTK_PA_BIT33	BIT(4)
> >  
> >  /* *well, except for TEX on level 2 large pages, of course :( */
> >  #define ARM_V7S_CONT_PAGE_TEX_SHIFT	6
> > @@ -190,13 +192,22 @@ static dma_addr_t __arm_v7s_dma_addr(void *pages)
> >  static arm_v7s_iopte paddr_to_iopte(phys_addr_t paddr, int lvl,
> >  				    struct io_pgtable_cfg *cfg)
> >  {
> > -	return paddr & ARM_V7S_LVL_MASK(lvl);
> > +	arm_v7s_iopte pte = paddr & ARM_V7S_LVL_MASK(lvl);
> > +
> > +	if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_MTK_4GB) {
> > +		if (paddr & BIT_ULL(32))
> > +			pte |= ARM_V7S_ATTR_MTK_PA_BIT32;
> > +		if (paddr & BIT_ULL(33))
> > +			pte |= ARM_V7S_ATTR_MTK_PA_BIT33;
> > +	}
> > +	return pte;
> >  }
> >  
> >  static phys_addr_t iopte_to_paddr(arm_v7s_iopte pte, int lvl,
> >  				  struct io_pgtable_cfg *cfg)
> >  {
> >  	arm_v7s_iopte mask;
> > +	phys_addr_t paddr;
> >  
> >  	if (ARM_V7S_PTE_IS_TABLE(pte, lvl))
> >  		mask = ARM_V7S_TABLE_MASK;
> > @@ -205,7 +216,14 @@ static phys_addr_t iopte_to_paddr(arm_v7s_iopte pte, int lvl,
> >  	else
> >  		mask = ARM_V7S_LVL_MASK(lvl);
> >  
> > -	return pte & mask;
> > +	paddr = pte & mask;
> > +	if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_MTK_4GB) {
> > +		if (pte & ARM_V7S_ATTR_MTK_PA_BIT32)
> > +			paddr |= BIT_ULL(32);
> > +		if (pte & ARM_V7S_ATTR_MTK_PA_BIT33)
> > +			paddr |= BIT_ULL(33);
> > +	}
> > +	return paddr;
> 
> I think this relies on CONFIG_PHYS_ADDR_T_64BIT, which isn't always set on
> 32-bit ARM.

This was discussed at [1]. Robin commented that this is not needed and
build won't complain about this.

[1]
http://lists.infradead.org/pipermail/linux-mediatek/2018-November/015688.html

> 
> Will
Will Deacon July 11, 2019, 12:31 p.m. UTC | #3
On Thu, Jul 11, 2019 at 07:53:56PM +0800, Yong Wu wrote:
> On Wed, 2019-07-10 at 15:36 +0100, Will Deacon wrote:
> > On Sat, Jun 29, 2019 at 10:09:13AM +0800, Yong Wu wrote:
> > > MediaTek extend the arm v7s descriptor to support the dram over 4GB.
> > > 
> > > In the mt2712 and mt8173, it's called "4GB mode", the physical address
> > > is from 0x4000_0000 to 0x1_3fff_ffff, but from EMI point of view, it
> > > is remapped to high address from 0x1_0000_0000 to 0x1_ffff_ffff, the
> > > bit32 is always enabled. thus, in the M4U, we always enable the bit9
> > > for all PTEs which means to enable bit32 of physical address.
> > > 
> > > but in mt8183, M4U support the dram from 0x4000_0000 to 0x3_ffff_ffff
> > > which isn't remaped. We extend the PTEs: the bit9 represent bit32 of
> > > PA and the bit4 represent bit33 of PA. Meanwhile the iova still is
> > > 32bits.
> > 
> > What happens if bit4 is set in the pte for mt2712 or mt8173? Perhaps the
> 
> bit4 is ignored in mt2712 and mt8173(No effect).
> 
> > io-pgtable backend should be allowing oas > 32 when
> > IO_PGTABLE_QUIRK_ARM_MTK_4GB is set, and then enforcing that itself.
> 
> About oas, It looks the oas doesn't work in current the v7s. 
> 
> How about I add a new simple preparing patch like this(copy from
> io-pgtable-arm.c)?

This looks like the right sort of idea. Basically, I was thinking that you
can use the oas in conjunction with the quirk to specify whether or not
your two magic bits should be set. You could also then cap the oas using
the size of phys_addr_t to deal with my other comment.

Finally, I was hoping you could drop the |= BIT_ULL(32) and the &=
~BIT_ULL(32) bits of the mtk driver if the pgtable code now accepts higher
addresses. Did that not work out?
> 
> ==========================================
> --- a/drivers/iommu/io-pgtable-arm-v7s.c
> +++ b/drivers/iommu/io-pgtable-arm-v7s.c
> @@ -495,7 +495,8 @@ static int arm_v7s_map(struct io_pgtable_ops *ops,
> unsigned long iova,
>         if (!(prot & (IOMMU_READ | IOMMU_WRITE)))
>                 return 0;
> 
> -       if (WARN_ON(upper_32_bits(iova) || upper_32_bits(paddr)))
> +       if (WARN_ON(iova >= (1ULL << data->iop.cfg.ias) ||
> +                   paddr >= (1ULL << data->iop.cfg.oas)))
>                 return -ERANGE;
> 
> ===============================================
> 
> Then, change the oas in MTK 4GB mode, like this:
> 
> ================================================
> --- a/drivers/iommu/io-pgtable-arm-v7s.c
> +++ b/drivers/iommu/io-pgtable-arm-v7s.c
> @@ -721,7 +721,9 @@ static struct io_pgtable
> *arm_v7s_alloc_pgtable(struct io_pgtable_cfg *cfg,
>  {
>         struct arm_v7s_io_pgtable *data;
> 
> -       if (cfg->ias > ARM_V7S_ADDR_BITS || cfg->oas >
> ARM_V7S_ADDR_BITS)
> +       if (cfg->ias > ARM_V7S_ADDR_BITS ||
> +           (cfg->oas > ARM_V7S_ADDR_BITS &&
> +            !(cfg->quirks & IO_PGTABLE_QUIRK_ARM_MTK_4GB)))

This should probably still be capped at 34 bits.

> > > +	paddr = pte & mask;
> > > +	if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_MTK_4GB) {
> > > +		if (pte & ARM_V7S_ATTR_MTK_PA_BIT32)
> > > +			paddr |= BIT_ULL(32);
> > > +		if (pte & ARM_V7S_ATTR_MTK_PA_BIT33)
> > > +			paddr |= BIT_ULL(33);
> > > +	}
> > > +	return paddr;
> > 
> > I think this relies on CONFIG_PHYS_ADDR_T_64BIT, which isn't always set on
> > 32-bit ARM.
> 
> This was discussed at [1]. Robin commented that this is not needed and
> build won't complain about this.

It's not so much the build I was worried about, but more that we'd silently
be doing the wrong thing and I think we can fix that as I mentioned above.

Will
Yong Wu (吴勇) July 14, 2019, 4:41 a.m. UTC | #4
On Thu, 2019-07-11 at 13:31 +0100, Will Deacon wrote:
> On Thu, Jul 11, 2019 at 07:53:56PM +0800, Yong Wu wrote:
> > On Wed, 2019-07-10 at 15:36 +0100, Will Deacon wrote:
> > > On Sat, Jun 29, 2019 at 10:09:13AM +0800, Yong Wu wrote:
> > > > MediaTek extend the arm v7s descriptor to support the dram over 4GB.
> > > > 
> > > > In the mt2712 and mt8173, it's called "4GB mode", the physical address
> > > > is from 0x4000_0000 to 0x1_3fff_ffff, but from EMI point of view, it
> > > > is remapped to high address from 0x1_0000_0000 to 0x1_ffff_ffff, the
> > > > bit32 is always enabled. thus, in the M4U, we always enable the bit9
> > > > for all PTEs which means to enable bit32 of physical address.
> > > > 
> > > > but in mt8183, M4U support the dram from 0x4000_0000 to 0x3_ffff_ffff
> > > > which isn't remaped. We extend the PTEs: the bit9 represent bit32 of
> > > > PA and the bit4 represent bit33 of PA. Meanwhile the iova still is
> > > > 32bits.
> > > 
> > > What happens if bit4 is set in the pte for mt2712 or mt8173? Perhaps the
> > 
> > bit4 is ignored in mt2712 and mt8173(No effect).
> > 
> > > io-pgtable backend should be allowing oas > 32 when
> > > IO_PGTABLE_QUIRK_ARM_MTK_4GB is set, and then enforcing that itself.
> > 
> > About oas, It looks the oas doesn't work in current the v7s. 
> > 
> > How about I add a new simple preparing patch like this(copy from
> > io-pgtable-arm.c)?
> 
> This looks like the right sort of idea. Basically, I was thinking that you
> can use the oas in conjunction with the quirk to specify whether or not
> your two magic bits should be set. You could also then cap the oas using
> the size of phys_addr_t to deal with my other comment.
> 
> Finally, I was hoping you could drop the |= BIT_ULL(32) and the &=
> ~BIT_ULL(32) bits of the mtk driver if the pgtable code now accepts higher
> addresses. Did that not work out?


After the current patch, the pgtable has accepted the higher address.
the " |= BIT_ULL(32)" and "& = ~ BIT_ULL(32)" is for a special case(we
call it 4GB mode).

Now MediaTek IOMMU support 2 kind memory:
1) normal case: PA is 0x4000_0000 - 0x3_ffff_ffff. the PA won't be
remapped. mt8183 and the non-4GB mode of mt8173/mt2712 use this mode.

2) 4GB Mode: PA is 0x4000_0000 - 0x1_3fff_ffff. But the PA will remapped
to 0x1_0000_0000 to 0x1_ffff_ffff. This is for the 4GB mode of
mt8173/mt2712. This case is so special that we should change the PA
manually(add bit32).
(mt2712 and mt8173 have both mode: 4GB and non-4GB.)

If we try to use oas and our quirk to cover this two case. Then I can
use "oas == 33" only for this 4GB mode. and "oas == 34" for the normal
case even though the PA mayn't reach 34bit. Also I should add some
"workaround" for the 4GB mode(oas==33).

I copy the new patch in the mail below(have dropped the "|= BIT_ULL(32)"
and the "&= ~BIT_ULL(32)) in mtk iommu". please help have a look if it
is ok.
(another thing: Current the PA can support over 4GB. So the quirk name
"MTK_4GB" looks not suitable, I used a new patch rename to "MTK_EXT").

> > 
> > ==========================================
> > --- a/drivers/iommu/io-pgtable-arm-v7s.c
> > +++ b/drivers/iommu/io-pgtable-arm-v7s.c
> > @@ -495,7 +495,8 @@ static int arm_v7s_map(struct io_pgtable_ops *ops,
> > unsigned long iova,
> >         if (!(prot & (IOMMU_READ | IOMMU_WRITE)))
> >                 return 0;
> > 
> > -       if (WARN_ON(upper_32_bits(iova) || upper_32_bits(paddr)))
> > +       if (WARN_ON(iova >= (1ULL << data->iop.cfg.ias) ||
> > +                   paddr >= (1ULL << data->iop.cfg.oas)))
> >                 return -ERANGE;
> > 
> > ===============================================
> > 
> > Then, change the oas in MTK 4GB mode, like this:
> > 
> > ================================================
> > --- a/drivers/iommu/io-pgtable-arm-v7s.c
> > +++ b/drivers/iommu/io-pgtable-arm-v7s.c
> > @@ -721,7 +721,9 @@ static struct io_pgtable
> > *arm_v7s_alloc_pgtable(struct io_pgtable_cfg *cfg,
> >  {
> >         struct arm_v7s_io_pgtable *data;
> > 
> > -       if (cfg->ias > ARM_V7S_ADDR_BITS || cfg->oas >
> > ARM_V7S_ADDR_BITS)
> > +       if (cfg->ias > ARM_V7S_ADDR_BITS ||
> > +           (cfg->oas > ARM_V7S_ADDR_BITS &&
> > +            !(cfg->quirks & IO_PGTABLE_QUIRK_ARM_MTK_4GB)))
> 
> This should probably still be capped at 34 bits.

Don't get here. This is only a simple checking (if (oas > 32) return
NULL). I should avoid this checking for our case.

> 
> > > > +	paddr = pte & mask;
> > > > +	if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_MTK_4GB) {
> > > > +		if (pte & ARM_V7S_ATTR_MTK_PA_BIT32)
> > > > +			paddr |= BIT_ULL(32);
> > > > +		if (pte & ARM_V7S_ATTR_MTK_PA_BIT33)
> > > > +			paddr |= BIT_ULL(33);
> > > > +	}
> > > > +	return paddr;
> > > 
> > > I think this relies on CONFIG_PHYS_ADDR_T_64BIT, which isn't always set on
> > > 32-bit ARM.
> > 
> > This was discussed at [1]. Robin commented that this is not needed and
> > build won't complain about this.
> 
> It's not so much the build I was worried about, but more that we'd silently
> be doing the wrong thing and I think we can fix that as I mentioned above.

OK. see below.

> 
> Will
> 

===================================
-#define ARM_V7S_ATTR_MTK_4GB  BIT(9) /* MTK extend it for 4GB mode */
+/* MediaTek extend the two bits for PA 32bit/33bit */
+#define ARM_V7S_ATTR_MTK_PA_BIT32  BIT(9)
+#define ARM_V7S_ATTR_MTK_PA_BIT33  BIT(4)
 
 /* *well, except for TEX on level 2 large pages, of course :( */
 #define ARM_V7S_CONT_PAGE_TEX_SHIFT 6
@@ -190,13 +192,22 @@ static dma_addr_t __arm_v7s_dma_addr(void *pages)
 static arm_v7s_iopte paddr_to_iopte(phys_addr_t paddr, int lvl,
 				    struct io_pgtable_cfg *cfg)
 {
-	return paddr & ARM_V7S_LVL_MASK(lvl);
+	arm_v7s_iopte pte = paddr & ARM_V7S_LVL_MASK(lvl);
+
+	if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_MTK_EXT) {
+		if ((paddr & BIT_ULL(32)) || cfg->oas == 33 /* 4GB mode */)
+			pte |= ARM_V7S_ATTR_MTK_PA_BIT32;
+		if (paddr & BIT_ULL(33))
+			pte |= ARM_V7S_ATTR_MTK_PA_BIT33;
+	}
+	return pte;
 }
 
 static phys_addr_t iopte_to_paddr(arm_v7s_iopte pte, int lvl,
 				  struct io_pgtable_cfg *cfg)
 {
 	arm_v7s_iopte mask;
+	phys_addr_t paddr;
 
 	if (ARM_V7S_PTE_IS_TABLE(pte, lvl))
 		mask = ARM_V7S_TABLE_MASK;
@@ -205,7 +216,20 @@ static phys_addr_t iopte_to_paddr(arm_v7s_iopte
pte, int lvl,
 	else
 		mask = ARM_V7S_LVL_MASK(lvl);
 
-	return pte & mask;
+	paddr = pte & mask;
+	if (IS_ENABLED(CONFIG_PHYS_ADDR_T_64BIT) &&
+	    (cfg->quirks & IO_PGTABLE_QUIRK_ARM_MTK_EXT)) {
+		/*
+		 * Workaround for MTK 4GB Mode:
+		 * Add BIT32 only when PA < 0x4000_0000.
+		 */
+		if ((cfg->oas == 33 && paddr < 0x40000000UL) ||
+		    (cfg->oas > 33 && (pte & ARM_V7S_ATTR_MTK_PA_BIT32)))
+			paddr |= BIT_ULL(32);
+		if (pte & ARM_V7S_ATTR_MTK_PA_BIT33)
+			paddr |= BIT_ULL(33);
+	}
+	return paddr;
 }
 
 static arm_v7s_iopte *iopte_deref(arm_v7s_iopte pte, int lvl,
@@ -326,9 +350,6 @@ static arm_v7s_iopte arm_v7s_prot_to_pte(int prot,
int lvl,
 	if (lvl == 1 && (cfg->quirks & IO_PGTABLE_QUIRK_ARM_NS))
 		pte |= ARM_V7S_ATTR_NS_SECTION;
 
-	if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_MTK_EXT)
-		pte |= ARM_V7S_ATTR_MTK_4GB;
-
 	return pte;
 }
 
@@ -742,7 +763,9 @@ static struct io_pgtable
*arm_v7s_alloc_pgtable(struct io_pgtable_cfg *cfg,
 {
 	struct arm_v7s_io_pgtable *data;
 
-	if (cfg->ias > ARM_V7S_ADDR_BITS || cfg->oas > ARM_V7S_ADDR_BITS)
+	if (cfg->ias > ARM_V7S_ADDR_BITS ||
+	    (cfg->oas > ARM_V7S_ADDR_BITS &&
+	     !(cfg->quirks & IO_PGTABLE_QUIRK_ARM_MTK_EXT)))
 		return NULL;
 
 	if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_NS |
diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
index 85e71fb..1a141ea 100644
--- a/drivers/iommu/mtk_iommu.c
+++ b/drivers/iommu/mtk_iommu.c
@@ -271,16 +271,18 @@ static int mtk_iommu_domain_finalise(struct
mtk_iommu_domain *dom)
 	dom->cfg = (struct io_pgtable_cfg) {
 		.quirks = IO_PGTABLE_QUIRK_ARM_NS |
 			IO_PGTABLE_QUIRK_NO_PERMS |
-			IO_PGTABLE_QUIRK_TLBI_ON_MAP,
+			IO_PGTABLE_QUIRK_TLBI_ON_MAP |
+			IO_PGTABLE_QUIRK_ARM_MTK_EXT,
 		.pgsize_bitmap = mtk_iommu_ops.pgsize_bitmap,
 		.ias = 32,
-		.oas = 32,
 		.tlb = &mtk_iommu_gather_ops,
 		.iommu_dev = data->dev,
 	};
 
 	if (data->enable_4GB)
-		dom->cfg.quirks |= IO_PGTABLE_QUIRK_ARM_MTK_EXT;
+		dom->cfg.oas = 33; /* Only for 4GB mode */
+	else
+		dom->cfg.oas = 34;
 
 	dom->iop = alloc_io_pgtable_ops(ARM_V7S, &dom->cfg, data);
 	if (!dom->iop) {
@@ -371,8 +373,7 @@ static int mtk_iommu_map(struct iommu_domain
*domain, unsigned long iova,
 	int ret;
 
 	spin_lock_irqsave(&dom->pgtlock, flags);
-	ret = dom->iop->map(dom->iop, iova, paddr & DMA_BIT_MASK(32),
-			    size, prot);
+	ret = dom->iop->map(dom->iop, iova, paddr, size, prot);
 	spin_unlock_irqrestore(&dom->pgtlock, flags);
 
 	return ret;
@@ -401,7 +402,6 @@ static phys_addr_t mtk_iommu_iova_to_phys(struct
iommu_domain *domain,
 					  dma_addr_t iova)
 {
 	struct mtk_iommu_domain *dom = to_mtk_domain(domain);
-	struct mtk_iommu_data *data = mtk_iommu_get_m4u_data();
 	unsigned long flags;
 	phys_addr_t pa;
 
@@ -409,9 +409,6 @@ static phys_addr_t mtk_iommu_iova_to_phys(struct
iommu_domain *domain,
 	pa = dom->iop->iova_to_phys(dom->iop, iova);
 	spin_unlock_irqrestore(&dom->pgtlock, flags);
 
-	if (data->enable_4GB)
-		pa |= BIT_ULL(32);
-
 	return pa;
 } 
===================================
> _______________________________________________
> Linux-mediatek mailing list
> Linux-mediatek@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-mediatek
Will Deacon July 15, 2019, 9:51 a.m. UTC | #5
On Sun, Jul 14, 2019 at 12:41:20PM +0800, Yong Wu wrote:
> On Thu, 2019-07-11 at 13:31 +0100, Will Deacon wrote:
> > This looks like the right sort of idea. Basically, I was thinking that you
> > can use the oas in conjunction with the quirk to specify whether or not
> > your two magic bits should be set. You could also then cap the oas using
> > the size of phys_addr_t to deal with my other comment.
> > 
> > Finally, I was hoping you could drop the |= BIT_ULL(32) and the &=
> > ~BIT_ULL(32) bits of the mtk driver if the pgtable code now accepts higher
> > addresses. Did that not work out?
> 
> After the current patch, the pgtable has accepted the higher address.
> the " |= BIT_ULL(32)" and "& = ~ BIT_ULL(32)" is for a special case(we
> call it 4GB mode).
> 
> Now MediaTek IOMMU support 2 kind memory:
> 1) normal case: PA is 0x4000_0000 - 0x3_ffff_ffff. the PA won't be
> remapped. mt8183 and the non-4GB mode of mt8173/mt2712 use this mode.
> 
> 2) 4GB Mode: PA is 0x4000_0000 - 0x1_3fff_ffff. But the PA will remapped
> to 0x1_0000_0000 to 0x1_ffff_ffff. This is for the 4GB mode of
> mt8173/mt2712. This case is so special that we should change the PA
> manually(add bit32).
> (mt2712 and mt8173 have both mode: 4GB and non-4GB.)
> 
> If we try to use oas and our quirk to cover this two case. Then I can
> use "oas == 33" only for this 4GB mode. and "oas == 34" for the normal
> case even though the PA mayn't reach 34bit. Also I should add some
> "workaround" for the 4GB mode(oas==33).
> 
> I copy the new patch in the mail below(have dropped the "|= BIT_ULL(32)"
> and the "&= ~BIT_ULL(32)) in mtk iommu". please help have a look if it
> is ok.
> (another thing: Current the PA can support over 4GB. So the quirk name
> "MTK_4GB" looks not suitable, I used a new patch rename to "MTK_EXT").

Makes sense, thanks. One comment below.

> @@ -205,7 +216,20 @@ static phys_addr_t iopte_to_paddr(arm_v7s_iopte
> pte, int lvl,
>  	else
>  		mask = ARM_V7S_LVL_MASK(lvl);
>  
> -	return pte & mask;
> +	paddr = pte & mask;
> +	if (IS_ENABLED(CONFIG_PHYS_ADDR_T_64BIT) &&
> +	    (cfg->quirks & IO_PGTABLE_QUIRK_ARM_MTK_EXT)) {
> +		/*
> +		 * Workaround for MTK 4GB Mode:
> +		 * Add BIT32 only when PA < 0x4000_0000.
> +		 */
> +		if ((cfg->oas == 33 && paddr < 0x40000000UL) ||
> +		    (cfg->oas > 33 && (pte & ARM_V7S_ATTR_MTK_PA_BIT32)))
> +			paddr |= BIT_ULL(32);
> +		if (pte & ARM_V7S_ATTR_MTK_PA_BIT33)
> +			paddr |= BIT_ULL(33);
> +	}
> +	return paddr;
>  }
>  
>  static arm_v7s_iopte *iopte_deref(arm_v7s_iopte pte, int lvl,
> @@ -326,9 +350,6 @@ static arm_v7s_iopte arm_v7s_prot_to_pte(int prot,
> int lvl,
>  	if (lvl == 1 && (cfg->quirks & IO_PGTABLE_QUIRK_ARM_NS))
>  		pte |= ARM_V7S_ATTR_NS_SECTION;
>  
> -	if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_MTK_EXT)
> -		pte |= ARM_V7S_ATTR_MTK_4GB;
> -
>  	return pte;
>  }
>  
> @@ -742,7 +763,9 @@ static struct io_pgtable
> *arm_v7s_alloc_pgtable(struct io_pgtable_cfg *cfg,
>  {
>  	struct arm_v7s_io_pgtable *data;
>  
> -	if (cfg->ias > ARM_V7S_ADDR_BITS || cfg->oas > ARM_V7S_ADDR_BITS)
> +	if (cfg->ias > ARM_V7S_ADDR_BITS ||
> +	    (cfg->oas > ARM_V7S_ADDR_BITS &&
> +	     !(cfg->quirks & IO_PGTABLE_QUIRK_ARM_MTK_EXT)))
>  		return NULL;

I think you can rework this to do something like:

	if (cfg->ias > ARM_V7S_ADDR_BITS)
		return NULL;

	if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_MTK_EXT) {
		if (!IS_ENABLED(CONFIG_PHYS_ADDR_T_64BIT))
			cfg->oas = min(cfg->oas, ARM_V7S_ADDR_BITS);
		else if (cfg->oas > 34)
			return NULL;
	} else if (cfg->oas > ARM_V7S_ADDR_BITS) {
		return NULL;
	}

so that we clamp the oas when phys_addr_t is 32-bit for you. That should
allow you to remove lots of the checking from iopte_to_paddr() too if you
check against oas in the map() function.

Does that make sense?

Will
Yong Wu (吴勇) July 17, 2019, 12:44 p.m. UTC | #6
On Mon, 2019-07-15 at 10:51 +0100, Will Deacon wrote:
> On Sun, Jul 14, 2019 at 12:41:20PM +0800, Yong Wu wrote:
> > On Thu, 2019-07-11 at 13:31 +0100, Will Deacon wrote:
> > > This looks like the right sort of idea. Basically, I was thinking that you
> > > can use the oas in conjunction with the quirk to specify whether or not
> > > your two magic bits should be set. You could also then cap the oas using
> > > the size of phys_addr_t to deal with my other comment.
> > > 
> > > Finally, I was hoping you could drop the |= BIT_ULL(32) and the &=
> > > ~BIT_ULL(32) bits of the mtk driver if the pgtable code now accepts higher
> > > addresses. Did that not work out?
> > 
> > After the current patch, the pgtable has accepted the higher address.
> > the " |= BIT_ULL(32)" and "& = ~ BIT_ULL(32)" is for a special case(we
> > call it 4GB mode).
> > 
> > Now MediaTek IOMMU support 2 kind memory:
> > 1) normal case: PA is 0x4000_0000 - 0x3_ffff_ffff. the PA won't be
> > remapped. mt8183 and the non-4GB mode of mt8173/mt2712 use this mode.
> > 
> > 2) 4GB Mode: PA is 0x4000_0000 - 0x1_3fff_ffff. But the PA will remapped
> > to 0x1_0000_0000 to 0x1_ffff_ffff. This is for the 4GB mode of
> > mt8173/mt2712. This case is so special that we should change the PA
> > manually(add bit32).
> > (mt2712 and mt8173 have both mode: 4GB and non-4GB.)
> > 
> > If we try to use oas and our quirk to cover this two case. Then I can
> > use "oas == 33" only for this 4GB mode. and "oas == 34" for the normal
> > case even though the PA mayn't reach 34bit. Also I should add some
> > "workaround" for the 4GB mode(oas==33).
> > 
> > I copy the new patch in the mail below(have dropped the "|= BIT_ULL(32)"
> > and the "&= ~BIT_ULL(32)) in mtk iommu". please help have a look if it
> > is ok.
> > (another thing: Current the PA can support over 4GB. So the quirk name
> > "MTK_4GB" looks not suitable, I used a new patch rename to "MTK_EXT").
> 
> Makes sense, thanks. One comment below.
> 
> > @@ -205,7 +216,20 @@ static phys_addr_t iopte_to_paddr(arm_v7s_iopte
> > pte, int lvl,
> >  	else
> >  		mask = ARM_V7S_LVL_MASK(lvl);
> >  
> > -	return pte & mask;
> > +	paddr = pte & mask;
> > +	if (IS_ENABLED(CONFIG_PHYS_ADDR_T_64BIT) &&
> > +	    (cfg->quirks & IO_PGTABLE_QUIRK_ARM_MTK_EXT)) {
> > +		/*
> > +		 * Workaround for MTK 4GB Mode:
> > +		 * Add BIT32 only when PA < 0x4000_0000.
> > +		 */
> > +		if ((cfg->oas == 33 && paddr < 0x40000000UL) ||
> > +		    (cfg->oas > 33 && (pte & ARM_V7S_ATTR_MTK_PA_BIT32)))
> > +			paddr |= BIT_ULL(32);
> > +		if (pte & ARM_V7S_ATTR_MTK_PA_BIT33)
> > +			paddr |= BIT_ULL(33);
> > +	}
> > +	return paddr;
> >  }
> >  
> >  static arm_v7s_iopte *iopte_deref(arm_v7s_iopte pte, int lvl,
> > @@ -326,9 +350,6 @@ static arm_v7s_iopte arm_v7s_prot_to_pte(int prot,
> > int lvl,
> >  	if (lvl == 1 && (cfg->quirks & IO_PGTABLE_QUIRK_ARM_NS))
> >  		pte |= ARM_V7S_ATTR_NS_SECTION;
> >  
> > -	if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_MTK_EXT)
> > -		pte |= ARM_V7S_ATTR_MTK_4GB;
> > -
> >  	return pte;
> >  }
> >  
> > @@ -742,7 +763,9 @@ static struct io_pgtable
> > *arm_v7s_alloc_pgtable(struct io_pgtable_cfg *cfg,
> >  {
> >  	struct arm_v7s_io_pgtable *data;
> >  
> > -	if (cfg->ias > ARM_V7S_ADDR_BITS || cfg->oas > ARM_V7S_ADDR_BITS)
> > +	if (cfg->ias > ARM_V7S_ADDR_BITS ||
> > +	    (cfg->oas > ARM_V7S_ADDR_BITS &&
> > +	     !(cfg->quirks & IO_PGTABLE_QUIRK_ARM_MTK_EXT)))
> >  		return NULL;
> 
> I think you can rework this to do something like:
> 
> 	if (cfg->ias > ARM_V7S_ADDR_BITS)
> 		return NULL;
> 
> 	if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_MTK_EXT) {
> 		if (!IS_ENABLED(CONFIG_PHYS_ADDR_T_64BIT))
> 			cfg->oas = min(cfg->oas, ARM_V7S_ADDR_BITS);
> 		else if (cfg->oas > 34)
> 			return NULL;
> 	} else if (cfg->oas > ARM_V7S_ADDR_BITS) {
> 		return NULL;
> 	}
> 
> so that we clamp the oas when phys_addr_t is 32-bit for you. That should
> allow you to remove lots of the checking from iopte_to_paddr() too if you
> check against oas in the map() function.
> 
> Does that make sense?

Of course I'm ok for this. I'm only afraid that this function has
already 3 checking "if (x) return NULL", Here we add a new one and so
many lines... Maybe the user should guarantee the right value of oas.
How about move it into mtk_iommu.c?

About the checking of iopte_to_paddr, I can not remove them. I know it
may be a bit special and not readable. Hmm, I guess I should use a MACRO
instead of the hard code 33 for the special 4GB mode case.

Then the patch would be like:

//=========================
static phys_addr_t iopte_to_paddr(arm_v7s_iopte pte, int lvl,
 				  struct io_pgtable_cfg *cfg)
 {
 	arm_v7s_iopte mask;
+	phys_addr_t paddr;
 
 	if (ARM_V7S_PTE_IS_TABLE(pte, lvl))
 		mask = ARM_V7S_TABLE_MASK;
@@ -205,7 +216,21 @@ static phys_addr_t iopte_to_paddr(arm_v7s_iopte
pte, int lvl,
 	else
 		mask = ARM_V7S_LVL_MASK(lvl);
 
-	return pte & mask;
+	paddr = pte & mask;
+	if (cfg->oas > ARM_V7S_ADDR_BITS &&
+	    (cfg->quirks & IO_PGTABLE_QUIRK_ARM_MTK_EXT)) {
+		/*
+		 * Workaround for MTK 4GB Mode:
+		 * Add BIT32 only when PA < 0x4000_0000.
+		 */
+		if (cfg->oas == ARM_V7S_MTK_4GB_OAS && paddr < 0x40000000UL)
+			paddr |= BIT_ULL(32);
+		else if (pte & ARM_V7S_ATTR_MTK_PA_BIT32)
+			paddr |= BIT_ULL(32);
+		if (pte & ARM_V7S_ATTR_MTK_PA_BIT33)
+			paddr |= BIT_ULL(33);
+	}
+	return paddr;
 } 

@@ -741,8 +763,10 @@ static struct io_pgtable
*arm_v7s_alloc_pgtable(struct io_pgtable_cfg *cfg,
 						void *cookie)
 {
 	struct arm_v7s_io_pgtable *data;
-
-	if (cfg->ias > ARM_V7S_ADDR_BITS || cfg->oas > ARM_V7S_ADDR_BITS)
+ 	
+	if (cfg->ias > ARM_V7S_ADDR_BITS ||
+	    (cfg->oas > ARM_V7S_ADDR_BITS &&
+	     !(cfg->quirks & IO_PGTABLE_QUIRK_ARM_MTK_EXT)))
 		return NULL;
 
 	if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_NS |
diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
index 85e71fb..4efc5a3 100644
--- a/drivers/iommu/mtk_iommu.c
+++ b/drivers/iommu/mtk_iommu.c
@@ -271,16 +271,20 @@ static int mtk_iommu_domain_finalise(struct
mtk_iommu_domain *dom)
 	dom->cfg = (struct io_pgtable_cfg) {
 		.quirks = IO_PGTABLE_QUIRK_ARM_NS |
 			IO_PGTABLE_QUIRK_NO_PERMS |
-			IO_PGTABLE_QUIRK_TLBI_ON_MAP,
+			IO_PGTABLE_QUIRK_TLBI_ON_MAP |
+			IO_PGTABLE_QUIRK_ARM_MTK_EXT,
 		.pgsize_bitmap = mtk_iommu_ops.pgsize_bitmap,
 		.ias = 32,
-		.oas = 32,
 		.tlb = &mtk_iommu_gather_ops,
 		.iommu_dev = data->dev,
 	};
 
-	if (data->enable_4GB)
-		dom->cfg.quirks |= IO_PGTABLE_QUIRK_ARM_MTK_EXT;
+	if (!IS_ENABLED(CONFIG_PHYS_ADDR_T_64BIT))
+		dom->cfg.oas = 32;
+	else if (data->enable_4GB)
+		dom->cfg.oas = ARM_V7S_MTK_4GB_OAS;
+	else
+		dom->cfg.oas = 34;

--- a/include/linux/io-pgtable.h
+++ b/include/linux/io-pgtable.h
@@ -117,6 +116,8 @@ struct io_pgtable_cfg {
 	};
 };
 
+#define ARM_V7S_MTK_4GB_OAS			33
+
 /**
  * struct io_pgtable_ops - Page table manipulation API for IOMMU
drivers.
  *
=====================================
> 
> Will
Will Deacon July 17, 2019, 2:23 p.m. UTC | #7
On Wed, Jul 17, 2019 at 08:44:19PM +0800, Yong Wu wrote:
> On Mon, 2019-07-15 at 10:51 +0100, Will Deacon wrote:
> > On Sun, Jul 14, 2019 at 12:41:20PM +0800, Yong Wu wrote:
> > > @@ -742,7 +763,9 @@ static struct io_pgtable
> > > *arm_v7s_alloc_pgtable(struct io_pgtable_cfg *cfg,
> > >  {
> > >  	struct arm_v7s_io_pgtable *data;
> > >  
> > > -	if (cfg->ias > ARM_V7S_ADDR_BITS || cfg->oas > ARM_V7S_ADDR_BITS)
> > > +	if (cfg->ias > ARM_V7S_ADDR_BITS ||
> > > +	    (cfg->oas > ARM_V7S_ADDR_BITS &&
> > > +	     !(cfg->quirks & IO_PGTABLE_QUIRK_ARM_MTK_EXT)))
> > >  		return NULL;
> > 
> > I think you can rework this to do something like:
> > 
> > 	if (cfg->ias > ARM_V7S_ADDR_BITS)
> > 		return NULL;
> > 
> > 	if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_MTK_EXT) {
> > 		if (!IS_ENABLED(CONFIG_PHYS_ADDR_T_64BIT))
> > 			cfg->oas = min(cfg->oas, ARM_V7S_ADDR_BITS);
> > 		else if (cfg->oas > 34)
> > 			return NULL;
> > 	} else if (cfg->oas > ARM_V7S_ADDR_BITS) {
> > 		return NULL;
> > 	}
> > 
> > so that we clamp the oas when phys_addr_t is 32-bit for you. That should
> > allow you to remove lots of the checking from iopte_to_paddr() too if you
> > check against oas in the map() function.
> > 
> > Does that make sense?
> 
> Of course I'm ok for this. I'm only afraid that this function has
> already 3 checking "if (x) return NULL", Here we add a new one and so
> many lines... Maybe the user should guarantee the right value of oas.
> How about move it into mtk_iommu.c?
> 
> About the checking of iopte_to_paddr, I can not remove them. I know it
> may be a bit special and not readable. Hmm, I guess I should use a MACRO
> instead of the hard code 33 for the special 4GB mode case.

Why can't you just do something like:

	if (!(cfg->quirks & IO_PGTABLE_QUIRK_ARM_MTK_EXT))
		return paddr;

	if (pte & ARM_V7S_ATTR_MTK_PA_BIT33)
		paddr |= BIT_ULL(33);

	if (pte & ARM_V&S_ATTR_MTK_PA_BIT32)
		paddr |= BIT_ULL(32);

	return paddr;

The diff I sent previously sanitises the oas at init time, and then you
can just enforce it in map().

Will
Yong Wu (吴勇) July 18, 2019, 5:37 a.m. UTC | #8
On Wed, 2019-07-17 at 15:23 +0100, Will Deacon wrote:
> On Wed, Jul 17, 2019 at 08:44:19PM +0800, Yong Wu wrote:
> > On Mon, 2019-07-15 at 10:51 +0100, Will Deacon wrote:
> > > On Sun, Jul 14, 2019 at 12:41:20PM +0800, Yong Wu wrote:
> > > > @@ -742,7 +763,9 @@ static struct io_pgtable
> > > > *arm_v7s_alloc_pgtable(struct io_pgtable_cfg *cfg,
> > > >  {
> > > >  	struct arm_v7s_io_pgtable *data;
> > > >  
> > > > -	if (cfg->ias > ARM_V7S_ADDR_BITS || cfg->oas > ARM_V7S_ADDR_BITS)
> > > > +	if (cfg->ias > ARM_V7S_ADDR_BITS ||
> > > > +	    (cfg->oas > ARM_V7S_ADDR_BITS &&
> > > > +	     !(cfg->quirks & IO_PGTABLE_QUIRK_ARM_MTK_EXT)))
> > > >  		return NULL;
> > > 
> > > I think you can rework this to do something like:
> > > 
> > > 	if (cfg->ias > ARM_V7S_ADDR_BITS)
> > > 		return NULL;
> > > 
> > > 	if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_MTK_EXT) {
> > > 		if (!IS_ENABLED(CONFIG_PHYS_ADDR_T_64BIT))
> > > 			cfg->oas = min(cfg->oas, ARM_V7S_ADDR_BITS);
> > > 		else if (cfg->oas > 34)
> > > 			return NULL;
> > > 	} else if (cfg->oas > ARM_V7S_ADDR_BITS) {
> > > 		return NULL;
> > > 	}
> > > 
> > > so that we clamp the oas when phys_addr_t is 32-bit for you. That should
> > > allow you to remove lots of the checking from iopte_to_paddr() too if you
> > > check against oas in the map() function.
> > > 
> > > Does that make sense?
> > 
> > Of course I'm ok for this. I'm only afraid that this function has
> > already 3 checking "if (x) return NULL", Here we add a new one and so
> > many lines... Maybe the user should guarantee the right value of oas.
> > How about move it into mtk_iommu.c?
> > 
> > About the checking of iopte_to_paddr, I can not remove them. I know it
> > may be a bit special and not readable. Hmm, I guess I should use a MACRO
> > instead of the hard code 33 for the special 4GB mode case.
> 
> Why can't you just do something like:
> 
> 	if (!(cfg->quirks & IO_PGTABLE_QUIRK_ARM_MTK_EXT))
> 		return paddr;
> 
> 	if (pte & ARM_V7S_ATTR_MTK_PA_BIT33)
> 		paddr |= BIT_ULL(33);

OK here.

> 
> 	if (pte & ARM_V&S_ATTR_MTK_PA_BIT32)
> 		paddr |= BIT_ULL(32);

No here, The flow is a bit special for 4GB mode here.

This is the detailed remap relationship for our 4GB mode.
           CPU PA               ->    HW PA
register: 0x0 ~ 0x3fff_ffff
dram 1G:0x4000_0000~0x7fff_ffff ->0x1_4000_0000~0x1_7fff_ffff(Add bit32)
dram 2G:0x8000_0000~0xbfff_ffff ->0x1_8000_0000~0x1_bfff_ffff(Add bit32)
dram 3G:0xc000_0000~0xffff_ffff ->0x1_c000_0000~0x1_ffff_ffff(Add bit32)
dram 4G:0x1_0000_0000~0x1_3fff_ffff->0x1_0000_0000~0x1_3fff_ffff

Thus, in the 4GB mode, we should add always add bit9 in pte(for bit32
PA). But we can not always add bit32 in the iova_to_phys. The valid PA
range should be 0x4000_0000 - 0x1_3fff_ffff. Thus, we can only add bit32
when the PA in pte < 0x4000_0000, keep it as-is if the PA in pte located
from 0x4000_0000 to 0xffff_ffff.

This issue exist all the time after we added 4GB mode for mt8173.

Thus, I have to add a special flow for 4gb mode here:

	/* Workaround for MTK 4GB Mode: Add BIT32 only when PA < 0x4000_0000.*/
	if (cfg->oas == ARM_V7S_MTK_4GB_OAS && paddr < 0x40000000UL)
		paddr |= BIT_ULL(32);
	else if (pte & ARM_V7S_ATTR_MTK_PA_BIT32)
		paddr |= BIT_ULL(32);

> 
> 	return paddr;
> 
> The diff I sent previously sanitises the oas at init time, and then you
> can just enforce it in map().
> 
> Will
diff mbox series

Patch

diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c
index 94c38db..4077822 100644
--- a/drivers/iommu/io-pgtable-arm-v7s.c
+++ b/drivers/iommu/io-pgtable-arm-v7s.c
@@ -123,7 +123,9 @@ 
 #define ARM_V7S_TEX_MASK		0x7
 #define ARM_V7S_ATTR_TEX(val)		(((val) & ARM_V7S_TEX_MASK) << ARM_V7S_TEX_SHIFT)
 
-#define ARM_V7S_ATTR_MTK_4GB		BIT(9) /* MTK extend it for 4GB mode */
+/* MediaTek extend the two bits below for over 4GB mode */
+#define ARM_V7S_ATTR_MTK_PA_BIT32	BIT(9)
+#define ARM_V7S_ATTR_MTK_PA_BIT33	BIT(4)
 
 /* *well, except for TEX on level 2 large pages, of course :( */
 #define ARM_V7S_CONT_PAGE_TEX_SHIFT	6
@@ -190,13 +192,22 @@  static dma_addr_t __arm_v7s_dma_addr(void *pages)
 static arm_v7s_iopte paddr_to_iopte(phys_addr_t paddr, int lvl,
 				    struct io_pgtable_cfg *cfg)
 {
-	return paddr & ARM_V7S_LVL_MASK(lvl);
+	arm_v7s_iopte pte = paddr & ARM_V7S_LVL_MASK(lvl);
+
+	if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_MTK_4GB) {
+		if (paddr & BIT_ULL(32))
+			pte |= ARM_V7S_ATTR_MTK_PA_BIT32;
+		if (paddr & BIT_ULL(33))
+			pte |= ARM_V7S_ATTR_MTK_PA_BIT33;
+	}
+	return pte;
 }
 
 static phys_addr_t iopte_to_paddr(arm_v7s_iopte pte, int lvl,
 				  struct io_pgtable_cfg *cfg)
 {
 	arm_v7s_iopte mask;
+	phys_addr_t paddr;
 
 	if (ARM_V7S_PTE_IS_TABLE(pte, lvl))
 		mask = ARM_V7S_TABLE_MASK;
@@ -205,7 +216,14 @@  static phys_addr_t iopte_to_paddr(arm_v7s_iopte pte, int lvl,
 	else
 		mask = ARM_V7S_LVL_MASK(lvl);
 
-	return pte & mask;
+	paddr = pte & mask;
+	if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_MTK_4GB) {
+		if (pte & ARM_V7S_ATTR_MTK_PA_BIT32)
+			paddr |= BIT_ULL(32);
+		if (pte & ARM_V7S_ATTR_MTK_PA_BIT33)
+			paddr |= BIT_ULL(33);
+	}
+	return paddr;
 }
 
 static arm_v7s_iopte *iopte_deref(arm_v7s_iopte pte, int lvl,
@@ -326,9 +344,6 @@  static arm_v7s_iopte arm_v7s_prot_to_pte(int prot, int lvl,
 	if (lvl == 1 && (cfg->quirks & IO_PGTABLE_QUIRK_ARM_NS))
 		pte |= ARM_V7S_ATTR_NS_SECTION;
 
-	if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_MTK_4GB)
-		pte |= ARM_V7S_ATTR_MTK_4GB;
-
 	return pte;
 }
 
@@ -515,7 +530,9 @@  static int arm_v7s_map(struct io_pgtable_ops *ops, unsigned long iova,
 	if (!(prot & (IOMMU_READ | IOMMU_WRITE)))
 		return 0;
 
-	if (WARN_ON(upper_32_bits(iova) || upper_32_bits(paddr)))
+	if (WARN_ON(upper_32_bits(iova)) ||
+	    WARN_ON(upper_32_bits(paddr) &&
+		    !(iop->cfg.quirks & IO_PGTABLE_QUIRK_ARM_MTK_4GB)))
 		return -ERANGE;
 
 	ret = __arm_v7s_map(data, iova, paddr, size, prot, 1, data->pgd);
diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
index fefc2e0..4bab283 100644
--- a/drivers/iommu/mtk_iommu.c
+++ b/drivers/iommu/mtk_iommu.c
@@ -123,10 +123,11 @@  struct mtk_iommu_domain {
  *  0xc000_0000         0x1_c000_0000 ...
  *  0x1_0000_0000       0x1_0000_0000 (No change)
  *
- * The PA in the iova_to_phys that is got from v7s always is u32, But from
- * the CPU point of view, PA should add BIT(32) when PA < 0x4000_0000.
+ * Thus, We always add BIT32 in the iommu_map and disable BIT32 if PA is >=
+ * 0x1_4000_0000 in the iova_to_phys(From cpu point of view, the PA range is
+ * 0x4000_0000 - 0x1_3fff_ffff).
  */
-#define MTK_IOMMU_4GB_MODE_REMAP_BASE	 0x40000000
+#define MTK_IOMMU_4GB_MODE_REMAP_BASE	 0x140000000UL
 
 static LIST_HEAD(m4ulist);	/* List all the M4U HWs */
 
@@ -284,7 +285,8 @@  static int mtk_iommu_domain_finalise(struct mtk_iommu_domain *dom)
 	dom->cfg = (struct io_pgtable_cfg) {
 		.quirks = IO_PGTABLE_QUIRK_ARM_NS |
 			IO_PGTABLE_QUIRK_NO_PERMS |
-			IO_PGTABLE_QUIRK_TLBI_ON_MAP,
+			IO_PGTABLE_QUIRK_TLBI_ON_MAP |
+			IO_PGTABLE_QUIRK_ARM_MTK_4GB,
 		.pgsize_bitmap = mtk_iommu_ops.pgsize_bitmap,
 		.ias = 32,
 		.oas = 32,
@@ -292,9 +294,6 @@  static int mtk_iommu_domain_finalise(struct mtk_iommu_domain *dom)
 		.iommu_dev = data->dev,
 	};
 
-	if (data->enable_4GB)
-		dom->cfg.quirks |= IO_PGTABLE_QUIRK_ARM_MTK_4GB;
-
 	dom->iop = alloc_io_pgtable_ops(ARM_V7S, &dom->cfg, data);
 	if (!dom->iop) {
 		dev_err(data->dev, "Failed to alloc io pgtable\n");
@@ -380,12 +379,16 @@  static int mtk_iommu_map(struct iommu_domain *domain, unsigned long iova,
 			 phys_addr_t paddr, size_t size, int prot)
 {
 	struct mtk_iommu_domain *dom = to_mtk_domain(domain);
+	struct mtk_iommu_data *data = mtk_iommu_get_m4u_data();
 	unsigned long flags;
 	int ret;
 
+	/* The "4GB mode" M4U physically can not use the lower remap of Dram. */
+	if (data->enable_4GB)
+		paddr |= BIT_ULL(32);
+
 	spin_lock_irqsave(&dom->pgtlock, flags);
-	ret = dom->iop->map(dom->iop, iova, paddr & DMA_BIT_MASK(32),
-			    size, prot);
+	ret = dom->iop->map(dom->iop, iova, paddr, size, prot);
 	spin_unlock_irqrestore(&dom->pgtlock, flags);
 
 	return ret;
@@ -422,8 +425,8 @@  static phys_addr_t mtk_iommu_iova_to_phys(struct iommu_domain *domain,
 	pa = dom->iop->iova_to_phys(dom->iop, iova);
 	spin_unlock_irqrestore(&dom->pgtlock, flags);
 
-	if (data->enable_4GB && pa < MTK_IOMMU_4GB_MODE_REMAP_BASE)
-		pa |= BIT_ULL(32);
+	if (data->enable_4GB && pa >= MTK_IOMMU_4GB_MODE_REMAP_BASE)
+		pa &= ~BIT_ULL(32);
 
 	return pa;
 }
@@ -615,6 +618,8 @@  static int mtk_iommu_probe(struct platform_device *pdev)
 
 	/* Whether the current dram is over 4GB */
 	data->enable_4GB = !!(max_pfn > (BIT_ULL(32) >> PAGE_SHIFT));
+	if (!data->plat_data->has_4gb_mode)
+		data->enable_4GB = false;
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	data->base = devm_ioremap_resource(dev, res);
@@ -755,10 +760,12 @@  static int __maybe_unused mtk_iommu_resume(struct device *dev)
 
 static const struct mtk_iommu_plat_data mt2712_data = {
 	.m4u_plat     = M4U_MT2712,
+	.has_4gb_mode = true,
 };
 
 static const struct mtk_iommu_plat_data mt8173_data = {
 	.m4u_plat     = M4U_MT8173,
+	.has_4gb_mode = true,
 };
 
 static const struct of_device_id mtk_iommu_of_ids[] = {
diff --git a/drivers/iommu/mtk_iommu.h b/drivers/iommu/mtk_iommu.h
index 483d210..d7a001a 100644
--- a/drivers/iommu/mtk_iommu.h
+++ b/drivers/iommu/mtk_iommu.h
@@ -42,6 +42,7 @@  enum mtk_iommu_plat {
 
 struct mtk_iommu_plat_data {
 	enum mtk_iommu_plat m4u_plat;
+	bool                has_4gb_mode;
 };
 
 struct mtk_iommu_domain;