diff mbox

[v2,2/4] iommu/io-pgtable-arm: Support 52-bit physical address

Message ID fde583dacee8099ed4e952dbf1c4dfb42070e9c5.1512038236.git.robin.murphy@arm.com (mailing list archive)
State New, archived
Headers show

Commit Message

Robin Murphy Dec. 14, 2017, 4:58 p.m. UTC
Bring io-pgtable-arm in line with the ARMv8.2-LPA feature allowing
52-bit physical addresses when using the 64KB translation granule.
This will be supported by SMMUv3.1.

Tested-by: Nate Watterson <nwatters@codeaurora.org>
Signed-off-by: Robin Murphy <robin.murphy@arm.com>
---

v2: Fix TCR_PS/TCR_IPS copy-paste error

 drivers/iommu/io-pgtable-arm.c | 65 ++++++++++++++++++++++++++++++------------
 1 file changed, 47 insertions(+), 18 deletions(-)

Comments

Will Deacon Feb. 26, 2018, 6:05 p.m. UTC | #1
On Thu, Dec 14, 2017 at 04:58:51PM +0000, Robin Murphy wrote:
> Bring io-pgtable-arm in line with the ARMv8.2-LPA feature allowing
> 52-bit physical addresses when using the 64KB translation granule.
> This will be supported by SMMUv3.1.
> 
> Tested-by: Nate Watterson <nwatters@codeaurora.org>
> Signed-off-by: Robin Murphy <robin.murphy@arm.com>
> ---
> 
> v2: Fix TCR_PS/TCR_IPS copy-paste error
> 
>  drivers/iommu/io-pgtable-arm.c | 65 ++++++++++++++++++++++++++++++------------
>  1 file changed, 47 insertions(+), 18 deletions(-)

[...]

> @@ -203,6 +199,25 @@ struct arm_lpae_io_pgtable {
>  
>  typedef u64 arm_lpae_iopte;
>  
> +static arm_lpae_iopte paddr_to_iopte(phys_addr_t paddr,
> +				     struct arm_lpae_io_pgtable *data)
> +{
> +	arm_lpae_iopte pte = paddr;
> +
> +	/* Of the bits which overlap, either 51:48 or 15:12 are always RES0 */
> +	return (pte | (pte >> 36)) & ARM_LPAE_PTE_ADDR_MASK;
> +}

I don't particularly like relying on properties of the paddr for correct
construction of the pte here. The existing macro doesn't have this
limitation. I suspect it's all fine at the moment because we only use TTBR0,
but I'd rather not bake that in if we can avoid it.

> +static phys_addr_t iopte_to_paddr(arm_lpae_iopte pte,
> +				  struct arm_lpae_io_pgtable *data)
> +{
> +	phys_addr_t paddr = pte & ARM_LPAE_PTE_ADDR_MASK;
> +	phys_addr_t paddr_hi = paddr & (ARM_LPAE_GRANULE(data) - 1);
> +
> +	/* paddr_hi spans nothing for 4K granule, and only RES0 bits for 16K */
> +	return (paddr ^ paddr_hi) | (paddr_hi << 36);

Why do we need xor here?

>  static bool selftest_running = false;
>  
>  static dma_addr_t __arm_lpae_dma_addr(void *pages)
> @@ -287,7 +302,7 @@ static void __arm_lpae_init_pte(struct arm_lpae_io_pgtable *data,
>  		pte |= ARM_LPAE_PTE_TYPE_BLOCK;
>  
>  	pte |= ARM_LPAE_PTE_AF | ARM_LPAE_PTE_SH_IS;
> -	pte |= pfn_to_iopte(paddr >> data->pg_shift, data);
> +	pte |= paddr_to_iopte(paddr, data);
>  
>  	__arm_lpae_set_pte(ptep, pte, &data->iop.cfg);
>  }
> @@ -528,7 +543,7 @@ static int arm_lpae_split_blk_unmap(struct arm_lpae_io_pgtable *data,
>  	if (size == split_sz)
>  		unmap_idx = ARM_LPAE_LVL_IDX(iova, lvl, data);
>  
> -	blk_paddr = iopte_to_pfn(blk_pte, data) << data->pg_shift;
> +	blk_paddr = iopte_to_paddr(blk_pte, data);
>  	pte = iopte_prot(blk_pte);
>  
>  	for (i = 0; i < tablesz / sizeof(pte); i++, blk_paddr += split_sz) {
> @@ -652,12 +667,13 @@ static phys_addr_t arm_lpae_iova_to_phys(struct io_pgtable_ops *ops,
>  
>  found_translation:
>  	iova &= (ARM_LPAE_BLOCK_SIZE(lvl, data) - 1);
> -	return ((phys_addr_t)iopte_to_pfn(pte,data) << data->pg_shift) | iova;
> +	return iopte_to_paddr(pte, data) | iova;
>  }
>  
>  static void arm_lpae_restrict_pgsizes(struct io_pgtable_cfg *cfg)
>  {
> -	unsigned long granule;
> +	unsigned long granule, page_sizes;
> +	unsigned int max_addr_bits = 48;
>  
>  	/*
>  	 * We need to restrict the supported page sizes to match the
> @@ -677,17 +693,24 @@ static void arm_lpae_restrict_pgsizes(struct io_pgtable_cfg *cfg)
>  
>  	switch (granule) {
>  	case SZ_4K:
> -		cfg->pgsize_bitmap &= (SZ_4K | SZ_2M | SZ_1G);
> +		page_sizes = (SZ_4K | SZ_2M | SZ_1G);
>  		break;
>  	case SZ_16K:
> -		cfg->pgsize_bitmap &= (SZ_16K | SZ_32M);
> +		page_sizes = (SZ_16K | SZ_32M);
>  		break;
>  	case SZ_64K:
> -		cfg->pgsize_bitmap &= (SZ_64K | SZ_512M);
> +		max_addr_bits = 52;
> +		page_sizes = (SZ_64K | SZ_512M);
> +		if (cfg->oas > 48)
> +			page_sizes |= 1ULL << 42; /* 4TB */
>  		break;
>  	default:
> -		cfg->pgsize_bitmap = 0;
> +		page_sizes = 0;
>  	}
> +
> +	cfg->pgsize_bitmap &= page_sizes;
> +	cfg->ias = min(cfg->ias, max_addr_bits);
> +	cfg->oas = min(cfg->oas, max_addr_bits);

I don't think we should be writing to the ias/oas fields here, at least
now without auditing the drivers and updating the comments about the
io-pgtable API. For example, the SMMUv3 driver uses its own ias local
variable to initialise the domain geometry, and won't pick up any changes
made here.

Will
Robin Murphy Feb. 27, 2018, 1:49 p.m. UTC | #2
On 26/02/18 18:05, Will Deacon wrote:
> On Thu, Dec 14, 2017 at 04:58:51PM +0000, Robin Murphy wrote:
>> Bring io-pgtable-arm in line with the ARMv8.2-LPA feature allowing
>> 52-bit physical addresses when using the 64KB translation granule.
>> This will be supported by SMMUv3.1.
>>
>> Tested-by: Nate Watterson <nwatters@codeaurora.org>
>> Signed-off-by: Robin Murphy <robin.murphy@arm.com>
>> ---
>>
>> v2: Fix TCR_PS/TCR_IPS copy-paste error
>>
>>   drivers/iommu/io-pgtable-arm.c | 65 ++++++++++++++++++++++++++++++------------
>>   1 file changed, 47 insertions(+), 18 deletions(-)
> 
> [...]
> 
>> @@ -203,6 +199,25 @@ struct arm_lpae_io_pgtable {
>>   
>>   typedef u64 arm_lpae_iopte;
>>   
>> +static arm_lpae_iopte paddr_to_iopte(phys_addr_t paddr,
>> +				     struct arm_lpae_io_pgtable *data)
>> +{
>> +	arm_lpae_iopte pte = paddr;
>> +
>> +	/* Of the bits which overlap, either 51:48 or 15:12 are always RES0 */
>> +	return (pte | (pte >> 36)) & ARM_LPAE_PTE_ADDR_MASK;
>> +}
> 
> I don't particularly like relying on properties of the paddr for correct
> construction of the pte here. The existing macro doesn't have this
> limitation. I suspect it's all fine at the moment because we only use TTBR0,
> but I'd rather not bake that in if we can avoid it.

What's the relevance of TTBR0 to physical addresses? :/

Note that by this point paddr has been validated against cfg->oas by 
arm_lpae_map(), and validated to be granule-aligned by iommu_map(), so 
it really can't be wrong under reasonable conditions.

>> +static phys_addr_t iopte_to_paddr(arm_lpae_iopte pte,
>> +				  struct arm_lpae_io_pgtable *data)
>> +{
>> +	phys_addr_t paddr = pte & ARM_LPAE_PTE_ADDR_MASK;
>> +	phys_addr_t paddr_hi = paddr & (ARM_LPAE_GRANULE(data) - 1);
>> +
>> +	/* paddr_hi spans nothing for 4K granule, and only RES0 bits for 16K */
>> +	return (paddr ^ paddr_hi) | (paddr_hi << 36);
> 
> Why do we need xor here?

Because "(paddr ^ paddr_hi)" is more concise than "(paddr & 
~(phys_addr_t)(ARM_LPAE_GRANULE(data) - 1)" or variants thereof. It's 
potentially a teeny bit more efficient too, I think, but it's mostly 
about the readability.

>>   static bool selftest_running = false;
>>   
>>   static dma_addr_t __arm_lpae_dma_addr(void *pages)
>> @@ -287,7 +302,7 @@ static void __arm_lpae_init_pte(struct arm_lpae_io_pgtable *data,
>>   		pte |= ARM_LPAE_PTE_TYPE_BLOCK;
>>   
>>   	pte |= ARM_LPAE_PTE_AF | ARM_LPAE_PTE_SH_IS;
>> -	pte |= pfn_to_iopte(paddr >> data->pg_shift, data);
>> +	pte |= paddr_to_iopte(paddr, data);
>>   
>>   	__arm_lpae_set_pte(ptep, pte, &data->iop.cfg);
>>   }
>> @@ -528,7 +543,7 @@ static int arm_lpae_split_blk_unmap(struct arm_lpae_io_pgtable *data,
>>   	if (size == split_sz)
>>   		unmap_idx = ARM_LPAE_LVL_IDX(iova, lvl, data);
>>   
>> -	blk_paddr = iopte_to_pfn(blk_pte, data) << data->pg_shift;
>> +	blk_paddr = iopte_to_paddr(blk_pte, data);
>>   	pte = iopte_prot(blk_pte);
>>   
>>   	for (i = 0; i < tablesz / sizeof(pte); i++, blk_paddr += split_sz) {
>> @@ -652,12 +667,13 @@ static phys_addr_t arm_lpae_iova_to_phys(struct io_pgtable_ops *ops,
>>   
>>   found_translation:
>>   	iova &= (ARM_LPAE_BLOCK_SIZE(lvl, data) - 1);
>> -	return ((phys_addr_t)iopte_to_pfn(pte,data) << data->pg_shift) | iova;
>> +	return iopte_to_paddr(pte, data) | iova;
>>   }
>>   
>>   static void arm_lpae_restrict_pgsizes(struct io_pgtable_cfg *cfg)
>>   {
>> -	unsigned long granule;
>> +	unsigned long granule, page_sizes;
>> +	unsigned int max_addr_bits = 48;
>>   
>>   	/*
>>   	 * We need to restrict the supported page sizes to match the
>> @@ -677,17 +693,24 @@ static void arm_lpae_restrict_pgsizes(struct io_pgtable_cfg *cfg)
>>   
>>   	switch (granule) {
>>   	case SZ_4K:
>> -		cfg->pgsize_bitmap &= (SZ_4K | SZ_2M | SZ_1G);
>> +		page_sizes = (SZ_4K | SZ_2M | SZ_1G);
>>   		break;
>>   	case SZ_16K:
>> -		cfg->pgsize_bitmap &= (SZ_16K | SZ_32M);
>> +		page_sizes = (SZ_16K | SZ_32M);
>>   		break;
>>   	case SZ_64K:
>> -		cfg->pgsize_bitmap &= (SZ_64K | SZ_512M);
>> +		max_addr_bits = 52;
>> +		page_sizes = (SZ_64K | SZ_512M);
>> +		if (cfg->oas > 48)
>> +			page_sizes |= 1ULL << 42; /* 4TB */
>>   		break;
>>   	default:
>> -		cfg->pgsize_bitmap = 0;
>> +		page_sizes = 0;
>>   	}
>> +
>> +	cfg->pgsize_bitmap &= page_sizes;
>> +	cfg->ias = min(cfg->ias, max_addr_bits);
>> +	cfg->oas = min(cfg->oas, max_addr_bits);
> 
> I don't think we should be writing to the ias/oas fields here, at least
> now without auditing the drivers and updating the comments about the
> io-pgtable API. For example, the SMMUv3 driver uses its own ias local
> variable to initialise the domain geometry, and won't pick up any changes
> made here.

As you've discovered, the driver thing is indeed true. More generally, 
though, we've always adjusted cfg->pgsize_bitmap here, so I don't see 
why other fields should be treated differently - I've always assumed the 
cfg which the driver passes in here just represents its total maximum 
capabilities, from which it's io-pgtable's job to pick an appropriate 
configuration.

Robin.
Will Deacon March 6, 2018, 5:54 p.m. UTC | #3
Hey Robin,

On Tue, Feb 27, 2018 at 01:49:14PM +0000, Robin Murphy wrote:
> On 26/02/18 18:05, Will Deacon wrote:
> >On Thu, Dec 14, 2017 at 04:58:51PM +0000, Robin Murphy wrote:
> >>Bring io-pgtable-arm in line with the ARMv8.2-LPA feature allowing
> >>52-bit physical addresses when using the 64KB translation granule.
> >>This will be supported by SMMUv3.1.
> >>
> >>Tested-by: Nate Watterson <nwatters@codeaurora.org>
> >>Signed-off-by: Robin Murphy <robin.murphy@arm.com>
> >>---
> >>
> >>v2: Fix TCR_PS/TCR_IPS copy-paste error
> >>
> >>  drivers/iommu/io-pgtable-arm.c | 65 ++++++++++++++++++++++++++++++------------
> >>  1 file changed, 47 insertions(+), 18 deletions(-)
> >
> >[...]
> >
> >>@@ -203,6 +199,25 @@ struct arm_lpae_io_pgtable {
> >>  typedef u64 arm_lpae_iopte;
> >>+static arm_lpae_iopte paddr_to_iopte(phys_addr_t paddr,
> >>+				     struct arm_lpae_io_pgtable *data)
> >>+{
> >>+	arm_lpae_iopte pte = paddr;
> >>+
> >>+	/* Of the bits which overlap, either 51:48 or 15:12 are always RES0 */
> >>+	return (pte | (pte >> 36)) & ARM_LPAE_PTE_ADDR_MASK;
> >>+}
> >
> >I don't particularly like relying on properties of the paddr for correct
> >construction of the pte here. The existing macro doesn't have this
> >limitation. I suspect it's all fine at the moment because we only use TTBR0,
> >but I'd rather not bake that in if we can avoid it.
> 
> What's the relevance of TTBR0 to physical addresses? :/

Good point! I clearly got confused here.

> Note that by this point paddr has been validated against cfg->oas by
> arm_lpae_map(), and validated to be granule-aligned by iommu_map(), so it
> really can't be wrong under reasonable conditions.

Fair enough, but I still think this would be a bit more readable written
along the lines of:

	arm_lpae_iopte pte_hi, pte_lo = 0;

	pte_hi = paddr & GENMASK(47, data->pg_shift);
	if (data->pg_shift == 16) {
		pte_lo = paddr & GENMASK(ARM_LPAE_MAX_ADDR_BITS - 1, 48);
		pte_lo >>= (48 - 12);
	}

	return pte_hi | pte_lo;

Ok, we don't squeeze every last cycle out of it, but I find it much more
readable. Is it just me? The magic numbers could be #defined and/or
derived if necessary.

> >>+static phys_addr_t iopte_to_paddr(arm_lpae_iopte pte,
> >>+				  struct arm_lpae_io_pgtable *data)
> >>+{
> >>+	phys_addr_t paddr = pte & ARM_LPAE_PTE_ADDR_MASK;
> >>+	phys_addr_t paddr_hi = paddr & (ARM_LPAE_GRANULE(data) - 1);
> >>+
> >>+	/* paddr_hi spans nothing for 4K granule, and only RES0 bits for 16K */
> >>+	return (paddr ^ paddr_hi) | (paddr_hi << 36);
> >
> >Why do we need xor here?
> 
> Because "(paddr ^ paddr_hi)" is more concise than "(paddr &
> ~(phys_addr_t)(ARM_LPAE_GRANULE(data) - 1)" or variants thereof. It's
> potentially a teeny bit more efficient too, I think, but it's mostly about
> the readability.

I don't think the bit-twiddling is super readable, to be honest. Again,
something like:

	phys_addr_t paddr_lo, paddr_hi = 0;

	paddr_lo = pte & GENMASK(47, data->pg_shift);
	if (data->pg_shift == 16) {
		paddr_hi = pte & GENMASK(15, 12);
		paddr_hi <<= (48 - 12);
	}

	return paddr_hi | paddr_lo;

but I've not even compiled this stuff...

> >>+	cfg->pgsize_bitmap &= page_sizes;
> >>+	cfg->ias = min(cfg->ias, max_addr_bits);
> >>+	cfg->oas = min(cfg->oas, max_addr_bits);
> >
> >I don't think we should be writing to the ias/oas fields here, at least
> >now without auditing the drivers and updating the comments about the
> >io-pgtable API. For example, the SMMUv3 driver uses its own ias local
> >variable to initialise the domain geometry, and won't pick up any changes
> >made here.
> 
> As you've discovered, the driver thing is indeed true. More generally,
> though, we've always adjusted cfg->pgsize_bitmap here, so I don't see why
> other fields should be treated differently - I've always assumed the cfg
> which the driver passes in here just represents its total maximum
> capabilities, from which it's io-pgtable's job to pick an appropriate
> configuration.

Can you update the the header file with a comment to that effect, please?

Will
diff mbox

Patch

diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
index 51e5c43caed1..10d888b02948 100644
--- a/drivers/iommu/io-pgtable-arm.c
+++ b/drivers/iommu/io-pgtable-arm.c
@@ -21,6 +21,7 @@ 
 #define pr_fmt(fmt)	"arm-lpae io-pgtable: " fmt
 
 #include <linux/atomic.h>
+#include <linux/bitops.h>
 #include <linux/iommu.h>
 #include <linux/kernel.h>
 #include <linux/sizes.h>
@@ -32,7 +33,7 @@ 
 
 #include "io-pgtable.h"
 
-#define ARM_LPAE_MAX_ADDR_BITS		48
+#define ARM_LPAE_MAX_ADDR_BITS		52
 #define ARM_LPAE_S2_MAX_CONCAT_PAGES	16
 #define ARM_LPAE_MAX_LEVELS		4
 
@@ -86,6 +87,8 @@ 
 #define ARM_LPAE_PTE_TYPE_TABLE		3
 #define ARM_LPAE_PTE_TYPE_PAGE		3
 
+#define ARM_LPAE_PTE_ADDR_MASK		GENMASK_ULL(47,12)
+
 #define ARM_LPAE_PTE_NSTABLE		(((arm_lpae_iopte)1) << 63)
 #define ARM_LPAE_PTE_XN			(((arm_lpae_iopte)3) << 53)
 #define ARM_LPAE_PTE_AF			(((arm_lpae_iopte)1) << 10)
@@ -159,6 +162,7 @@ 
 #define ARM_LPAE_TCR_PS_42_BIT		0x3ULL
 #define ARM_LPAE_TCR_PS_44_BIT		0x4ULL
 #define ARM_LPAE_TCR_PS_48_BIT		0x5ULL
+#define ARM_LPAE_TCR_PS_52_BIT		0x6ULL
 
 #define ARM_LPAE_MAIR_ATTR_SHIFT(n)	((n) << 3)
 #define ARM_LPAE_MAIR_ATTR_MASK		0xff
@@ -170,9 +174,7 @@ 
 #define ARM_LPAE_MAIR_ATTR_IDX_DEV	2
 
 /* IOPTE accessors */
-#define iopte_deref(pte,d)					\
-	(__va((pte) & ((1ULL << ARM_LPAE_MAX_ADDR_BITS) - 1)	\
-	& ~(ARM_LPAE_GRANULE(d) - 1ULL)))
+#define iopte_deref(pte,d) __va(iopte_to_paddr(pte, d))
 
 #define iopte_type(pte,l)					\
 	(((pte) >> ARM_LPAE_PTE_TYPE_SHIFT) & ARM_LPAE_PTE_TYPE_MASK)
@@ -184,12 +186,6 @@ 
 		(iopte_type(pte,l) == ARM_LPAE_PTE_TYPE_PAGE) :	\
 		(iopte_type(pte,l) == ARM_LPAE_PTE_TYPE_BLOCK))
 
-#define iopte_to_pfn(pte,d)					\
-	(((pte) & ((1ULL << ARM_LPAE_MAX_ADDR_BITS) - 1)) >> (d)->pg_shift)
-
-#define pfn_to_iopte(pfn,d)					\
-	(((pfn) << (d)->pg_shift) & ((1ULL << ARM_LPAE_MAX_ADDR_BITS) - 1))
-
 struct arm_lpae_io_pgtable {
 	struct io_pgtable	iop;
 
@@ -203,6 +199,25 @@  struct arm_lpae_io_pgtable {
 
 typedef u64 arm_lpae_iopte;
 
+static arm_lpae_iopte paddr_to_iopte(phys_addr_t paddr,
+				     struct arm_lpae_io_pgtable *data)
+{
+	arm_lpae_iopte pte = paddr;
+
+	/* Of the bits which overlap, either 51:48 or 15:12 are always RES0 */
+	return (pte | (pte >> 36)) & ARM_LPAE_PTE_ADDR_MASK;
+}
+
+static phys_addr_t iopte_to_paddr(arm_lpae_iopte pte,
+				  struct arm_lpae_io_pgtable *data)
+{
+	phys_addr_t paddr = pte & ARM_LPAE_PTE_ADDR_MASK;
+	phys_addr_t paddr_hi = paddr & (ARM_LPAE_GRANULE(data) - 1);
+
+	/* paddr_hi spans nothing for 4K granule, and only RES0 bits for 16K */
+	return (paddr ^ paddr_hi) | (paddr_hi << 36);
+}
+
 static bool selftest_running = false;
 
 static dma_addr_t __arm_lpae_dma_addr(void *pages)
@@ -287,7 +302,7 @@  static void __arm_lpae_init_pte(struct arm_lpae_io_pgtable *data,
 		pte |= ARM_LPAE_PTE_TYPE_BLOCK;
 
 	pte |= ARM_LPAE_PTE_AF | ARM_LPAE_PTE_SH_IS;
-	pte |= pfn_to_iopte(paddr >> data->pg_shift, data);
+	pte |= paddr_to_iopte(paddr, data);
 
 	__arm_lpae_set_pte(ptep, pte, &data->iop.cfg);
 }
@@ -528,7 +543,7 @@  static int arm_lpae_split_blk_unmap(struct arm_lpae_io_pgtable *data,
 	if (size == split_sz)
 		unmap_idx = ARM_LPAE_LVL_IDX(iova, lvl, data);
 
-	blk_paddr = iopte_to_pfn(blk_pte, data) << data->pg_shift;
+	blk_paddr = iopte_to_paddr(blk_pte, data);
 	pte = iopte_prot(blk_pte);
 
 	for (i = 0; i < tablesz / sizeof(pte); i++, blk_paddr += split_sz) {
@@ -652,12 +667,13 @@  static phys_addr_t arm_lpae_iova_to_phys(struct io_pgtable_ops *ops,
 
 found_translation:
 	iova &= (ARM_LPAE_BLOCK_SIZE(lvl, data) - 1);
-	return ((phys_addr_t)iopte_to_pfn(pte,data) << data->pg_shift) | iova;
+	return iopte_to_paddr(pte, data) | iova;
 }
 
 static void arm_lpae_restrict_pgsizes(struct io_pgtable_cfg *cfg)
 {
-	unsigned long granule;
+	unsigned long granule, page_sizes;
+	unsigned int max_addr_bits = 48;
 
 	/*
 	 * We need to restrict the supported page sizes to match the
@@ -677,17 +693,24 @@  static void arm_lpae_restrict_pgsizes(struct io_pgtable_cfg *cfg)
 
 	switch (granule) {
 	case SZ_4K:
-		cfg->pgsize_bitmap &= (SZ_4K | SZ_2M | SZ_1G);
+		page_sizes = (SZ_4K | SZ_2M | SZ_1G);
 		break;
 	case SZ_16K:
-		cfg->pgsize_bitmap &= (SZ_16K | SZ_32M);
+		page_sizes = (SZ_16K | SZ_32M);
 		break;
 	case SZ_64K:
-		cfg->pgsize_bitmap &= (SZ_64K | SZ_512M);
+		max_addr_bits = 52;
+		page_sizes = (SZ_64K | SZ_512M);
+		if (cfg->oas > 48)
+			page_sizes |= 1ULL << 42; /* 4TB */
 		break;
 	default:
-		cfg->pgsize_bitmap = 0;
+		page_sizes = 0;
 	}
+
+	cfg->pgsize_bitmap &= page_sizes;
+	cfg->ias = min(cfg->ias, max_addr_bits);
+	cfg->oas = min(cfg->oas, max_addr_bits);
 }
 
 static struct arm_lpae_io_pgtable *
@@ -784,6 +807,9 @@  arm_64_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, void *cookie)
 	case 48:
 		reg |= (ARM_LPAE_TCR_PS_48_BIT << ARM_LPAE_TCR_IPS_SHIFT);
 		break;
+	case 52:
+		reg |= (ARM_LPAE_TCR_PS_52_BIT << ARM_LPAE_TCR_IPS_SHIFT);
+		break;
 	default:
 		goto out_free_data;
 	}
@@ -891,6 +917,9 @@  arm_64_lpae_alloc_pgtable_s2(struct io_pgtable_cfg *cfg, void *cookie)
 	case 48:
 		reg |= (ARM_LPAE_TCR_PS_48_BIT << ARM_LPAE_TCR_PS_SHIFT);
 		break;
+	case 52:
+		reg |= (ARM_LPAE_TCR_PS_52_BIT << ARM_LPAE_TCR_PS_SHIFT);
+		break;
 	default:
 		goto out_free_data;
 	}