[RFC,v2,05/13] hw/arm/smmu-common: Support nested translation

Message ID 20240408140818.3799590-6-smostafa@google.com (mailing list archive)
State New, archived
Series SMMUv3 nested translation support

Commit Message

Mostafa Saleh April 8, 2024, 2:08 p.m. UTC
When nested translation is requested, do the following:

- Translate stage-1 IPA using stage-2 to a physical address.
- Translate stage-1 PTW walks using stage-2.
- Combine both to create a single TLB entry; for that we choose
  the smallest entry to cache, which means that if the smallest
  entry comes from stage-2, and stage-2 uses a different granule,
  a TLB lookup for stage-1 (in nested config) will always miss.
  Lookup logic is modified for nesting to look up using the stage-2
  granule if the stage-1 granule missed and they are different.

Also, add more visibility in trace points, to make it easier to debug.

Signed-off-by: Mostafa Saleh <smostafa@google.com>
---
 hw/arm/smmu-common.c         | 153 ++++++++++++++++++++++++++++-------
 hw/arm/trace-events          |   6 +-
 include/hw/arm/smmu-common.h |   3 +-
 3 files changed, 131 insertions(+), 31 deletions(-)

Comments

Eric Auger April 18, 2024, 1:54 p.m. UTC | #1
Hi Mostafa,

On 4/8/24 16:08, Mostafa Saleh wrote:
> When nested translation is requested, do the following:
>
> - Translate stage-1 IPA using stage-2 to a physical address.
> - Translate stage-1 PTW walks using stage-2.
> - Combine both to create a single TLB entry; for that we choose
>   the smallest entry to cache, which means that if the smallest
>   entry comes from stage-2, and stage-2 uses a different granule,
>   a TLB lookup for stage-1 (in nested config) will always miss.
>   Lookup logic is modified for nesting to look up using the stage-2
>   granule if the stage-1 granule missed and they are different.
>
> Also, add more visibility in trace points, to make it easier to debug.
>
> Signed-off-by: Mostafa Saleh <smostafa@google.com>
> ---
>  hw/arm/smmu-common.c         | 153 ++++++++++++++++++++++++++++-------
>  hw/arm/trace-events          |   6 +-
>  include/hw/arm/smmu-common.h |   3 +-
>  3 files changed, 131 insertions(+), 31 deletions(-)
>
> diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c
> index 771b9c79a3..2cf27b490b 100644
> --- a/hw/arm/smmu-common.c
> +++ b/hw/arm/smmu-common.c
> @@ -66,8 +66,10 @@ SMMUIOTLBKey smmu_get_iotlb_key(int asid, int vmid, uint64_t iova,
>      return key;
>  }
>  
> -SMMUTLBEntry *smmu_iotlb_lookup(SMMUState *bs, SMMUTransCfg *cfg,
> -                                SMMUTransTableInfo *tt, hwaddr iova)
> +static SMMUTLBEntry *smmu_iotlb_lookup_all_levels(SMMUState *bs,
> +                                                  SMMUTransCfg *cfg,
> +                                                  SMMUTransTableInfo *tt,
> +                                                  hwaddr iova)
this helper can be introduced in a separate patch to ease the code review
>  {
>      uint8_t tg = (tt->granule_sz - 10) / 2;
>      uint8_t inputsize = 64 - tt->tsz;
> @@ -88,10 +90,29 @@ SMMUTLBEntry *smmu_iotlb_lookup(SMMUState *bs, SMMUTransCfg *cfg,
>          }
>          level++;
>      }
> +    return entry;
> +}
> +
> +SMMUTLBEntry *smmu_iotlb_lookup(SMMUState *bs, SMMUTransCfg *cfg,
> +                                SMMUTransTableInfo *tt, hwaddr iova)
> +{
> +    SMMUTLBEntry *entry = NULL;
> +
> +    entry = smmu_iotlb_lookup_all_levels(bs, cfg, tt, iova);
> +    /*
> +     * For nested translation also use the s2 granule, as the TLB will insert
> +     * the smallest of both, so the entry can be cached with the s2 granule.
> +     */
> +    if (!entry && (cfg->stage == SMMU_NESTED) &&
> +        (cfg->s2cfg.granule_sz != tt->granule_sz)) {
> +        tt->granule_sz = cfg->s2cfg.granule_sz;
> +        entry = smmu_iotlb_lookup_all_levels(bs, cfg, tt, iova);
this is also the kind of stuff that can be introduced and reviewed
separately, without taking any risk as long as NESTED is not supported. In
this new patch you could also document the TLB strategy.
> +    }
>  
>      if (entry) {
>          cfg->iotlb_hits++;
>          trace_smmu_iotlb_lookup_hit(cfg->asid, cfg->s2cfg.vmid, iova,
> +                                    entry->entry.addr_mask,
can be moved to a separate fix. same for the trace point changes
>                                      cfg->iotlb_hits, cfg->iotlb_misses,
>                                      100 * cfg->iotlb_hits /
>                                      (cfg->iotlb_hits + cfg->iotlb_misses));
> @@ -117,7 +138,7 @@ void smmu_iotlb_insert(SMMUState *bs, SMMUTransCfg *cfg, SMMUTLBEntry *new)
>      *key = smmu_get_iotlb_key(cfg->asid, cfg->s2cfg.vmid, new->entry.iova,
>                                tg, new->level);
>      trace_smmu_iotlb_insert(cfg->asid, cfg->s2cfg.vmid, new->entry.iova,
> -                            tg, new->level);
> +                            tg, new->level, new->entry.translated_addr);
>      g_hash_table_insert(bs->iotlb, key, new);
>  }
>  
> @@ -286,6 +307,27 @@ SMMUTransTableInfo *select_tt(SMMUTransCfg *cfg, dma_addr_t iova)
>      return NULL;
>  }
>  
> +/* Return the correct table address based on configuration. */
does the S2 translation for a PTE table?
> +static inline int translate_table_s1(dma_addr_t *table_addr, SMMUTransCfg *cfg,
> +                                     SMMUPTWEventInfo *info, SMMUState *bs)
> +{
> +    dma_addr_t addr = *table_addr;
> +    SMMUTLBEntry *cached_entry;
> +
> +    if (cfg->stage != SMMU_NESTED) {
> +        return 0;
> +    }
> +
> +    CALL_FUNC_CFG_S2(cfg, cached_entry, smmu_translate,
> +                     bs, cfg, addr, IOMMU_RO, info);
> +
> +    if (cached_entry) {
> +        *table_addr = CACHED_ENTRY_TO_ADDR(cached_entry, addr);
> +        return 0;
> +    }
> +    return -EINVAL;
> +}
> +
>  /**
>   * smmu_ptw_64_s1 - VMSAv8-64 Walk of the page tables for a given IOVA
>   * @cfg: translation config
> @@ -301,7 +343,8 @@ SMMUTransTableInfo *select_tt(SMMUTransCfg *cfg, dma_addr_t iova)
>   */
>  static int smmu_ptw_64_s1(SMMUTransCfg *cfg,
>                            dma_addr_t iova, IOMMUAccessFlags perm,
> -                          SMMUTLBEntry *tlbe, SMMUPTWEventInfo *info)
> +                          SMMUTLBEntry *tlbe, SMMUPTWEventInfo *info,
> +                          SMMUState *bs)
>  {
>      dma_addr_t baseaddr, indexmask;
>      SMMUStage stage = cfg->stage;
> @@ -349,6 +392,10 @@ static int smmu_ptw_64_s1(SMMUTransCfg *cfg,
>                  goto error;
>              }
>              baseaddr = get_table_pte_address(pte, granule_sz);
> +            /* In case of failure, retain stage-2 fault. */
link to the doc?
> +            if (translate_table_s1(&baseaddr, cfg, info, bs)) {
let's avoid that call if S1 only

What about the error handling? I am not sure we are covering all the
cases listed in 7.3.12 F_WALK_EABT, for instance.
> +                goto error_no_stage;
> +            }
>              level++;
>              continue;
>          } else if (is_page_pte(pte, level)) {
> @@ -384,7 +431,7 @@ static int smmu_ptw_64_s1(SMMUTransCfg *cfg,
>          tlbe->entry.translated_addr = gpa;
>          tlbe->entry.iova = iova & ~mask;
>          tlbe->entry.addr_mask = mask;
> -        tlbe->entry.perm = PTE_AP_TO_PERM(ap);
> +        tlbe->parent_perm = tlbe->entry.perm = PTE_AP_TO_PERM(ap);
>          tlbe->level = level;
>          tlbe->granule = granule_sz;
>          return 0;
> @@ -393,6 +440,7 @@ static int smmu_ptw_64_s1(SMMUTransCfg *cfg,
>  
>  error:
>      info->stage = SMMU_STAGE_1;
> +error_no_stage:
>      tlbe->entry.perm = IOMMU_NONE;
>      return -EINVAL;
>  }
> @@ -505,7 +553,7 @@ static int smmu_ptw_64_s2(SMMUTransCfg *cfg,
>          tlbe->entry.translated_addr = gpa;
>          tlbe->entry.iova = ipa & ~mask;
>          tlbe->entry.addr_mask = mask;
> -        tlbe->entry.perm = s2ap;
> +        tlbe->parent_perm = tlbe->entry.perm = s2ap;
>          tlbe->level = level;
>          tlbe->granule = granule_sz;
>          return 0;
> @@ -518,6 +566,28 @@ error:
>      return -EINVAL;
>  }
>  
> +/* Combine 2 TLB enteries and return in tlbe. */
entries.
Would suggest combine
> +static void combine_tlb(SMMUTLBEntry *tlbe, SMMUTLBEntry *tlbe_s2,
> +                        dma_addr_t iova, SMMUTransCfg *cfg)
> +{
> +        if (cfg->stage == SMMU_NESTED) {
> +            tlbe->entry.addr_mask = MIN(tlbe->entry.addr_mask,
> +                                        tlbe_s2->entry.addr_mask);
> +            tlbe->entry.translated_addr = CACHED_ENTRY_TO_ADDR(tlbe_s2,
> +                                          tlbe->entry.translated_addr);
> +
> +            tlbe->granule = MIN(tlbe->granule, tlbe_s2->granule);
could you add a link to the spec so we can clearly check what this code
is written against?
> +            tlbe->level = MAX(tlbe->level, tlbe_s2->level);
> +            tlbe->entry.iova = iova & ~tlbe->entry.addr_mask;
> +            /* parent_perm has s2 perm while perm has s1 perm. */
> +            tlbe->parent_perm = tlbe_s2->entry.perm;
> +            return;
> +        }
> +
> +        /* That was not nested, use the s2. */
should be removed and tested ;-)
> +        memcpy(tlbe, tlbe_s2, sizeof(*tlbe));
You shall avoid doing that memcpy if not necessary. I would advocate for
separate paths for S2 and nested.
> +}
> +
>  /**
>   * smmu_ptw - Walk the page tables for an IOVA, according to @cfg
need to update the above args desc
>   *
> @@ -530,28 +600,59 @@ error:
>   * return 0 on success
>   */
>  int smmu_ptw(SMMUTransCfg *cfg, dma_addr_t iova, IOMMUAccessFlags perm,
> -             SMMUTLBEntry *tlbe, SMMUPTWEventInfo *info)
> +             SMMUTLBEntry *tlbe, SMMUPTWEventInfo *info, SMMUState *bs)
>  {
> -    if (cfg->stage == SMMU_STAGE_1) {
> -        return smmu_ptw_64_s1(cfg, iova, perm, tlbe, info);
> -    } else if (cfg->stage == SMMU_STAGE_2) {
> -        /*
> -         * If bypassing stage 1(or unimplemented), the input address is passed
> -         * directly to stage 2 as IPA. If the input address of a transaction
> -         * exceeds the size of the IAS, a stage 1 Address Size fault occurs.
> -         * For AA64, IAS = OAS according to (IHI 0070.E.a) "3.4 Address sizes"
> -         */
> -        if (iova >= (1ULL << cfg->oas)) {
> -            info->type = SMMU_PTW_ERR_ADDR_SIZE;
> -            info->stage = SMMU_STAGE_1;
> -            tlbe->entry.perm = IOMMU_NONE;
> -            return -EINVAL;
> +    int ret = 0;
> +    SMMUTLBEntry tlbe_s2;
> +    dma_addr_t ipa = iova;
> +
> +    if (cfg->stage & SMMU_STAGE_1) {
> +        ret = smmu_ptw_64_s1(cfg, iova, perm, tlbe, info, bs);
> +        if (ret) {
> +            return ret;
>          }
> +        /* This is the IPA for next stage.*/
but you don't necessarily have the S2 enabled so this is not necessarily
an IPA
> +        ipa = CACHED_ENTRY_TO_ADDR(tlbe, iova);
> +    }
>  
> -        return smmu_ptw_64_s2(cfg, iova, perm, tlbe, info);
> +    /*
> +     * The address output from the translation causes a stage 1 Address Size
> +     * fault if it exceeds the range of the effective IPA size for the given CD.
> +     * If bypassing stage 1(or unimplemented), the input address is passed
> +     * directly to stage 2 as IPA. If the input address of a transaction
> +     * exceeds the size of the IAS, a stage 1 Address Size fault occurs.
> +     * For AA64, IAS = OAS according to (IHI 0070.E.a) "3.4 Address sizes"
> +     */
> +    if (ipa >= (1ULL << cfg->oas)) {
in case we have S2 only, it would be clearer to use ias instead (even
though the above comment says they are the same)
> +        info->type = SMMU_PTW_ERR_ADDR_SIZE;
> +        info->stage = SMMU_STAGE_1;
What does the stage really mean? That should be documented in the
struct, I think.
> +        tlbe->entry.perm = IOMMU_NONE;
> +        return -EINVAL;
>      }
this check is now also introduced for the S1-only case. If this is a fix,
it should be brought separately.
Also, the above comment refers to IPA. Does it also hold for S1 only? Is
the check identical in that case?
>  
> -    g_assert_not_reached();
> +    if (cfg->stage & SMMU_STAGE_2) {
> +        ret = smmu_ptw_64_s2(cfg, ipa, perm, &tlbe_s2, info);
> +        if (ret) {
> +            return ret;
> +        }
> +        combine_tlb(tlbe, &tlbe_s2, iova, cfg);
> +    }
I think I would prefer the S1, S2 and nested cases clearly separated, at
the price of some code duplication. I am afraid serializing stuff makes
the code less maintainable.
Also, it is important that the S1 case is not altered in terms of perf.
> +
> +    return ret;
> +}
> +
> +static int validate_tlb_entry(SMMUTLBEntry *cached_entry, IOMMUAccessFlags flag,
> +                              SMMUPTWEventInfo *info)
> +{
> +        if ((flag & IOMMU_WO) && !(cached_entry->entry.perm &
> +            cached_entry->parent_perm & IOMMU_WO)) {
> +            info->type = SMMU_PTW_ERR_PERMISSION;
> +            info->stage = !(cached_entry->entry.perm & IOMMU_WO) ?
> +                          SMMU_STAGE_1 :
> +                          SMMU_STAGE_2;
> +            return -EINVAL;
> +        }
> +        return 0;
>  }
>  
>  SMMUTLBEntry *smmu_translate(SMMUState *bs, SMMUTransCfg *cfg, dma_addr_t addr,
> @@ -595,16 +696,14 @@ SMMUTLBEntry *smmu_translate(SMMUState *bs, SMMUTransCfg *cfg, dma_addr_t addr,
>  
>      cached_entry = smmu_iotlb_lookup(bs, cfg, &tt_combined, aligned_addr);
>      if (cached_entry) {
> -        if ((flag & IOMMU_WO) && !(cached_entry->entry.perm & IOMMU_WO)) {
> -            info->type = SMMU_PTW_ERR_PERMISSION;
> -            info->stage = cfg->stage;
> +        if (validate_tlb_entry(cached_entry, flag, info)) {
>              return NULL;
>          }
>          return cached_entry;
>      }
>  
>      cached_entry = g_new0(SMMUTLBEntry, 1);
> -    status = smmu_ptw(cfg, aligned_addr, flag, cached_entry, info);
> +    status = smmu_ptw(cfg, aligned_addr, flag, cached_entry, info, bs);
>      if (status) {
>              g_free(cached_entry);
>              return NULL;
> diff --git a/hw/arm/trace-events b/hw/arm/trace-events
> index cc12924a84..5f23f0b963 100644
> --- a/hw/arm/trace-events
> +++ b/hw/arm/trace-events
> @@ -15,9 +15,9 @@ smmu_iotlb_inv_asid(uint16_t asid) "IOTLB invalidate asid=%d"
>  smmu_iotlb_inv_vmid(uint16_t vmid) "IOTLB invalidate vmid=%d"
>  smmu_iotlb_inv_iova(uint16_t asid, uint64_t addr) "IOTLB invalidate asid=%d addr=0x%"PRIx64
>  smmu_inv_notifiers_mr(const char *name) "iommu mr=%s"
> -smmu_iotlb_lookup_hit(uint16_t asid, uint16_t vmid, uint64_t addr, uint32_t hit, uint32_t miss, uint32_t p) "IOTLB cache HIT asid=%d vmid=%d addr=0x%"PRIx64" hit=%d miss=%d hit rate=%d"
> -smmu_iotlb_lookup_miss(uint16_t asid, uint16_t vmid, uint64_t addr, uint32_t hit, uint32_t miss, uint32_t p) "IOTLB cache MISS asid=%d vmid=%d addr=0x%"PRIx64" hit=%d miss=%d hit rate=%d"
> -smmu_iotlb_insert(uint16_t asid, uint16_t vmid, uint64_t addr, uint8_t tg, uint8_t level) "IOTLB ++ asid=%d vmid=%d addr=0x%"PRIx64" tg=%d level=%d"
> +smmu_iotlb_lookup_hit(int asid, uint16_t vmid, uint64_t addr, uint64_t mask, uint32_t hit, uint32_t miss, uint32_t p) "IOTLB cache HIT asid=%d vmid=%d addr=0x%"PRIx64" mask=0x%"PRIx64" hit=%d miss=%d hit rate=%d"
> +smmu_iotlb_lookup_miss(int asid, uint16_t vmid, uint64_t addr, uint32_t hit, uint32_t miss, uint32_t p) "IOTLB cache MISS asid=%d vmid=%d addr=0x%"PRIx64" hit=%d miss=%d hit rate=%d"
> +smmu_iotlb_insert(int asid, uint16_t vmid, uint64_t addr, uint8_t tg, uint8_t level, uint64_t translate_addr) "IOTLB ++ asid=%d vmid=%d addr=0x%"PRIx64" tg=%d level=%d translate_addr=0x%"PRIx64
>  
>  # smmuv3.c
>  smmuv3_read_mmio(uint64_t addr, uint64_t val, unsigned size, uint32_t r) "addr: 0x%"PRIx64" val:0x%"PRIx64" size: 0x%x(%d)"
> diff --git a/include/hw/arm/smmu-common.h b/include/hw/arm/smmu-common.h
> index 2772175115..03ff0f02ba 100644
> --- a/include/hw/arm/smmu-common.h
> +++ b/include/hw/arm/smmu-common.h
> @@ -91,6 +91,7 @@ typedef struct SMMUTLBEntry {
>      IOMMUTLBEntry entry;
>      uint8_t level;
>      uint8_t granule;
> +    IOMMUAccessFlags parent_perm;
>  } SMMUTLBEntry;
>  
>  /* Stage-2 configuration. */
> @@ -198,7 +199,7 @@ static inline uint16_t smmu_get_sid(SMMUDevice *sdev)
>   * pair, according to @cfg translation config
>   */
>  int smmu_ptw(SMMUTransCfg *cfg, dma_addr_t iova, IOMMUAccessFlags perm,
> -             SMMUTLBEntry *tlbe, SMMUPTWEventInfo *info);
> +             SMMUTLBEntry *tlbe, SMMUPTWEventInfo *info, SMMUState *bs);
>  
>  
>  /*
To be honest this patch is quite complex to review. I would suggest you
try to split it.

Thanks

Eric
Mostafa Saleh April 19, 2024, 10:54 a.m. UTC | #2
Hi Eric,

On Thu, Apr 18, 2024 at 03:54:01PM +0200, Eric Auger wrote:
> Hi Mostafa,
> 
> On 4/8/24 16:08, Mostafa Saleh wrote:
> > When nested translation is requested, do the following:
> >
> > - Translate stage-1 IPA using stage-2 to a physical address.
> > - Translate stage-1 PTW walks using stage-2.
> > - Combine both to create a single TLB entry; for that we choose
> >   the smallest entry to cache, which means that if the smallest
> >   entry comes from stage-2, and stage-2 uses a different granule,
> >   a TLB lookup for stage-1 (in nested config) will always miss.
> >   Lookup logic is modified for nesting to look up using the stage-2
> >   granule if the stage-1 granule missed and they are different.
> >
> > Also, add more visibility in trace points, to make it easier to debug.
> >
> > Signed-off-by: Mostafa Saleh <smostafa@google.com>
> > ---
> >  hw/arm/smmu-common.c         | 153 ++++++++++++++++++++++++++++-------
> >  hw/arm/trace-events          |   6 +-
> >  include/hw/arm/smmu-common.h |   3 +-
> >  3 files changed, 131 insertions(+), 31 deletions(-)
> >
> > diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c
> > index 771b9c79a3..2cf27b490b 100644
> > --- a/hw/arm/smmu-common.c
> > +++ b/hw/arm/smmu-common.c
> > @@ -66,8 +66,10 @@ SMMUIOTLBKey smmu_get_iotlb_key(int asid, int vmid, uint64_t iova,
> >      return key;
> >  }
> >  
> > -SMMUTLBEntry *smmu_iotlb_lookup(SMMUState *bs, SMMUTransCfg *cfg,
> > -                                SMMUTransTableInfo *tt, hwaddr iova)
> > +static SMMUTLBEntry *smmu_iotlb_lookup_all_levels(SMMUState *bs,
> > +                                                  SMMUTransCfg *cfg,
> > +                                                  SMMUTransTableInfo *tt,
> > +                                                  hwaddr iova)
> this helper can be introduced in a separate patch to ease the code review

Will do.

> >  {
> >      uint8_t tg = (tt->granule_sz - 10) / 2;
> >      uint8_t inputsize = 64 - tt->tsz;
> > @@ -88,10 +90,29 @@ SMMUTLBEntry *smmu_iotlb_lookup(SMMUState *bs, SMMUTransCfg *cfg,
> >          }
> >          level++;
> >      }
> > +    return entry;
> > +}
> > +
> > +SMMUTLBEntry *smmu_iotlb_lookup(SMMUState *bs, SMMUTransCfg *cfg,
> > +                                SMMUTransTableInfo *tt, hwaddr iova)
> > +{
> > +    SMMUTLBEntry *entry = NULL;
> > +
> > +    entry = smmu_iotlb_lookup_all_levels(bs, cfg, tt, iova);
> > +    /*
> > +     * For nested translation also use the s2 granule, as the TLB will insert
> > +     * the smallest of both, so the entry can be cached with the s2 granule.
> > +     */
> > +    if (!entry && (cfg->stage == SMMU_NESTED) &&
> > +        (cfg->s2cfg.granule_sz != tt->granule_sz)) {
> > +        tt->granule_sz = cfg->s2cfg.granule_sz;
> > +        entry = smmu_iotlb_lookup_all_levels(bs, cfg, tt, iova);
> this is also the kind of stuff that can be introduced and reviewed
> separately, without taking any risk as long as NESTED is not supported. In
> this new patch you could also document the TLB strategy.

Will do.

> > +    }
> >  
> >      if (entry) {
> >          cfg->iotlb_hits++;
> >          trace_smmu_iotlb_lookup_hit(cfg->asid, cfg->s2cfg.vmid, iova,
> > +                                    entry->entry.addr_mask,
> can be moved to a separate fix. same for the trace point changes

Will do.

> >                                      cfg->iotlb_hits, cfg->iotlb_misses,
> >                                      100 * cfg->iotlb_hits /
> >                                      (cfg->iotlb_hits + cfg->iotlb_misses));
> > @@ -117,7 +138,7 @@ void smmu_iotlb_insert(SMMUState *bs, SMMUTransCfg *cfg, SMMUTLBEntry *new)
> >      *key = smmu_get_iotlb_key(cfg->asid, cfg->s2cfg.vmid, new->entry.iova,
> >                                tg, new->level);
> >      trace_smmu_iotlb_insert(cfg->asid, cfg->s2cfg.vmid, new->entry.iova,
> > -                            tg, new->level);
> > +                            tg, new->level, new->entry.translated_addr);
> >      g_hash_table_insert(bs->iotlb, key, new);
> >  }
> >  
> > @@ -286,6 +307,27 @@ SMMUTransTableInfo *select_tt(SMMUTransCfg *cfg, dma_addr_t iova)
> >      return NULL;
> >  }
> >  
> > +/* Return the correct table address based on configuration. */
> does the S2 translation for a PTE table?

The intention was to abstract that: it just returns the table address,
with or without translation, but I can move the check outside so that
this helper always translates.
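
Roughly something like this (just a sketch of that alternative, with the
stage check moved to the caller and the helper always translating):

            baseaddr = get_table_pte_address(pte, granule_sz);
            if (cfg->stage == SMMU_NESTED &&
                translate_table_s1(&baseaddr, cfg, info, bs)) {
                goto error_no_stage;
            }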

> > +static inline int translate_table_s1(dma_addr_t *table_addr, SMMUTransCfg *cfg,
> > +                                     SMMUPTWEventInfo *info, SMMUState *bs)
> > +{
> > +    dma_addr_t addr = *table_addr;
> > +    SMMUTLBEntry *cached_entry;
> > +
> > +    if (cfg->stage != SMMU_NESTED) {
> > +        return 0;
> > +    }
> > +
> > +    CALL_FUNC_CFG_S2(cfg, cached_entry, smmu_translate,
> > +                     bs, cfg, addr, IOMMU_RO, info);
> > +
> > +    if (cached_entry) {
> > +        *table_addr = CACHED_ENTRY_TO_ADDR(cached_entry, addr);
> > +        return 0;
> > +    }
> > +    return -EINVAL;
> > +}
> > +
> >  /**
> >   * smmu_ptw_64_s1 - VMSAv8-64 Walk of the page tables for a given IOVA
> >   * @cfg: translation config
> > @@ -301,7 +343,8 @@ SMMUTransTableInfo *select_tt(SMMUTransCfg *cfg, dma_addr_t iova)
> >   */
> >  static int smmu_ptw_64_s1(SMMUTransCfg *cfg,
> >                            dma_addr_t iova, IOMMUAccessFlags perm,
> > -                          SMMUTLBEntry *tlbe, SMMUPTWEventInfo *info)
> > +                          SMMUTLBEntry *tlbe, SMMUPTWEventInfo *info,
> > +                          SMMUState *bs)
> >  {
> >      dma_addr_t baseaddr, indexmask;
> >      SMMUStage stage = cfg->stage;
> > @@ -349,6 +392,10 @@ static int smmu_ptw_64_s1(SMMUTransCfg *cfg,
> >                  goto error;
> >              }
> >              baseaddr = get_table_pte_address(pte, granule_sz);
> > +            /* In case of failure, retain stage-2 fault. */
> link to the doc?

Will do.

> > +            if (translate_table_s1(&baseaddr, cfg, info, bs)) {
> let's avoid that call if S1 only

Will do.

> 
> What about the error handling? I am not sure we are covering all the
> cases listed in 7.3.12 F_WALK_EABT, for instance.

Yes, I think that can be used for some of the S2 fetch errors, though it
seems to also overlap with other events, for example:
F_WALK_EABT:
     Translation of an IPA after successful stage 1 translation (or, in stage 2-only
     configuration, an input IPA)

F_TRANSLATION
    If translating an IPA for a transaction (whether by input to stage 2-only
    configuration, or after successful stage 1 translation), CLASS == IN, and IPA is
    provided.


I will have a deeper look and rework any missing events or add comments.

> > +                goto error_no_stage;
> > +            }
> >              level++;
> >              continue;
> >          } else if (is_page_pte(pte, level)) {
> > @@ -384,7 +431,7 @@ static int smmu_ptw_64_s1(SMMUTransCfg *cfg,
> >          tlbe->entry.translated_addr = gpa;
> >          tlbe->entry.iova = iova & ~mask;
> >          tlbe->entry.addr_mask = mask;
> > -        tlbe->entry.perm = PTE_AP_TO_PERM(ap);
> > +        tlbe->parent_perm = tlbe->entry.perm = PTE_AP_TO_PERM(ap);
> >          tlbe->level = level;
> >          tlbe->granule = granule_sz;
> >          return 0;
> > @@ -393,6 +440,7 @@ static int smmu_ptw_64_s1(SMMUTransCfg *cfg,
> >  
> >  error:
> >      info->stage = SMMU_STAGE_1;
> > +error_no_stage:
> >      tlbe->entry.perm = IOMMU_NONE;
> >      return -EINVAL;
> >  }
> > @@ -505,7 +553,7 @@ static int smmu_ptw_64_s2(SMMUTransCfg *cfg,
> >          tlbe->entry.translated_addr = gpa;
> >          tlbe->entry.iova = ipa & ~mask;
> >          tlbe->entry.addr_mask = mask;
> > -        tlbe->entry.perm = s2ap;
> > +        tlbe->parent_perm = tlbe->entry.perm = s2ap;
> >          tlbe->level = level;
> >          tlbe->granule = granule_sz;
> >          return 0;
> > @@ -518,6 +566,28 @@ error:
> >      return -EINVAL;
> >  }
> >  
> > +/* Combine 2 TLB enteries and return in tlbe. */
> entries.
> Would suggest combine

Will do.

> > +static void combine_tlb(SMMUTLBEntry *tlbe, SMMUTLBEntry *tlbe_s2,
> > +                        dma_addr_t iova, SMMUTransCfg *cfg)
> > +{
> > +        if (cfg->stage == SMMU_NESTED) {
> > +            tlbe->entry.addr_mask = MIN(tlbe->entry.addr_mask,
> > +                                        tlbe_s2->entry.addr_mask);
> > +            tlbe->entry.translated_addr = CACHED_ENTRY_TO_ADDR(tlbe_s2,
> > +                                          tlbe->entry.translated_addr);
> > +
> > +            tlbe->granule = MIN(tlbe->granule, tlbe_s2->granule);
> could you add a link to the spec so we can clearly check what this code
> is written against?

The spec doesn’t define the microarchitecture of the TLBs; it is an
implementation choice, as long as it satisfies the TLB requirements
(mentioned in the cover letter):
- ARM ARM DDI 0487J.a: RLGSCG, RTVTYQ, RGNJPZ
- ARM IHI 0070F.b: 16.2 Caching

I will better document the TLB architecture.
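
As a concrete example of the intended invariant (a sketch, not from the
patch): with a 64KB stage-1 leaf and a 4KB stage-2 leaf, combine_tlb()
keeps the smaller of the two mappings:

    /*
     * S1 leaf: granule_sz 16 (64KB), addr_mask 0xffff
     * S2 leaf: granule_sz 12 (4KB),  addr_mask 0xfff
     * combined entry:
     *   addr_mask = MIN(0xffff, 0xfff) = 0xfff
     *   granule   = MIN(16, 12)        = 12
     * so the entry is cached under the stage-2 (4KB) granule, and a later
     * lookup with the stage-1 granule misses unless smmu_iotlb_lookup()
     * retries with the stage-2 granule.
     */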

> > +            tlbe->level = MAX(tlbe->level, tlbe_s2->level);
> > +            tlbe->entry.iova = iova & ~tlbe->entry.addr_mask;
> > +            /* parent_perm has s2 perm while perm has s1 perm. */
> > +            tlbe->parent_perm = tlbe_s2->entry.perm;
> > +            return;
> > +        }
> > +
> > +        /* That was not nested, use the s2. */
> should be removed and tested ;-)

Ah, who needs testing :)

> > +        memcpy(tlbe, tlbe_s2, sizeof(*tlbe));
> You shall avoid doing that memcpy if not necessary. I would advocate for
> separate paths for S2 and nested.

Will do.

> > +}
> > +
> >  /**
> >   * smmu_ptw - Walk the page tables for an IOVA, according to @cfg
> need to update the above args desc

Will do.

> >   *
> > @@ -530,28 +600,59 @@ error:
> >   * return 0 on success
> >   */
> >  int smmu_ptw(SMMUTransCfg *cfg, dma_addr_t iova, IOMMUAccessFlags perm,
> > -             SMMUTLBEntry *tlbe, SMMUPTWEventInfo *info)
> > +             SMMUTLBEntry *tlbe, SMMUPTWEventInfo *info, SMMUState *bs)
> >  {
> > -    if (cfg->stage == SMMU_STAGE_1) {
> > -        return smmu_ptw_64_s1(cfg, iova, perm, tlbe, info);
> > -    } else if (cfg->stage == SMMU_STAGE_2) {
> > -        /*
> > -         * If bypassing stage 1(or unimplemented), the input address is passed
> > -         * directly to stage 2 as IPA. If the input address of a transaction
> > -         * exceeds the size of the IAS, a stage 1 Address Size fault occurs.
> > -         * For AA64, IAS = OAS according to (IHI 0070.E.a) "3.4 Address sizes"
> > -         */
> > -        if (iova >= (1ULL << cfg->oas)) {
> > -            info->type = SMMU_PTW_ERR_ADDR_SIZE;
> > -            info->stage = SMMU_STAGE_1;
> > -            tlbe->entry.perm = IOMMU_NONE;
> > -            return -EINVAL;
> > +    int ret = 0;
> > +    SMMUTLBEntry tlbe_s2;
> > +    dma_addr_t ipa = iova;
> > +
> > +    if (cfg->stage & SMMU_STAGE_1) {
> > +        ret = smmu_ptw_64_s1(cfg, iova, perm, tlbe, info, bs);
> > +        if (ret) {
> > +            return ret;
> >          }
> > +        /* This is the IPA for next stage.*/
> but you don't necessarily have the S2 enabled so this is not necessarily
> an IPA

I will update the comment.
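
e.g. something like (sketch):

        /*
         * The output of stage-1 is only an IPA when stage-2 is also
         * enabled (nested); for stage-1 only it is already the final
         * translated address.
         */
        ipa = CACHED_ENTRY_TO_ADDR(tlbe, iova);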

> > +        ipa = CACHED_ENTRY_TO_ADDR(tlbe, iova);
> > +    }
> >  
> > -        return smmu_ptw_64_s2(cfg, iova, perm, tlbe, info);
> > +    /*
> > +     * The address output from the translation causes a stage 1 Address Size
> > +     * fault if it exceeds the range of the effective IPA size for the given CD.
> > +     * If bypassing stage 1(or unimplemented), the input address is passed
> > +     * directly to stage 2 as IPA. If the input address of a transaction
> > +     * exceeds the size of the IAS, a stage 1 Address Size fault occurs.
> > +     * For AA64, IAS = OAS according to (IHI 0070.E.a) "3.4 Address sizes"
> > +     */
> > +    if (ipa >= (1ULL << cfg->oas)) {
> in case we have S2 only, it would be clearer to use ias instead (even
> though the above comment says they are the same)

It was written this way as it can be used in both cases where IAS is
limited by the CD; I will double check.

> > +        info->type = SMMU_PTW_ERR_ADDR_SIZE;
> > +        info->stage = SMMU_STAGE_1;
> What does the stage really mean? That should be documented in the
> struct, I think.

This should update the s2 field in translation events (event->u.*.s2),
I will add a comment.
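
Something along these lines (only a sketch of the comment; the field is
the existing stage member of SMMUPTWEventInfo):

    /*
     * Used when reporting the fault: selects whether the s2 field of the
     * resulting event (event->u.*.s2) is set.
     */
    SMMUStage stage;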


> > +        tlbe->entry.perm = IOMMU_NONE;
> > +        return -EINVAL;
> >      }
> this check is now also introduced for the S1-only case. If this is a fix,
> it should be brought separately.
> Also, the above comment refers to IPA. Does it also hold for S1 only? Is
> the check identical in that case?

I believe that is a fix, I will double check and add it in a separate patch.

> >  
> > -    g_assert_not_reached();
> > +    if (cfg->stage & SMMU_STAGE_2) {
> > +        ret = smmu_ptw_64_s2(cfg, ipa, perm, &tlbe_s2, info);
> > +        if (ret) {
> > +            return ret;
> > +        }
> > +        combine_tlb(tlbe, &tlbe_s2, iova, cfg);
> > +    }
> I think I would prefer the S1, S2 and nested cases clearly separated, at
> the price of some code duplication. I am afraid serializing stuff makes
> the code less maintainable.
> Also, it is important that the S1 case is not altered in terms of perf.

I see, I will have that in mind when splitting the patches.
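
e.g. roughly (only a sketch of one possible split; the address-size checks
are left out here):

    if (cfg->stage == SMMU_STAGE_1) {
        return smmu_ptw_64_s1(cfg, iova, perm, tlbe, info, bs);
    }

    if (cfg->stage == SMMU_STAGE_2) {
        return smmu_ptw_64_s2(cfg, iova, perm, tlbe, info);
    }

    /* SMMU_NESTED: stage-1 walk, stage-2 on the resulting IPA, combine. */
    ret = smmu_ptw_64_s1(cfg, iova, perm, tlbe, info, bs);
    if (ret) {
        return ret;
    }
    ipa = CACHED_ENTRY_TO_ADDR(tlbe, iova);
    ret = smmu_ptw_64_s2(cfg, ipa, perm, &tlbe_s2, info);
    if (ret) {
        return ret;
    }
    combine_tlb(tlbe, &tlbe_s2, iova, cfg);
    return 0;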

> > +
> > +    return ret;
> > +}
> > +
> > +static int validate_tlb_entry(SMMUTLBEntry *cached_entry, IOMMUAccessFlags flag,
> > +                              SMMUPTWEventInfo *info)
> > +{
> > +        if ((flag & IOMMU_WO) && !(cached_entry->entry.perm &
> > +            cached_entry->parent_perm & IOMMU_WO)) {
> > +            info->type = SMMU_PTW_ERR_PERMISSION;
> > +            info->stage = !(cached_entry->entry.perm & IOMMU_WO) ?
> > +                          SMMU_STAGE_1 :
> > +                          SMMU_STAGE_2;
> > +            return -EINVAL;
> > +        }
> > +        return 0;
> >  }
> >  
> >  SMMUTLBEntry *smmu_translate(SMMUState *bs, SMMUTransCfg *cfg, dma_addr_t addr,
> > @@ -595,16 +696,14 @@ SMMUTLBEntry *smmu_translate(SMMUState *bs, SMMUTransCfg *cfg, dma_addr_t addr,
> >  
> >      cached_entry = smmu_iotlb_lookup(bs, cfg, &tt_combined, aligned_addr);
> >      if (cached_entry) {
> > -        if ((flag & IOMMU_WO) && !(cached_entry->entry.perm & IOMMU_WO)) {
> > -            info->type = SMMU_PTW_ERR_PERMISSION;
> > -            info->stage = cfg->stage;
> > +        if (validate_tlb_entry(cached_entry, flag, info)) {
> >              return NULL;
> >          }
> >          return cached_entry;
> >      }
> >  
> >      cached_entry = g_new0(SMMUTLBEntry, 1);
> > -    status = smmu_ptw(cfg, aligned_addr, flag, cached_entry, info);
> > +    status = smmu_ptw(cfg, aligned_addr, flag, cached_entry, info, bs);
> >      if (status) {
> >              g_free(cached_entry);
> >              return NULL;
> > diff --git a/hw/arm/trace-events b/hw/arm/trace-events
> > index cc12924a84..5f23f0b963 100644
> > --- a/hw/arm/trace-events
> > +++ b/hw/arm/trace-events
> > @@ -15,9 +15,9 @@ smmu_iotlb_inv_asid(uint16_t asid) "IOTLB invalidate asid=%d"
> >  smmu_iotlb_inv_vmid(uint16_t vmid) "IOTLB invalidate vmid=%d"
> >  smmu_iotlb_inv_iova(uint16_t asid, uint64_t addr) "IOTLB invalidate asid=%d addr=0x%"PRIx64
> >  smmu_inv_notifiers_mr(const char *name) "iommu mr=%s"
> > -smmu_iotlb_lookup_hit(uint16_t asid, uint16_t vmid, uint64_t addr, uint32_t hit, uint32_t miss, uint32_t p) "IOTLB cache HIT asid=%d vmid=%d addr=0x%"PRIx64" hit=%d miss=%d hit rate=%d"
> > -smmu_iotlb_lookup_miss(uint16_t asid, uint16_t vmid, uint64_t addr, uint32_t hit, uint32_t miss, uint32_t p) "IOTLB cache MISS asid=%d vmid=%d addr=0x%"PRIx64" hit=%d miss=%d hit rate=%d"
> > -smmu_iotlb_insert(uint16_t asid, uint16_t vmid, uint64_t addr, uint8_t tg, uint8_t level) "IOTLB ++ asid=%d vmid=%d addr=0x%"PRIx64" tg=%d level=%d"
> > +smmu_iotlb_lookup_hit(int asid, uint16_t vmid, uint64_t addr, uint64_t mask, uint32_t hit, uint32_t miss, uint32_t p) "IOTLB cache HIT asid=%d vmid=%d addr=0x%"PRIx64" mask=0x%"PRIx64" hit=%d miss=%d hit rate=%d"
> > +smmu_iotlb_lookup_miss(int asid, uint16_t vmid, uint64_t addr, uint32_t hit, uint32_t miss, uint32_t p) "IOTLB cache MISS asid=%d vmid=%d addr=0x%"PRIx64" hit=%d miss=%d hit rate=%d"
> > +smmu_iotlb_insert(int asid, uint16_t vmid, uint64_t addr, uint8_t tg, uint8_t level, uint64_t translate_addr) "IOTLB ++ asid=%d vmid=%d addr=0x%"PRIx64" tg=%d level=%d translate_addr=0x%"PRIx64
> >  
> >  # smmuv3.c
> >  smmuv3_read_mmio(uint64_t addr, uint64_t val, unsigned size, uint32_t r) "addr: 0x%"PRIx64" val:0x%"PRIx64" size: 0x%x(%d)"
> > diff --git a/include/hw/arm/smmu-common.h b/include/hw/arm/smmu-common.h
> > index 2772175115..03ff0f02ba 100644
> > --- a/include/hw/arm/smmu-common.h
> > +++ b/include/hw/arm/smmu-common.h
> > @@ -91,6 +91,7 @@ typedef struct SMMUTLBEntry {
> >      IOMMUTLBEntry entry;
> >      uint8_t level;
> >      uint8_t granule;
> > +    IOMMUAccessFlags parent_perm;
> >  } SMMUTLBEntry;
> >  
> >  /* Stage-2 configuration. */
> > @@ -198,7 +199,7 @@ static inline uint16_t smmu_get_sid(SMMUDevice *sdev)
> >   * pair, according to @cfg translation config
> >   */
> >  int smmu_ptw(SMMUTransCfg *cfg, dma_addr_t iova, IOMMUAccessFlags perm,
> > -             SMMUTLBEntry *tlbe, SMMUPTWEventInfo *info);
> > +             SMMUTLBEntry *tlbe, SMMUPTWEventInfo *info, SMMUState *bs);
> >  
> >  
> >  /*
> To be honest this patch is quite complex to review. I would suggest you
> try to split it.

Yes, sorry about that :/ I thought it might be easier to have related
stuff in one patch; I will split it into 4-5 patches.


Thanks,
Mostafa
> Thanks
> 
> Eric
>

Patch

diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c
index 771b9c79a3..2cf27b490b 100644
--- a/hw/arm/smmu-common.c
+++ b/hw/arm/smmu-common.c
@@ -66,8 +66,10 @@  SMMUIOTLBKey smmu_get_iotlb_key(int asid, int vmid, uint64_t iova,
     return key;
 }
 
-SMMUTLBEntry *smmu_iotlb_lookup(SMMUState *bs, SMMUTransCfg *cfg,
-                                SMMUTransTableInfo *tt, hwaddr iova)
+static SMMUTLBEntry *smmu_iotlb_lookup_all_levels(SMMUState *bs,
+                                                  SMMUTransCfg *cfg,
+                                                  SMMUTransTableInfo *tt,
+                                                  hwaddr iova)
 {
     uint8_t tg = (tt->granule_sz - 10) / 2;
     uint8_t inputsize = 64 - tt->tsz;
@@ -88,10 +90,29 @@  SMMUTLBEntry *smmu_iotlb_lookup(SMMUState *bs, SMMUTransCfg *cfg,
         }
         level++;
     }
+    return entry;
+}
+
+SMMUTLBEntry *smmu_iotlb_lookup(SMMUState *bs, SMMUTransCfg *cfg,
+                                SMMUTransTableInfo *tt, hwaddr iova)
+{
+    SMMUTLBEntry *entry = NULL;
+
+    entry = smmu_iotlb_lookup_all_levels(bs, cfg, tt, iova);
+    /*
+     * For nested translation also use the s2 granule, as the TLB will insert
+     * the smallest of both, so the entry can be cached with the s2 granule.
+     */
+    if (!entry && (cfg->stage == SMMU_NESTED) &&
+        (cfg->s2cfg.granule_sz != tt->granule_sz)) {
+        tt->granule_sz = cfg->s2cfg.granule_sz;
+        entry = smmu_iotlb_lookup_all_levels(bs, cfg, tt, iova);
+    }
 
     if (entry) {
         cfg->iotlb_hits++;
         trace_smmu_iotlb_lookup_hit(cfg->asid, cfg->s2cfg.vmid, iova,
+                                    entry->entry.addr_mask,
                                     cfg->iotlb_hits, cfg->iotlb_misses,
                                     100 * cfg->iotlb_hits /
                                     (cfg->iotlb_hits + cfg->iotlb_misses));
@@ -117,7 +138,7 @@  void smmu_iotlb_insert(SMMUState *bs, SMMUTransCfg *cfg, SMMUTLBEntry *new)
     *key = smmu_get_iotlb_key(cfg->asid, cfg->s2cfg.vmid, new->entry.iova,
                               tg, new->level);
     trace_smmu_iotlb_insert(cfg->asid, cfg->s2cfg.vmid, new->entry.iova,
-                            tg, new->level);
+                            tg, new->level, new->entry.translated_addr);
     g_hash_table_insert(bs->iotlb, key, new);
 }
 
@@ -286,6 +307,27 @@  SMMUTransTableInfo *select_tt(SMMUTransCfg *cfg, dma_addr_t iova)
     return NULL;
 }
 
+/* Return the correct table address based on configuration. */
+static inline int translate_table_s1(dma_addr_t *table_addr, SMMUTransCfg *cfg,
+                                     SMMUPTWEventInfo *info, SMMUState *bs)
+{
+    dma_addr_t addr = *table_addr;
+    SMMUTLBEntry *cached_entry;
+
+    if (cfg->stage != SMMU_NESTED) {
+        return 0;
+    }
+
+    CALL_FUNC_CFG_S2(cfg, cached_entry, smmu_translate,
+                     bs, cfg, addr, IOMMU_RO, info);
+
+    if (cached_entry) {
+        *table_addr = CACHED_ENTRY_TO_ADDR(cached_entry, addr);
+        return 0;
+    }
+    return -EINVAL;
+}
+
 /**
  * smmu_ptw_64_s1 - VMSAv8-64 Walk of the page tables for a given IOVA
  * @cfg: translation config
@@ -301,7 +343,8 @@  SMMUTransTableInfo *select_tt(SMMUTransCfg *cfg, dma_addr_t iova)
  */
 static int smmu_ptw_64_s1(SMMUTransCfg *cfg,
                           dma_addr_t iova, IOMMUAccessFlags perm,
-                          SMMUTLBEntry *tlbe, SMMUPTWEventInfo *info)
+                          SMMUTLBEntry *tlbe, SMMUPTWEventInfo *info,
+                          SMMUState *bs)
 {
     dma_addr_t baseaddr, indexmask;
     SMMUStage stage = cfg->stage;
@@ -349,6 +392,10 @@  static int smmu_ptw_64_s1(SMMUTransCfg *cfg,
                 goto error;
             }
             baseaddr = get_table_pte_address(pte, granule_sz);
+            /* In case of failure, retain stage-2 fault. */
+            if (translate_table_s1(&baseaddr, cfg, info, bs)) {
+                goto error_no_stage;
+            }
             level++;
             continue;
         } else if (is_page_pte(pte, level)) {
@@ -384,7 +431,7 @@  static int smmu_ptw_64_s1(SMMUTransCfg *cfg,
         tlbe->entry.translated_addr = gpa;
         tlbe->entry.iova = iova & ~mask;
         tlbe->entry.addr_mask = mask;
-        tlbe->entry.perm = PTE_AP_TO_PERM(ap);
+        tlbe->parent_perm = tlbe->entry.perm = PTE_AP_TO_PERM(ap);
         tlbe->level = level;
         tlbe->granule = granule_sz;
         return 0;
@@ -393,6 +440,7 @@  static int smmu_ptw_64_s1(SMMUTransCfg *cfg,
 
 error:
     info->stage = SMMU_STAGE_1;
+error_no_stage:
     tlbe->entry.perm = IOMMU_NONE;
     return -EINVAL;
 }
@@ -505,7 +553,7 @@  static int smmu_ptw_64_s2(SMMUTransCfg *cfg,
         tlbe->entry.translated_addr = gpa;
         tlbe->entry.iova = ipa & ~mask;
         tlbe->entry.addr_mask = mask;
-        tlbe->entry.perm = s2ap;
+        tlbe->parent_perm = tlbe->entry.perm = s2ap;
         tlbe->level = level;
         tlbe->granule = granule_sz;
         return 0;
@@ -518,6 +566,28 @@  error:
     return -EINVAL;
 }
 
+/* Combine 2 TLB enteries and return in tlbe. */
+static void combine_tlb(SMMUTLBEntry *tlbe, SMMUTLBEntry *tlbe_s2,
+                        dma_addr_t iova, SMMUTransCfg *cfg)
+{
+        if (cfg->stage == SMMU_NESTED) {
+            tlbe->entry.addr_mask = MIN(tlbe->entry.addr_mask,
+                                        tlbe_s2->entry.addr_mask);
+            tlbe->entry.translated_addr = CACHED_ENTRY_TO_ADDR(tlbe_s2,
+                                          tlbe->entry.translated_addr);
+
+            tlbe->granule = MIN(tlbe->granule, tlbe_s2->granule);
+            tlbe->level = MAX(tlbe->level, tlbe_s2->level);
+            tlbe->entry.iova = iova & ~tlbe->entry.addr_mask;
+            /* parent_perm has s2 perm while perm has s1 perm. */
+            tlbe->parent_perm = tlbe_s2->entry.perm;
+            return;
+        }
+
+        /* That was not nested, use the s2. */
+        memcpy(tlbe, tlbe_s2, sizeof(*tlbe));
+}
+
 /**
  * smmu_ptw - Walk the page tables for an IOVA, according to @cfg
  *
@@ -530,28 +600,59 @@  error:
  * return 0 on success
  */
 int smmu_ptw(SMMUTransCfg *cfg, dma_addr_t iova, IOMMUAccessFlags perm,
-             SMMUTLBEntry *tlbe, SMMUPTWEventInfo *info)
+             SMMUTLBEntry *tlbe, SMMUPTWEventInfo *info, SMMUState *bs)
 {
-    if (cfg->stage == SMMU_STAGE_1) {
-        return smmu_ptw_64_s1(cfg, iova, perm, tlbe, info);
-    } else if (cfg->stage == SMMU_STAGE_2) {
-        /*
-         * If bypassing stage 1(or unimplemented), the input address is passed
-         * directly to stage 2 as IPA. If the input address of a transaction
-         * exceeds the size of the IAS, a stage 1 Address Size fault occurs.
-         * For AA64, IAS = OAS according to (IHI 0070.E.a) "3.4 Address sizes"
-         */
-        if (iova >= (1ULL << cfg->oas)) {
-            info->type = SMMU_PTW_ERR_ADDR_SIZE;
-            info->stage = SMMU_STAGE_1;
-            tlbe->entry.perm = IOMMU_NONE;
-            return -EINVAL;
+    int ret = 0;
+    SMMUTLBEntry tlbe_s2;
+    dma_addr_t ipa = iova;
+
+    if (cfg->stage & SMMU_STAGE_1) {
+        ret = smmu_ptw_64_s1(cfg, iova, perm, tlbe, info, bs);
+        if (ret) {
+            return ret;
         }
+        /* This is the IPA for next stage.*/
+        ipa = CACHED_ENTRY_TO_ADDR(tlbe, iova);
+    }
 
-        return smmu_ptw_64_s2(cfg, iova, perm, tlbe, info);
+    /*
+     * The address output from the translation causes a stage 1 Address Size
+     * fault if it exceeds the range of the effective IPA size for the given CD.
+     * If bypassing stage 1(or unimplemented), the input address is passed
+     * directly to stage 2 as IPA. If the input address of a transaction
+     * exceeds the size of the IAS, a stage 1 Address Size fault occurs.
+     * For AA64, IAS = OAS according to (IHI 0070.E.a) "3.4 Address sizes"
+     */
+    if (ipa >= (1ULL << cfg->oas)) {
+        info->type = SMMU_PTW_ERR_ADDR_SIZE;
+        info->stage = SMMU_STAGE_1;
+        tlbe->entry.perm = IOMMU_NONE;
+        return -EINVAL;
     }
 
-    g_assert_not_reached();
+    if (cfg->stage & SMMU_STAGE_2) {
+        ret = smmu_ptw_64_s2(cfg, ipa, perm, &tlbe_s2, info);
+        if (ret) {
+            return ret;
+        }
+        combine_tlb(tlbe, &tlbe_s2, iova, cfg);
+    }
+
+    return ret;
+}
+
+static int validate_tlb_entry(SMMUTLBEntry *cached_entry, IOMMUAccessFlags flag,
+                              SMMUPTWEventInfo *info)
+{
+        if ((flag & IOMMU_WO) && !(cached_entry->entry.perm &
+            cached_entry->parent_perm & IOMMU_WO)) {
+            info->type = SMMU_PTW_ERR_PERMISSION;
+            info->stage = !(cached_entry->entry.perm & IOMMU_WO) ?
+                          SMMU_STAGE_1 :
+                          SMMU_STAGE_2;
+            return -EINVAL;
+        }
+        return 0;
 }
 
 SMMUTLBEntry *smmu_translate(SMMUState *bs, SMMUTransCfg *cfg, dma_addr_t addr,
@@ -595,16 +696,14 @@  SMMUTLBEntry *smmu_translate(SMMUState *bs, SMMUTransCfg *cfg, dma_addr_t addr,
 
     cached_entry = smmu_iotlb_lookup(bs, cfg, &tt_combined, aligned_addr);
     if (cached_entry) {
-        if ((flag & IOMMU_WO) && !(cached_entry->entry.perm & IOMMU_WO)) {
-            info->type = SMMU_PTW_ERR_PERMISSION;
-            info->stage = cfg->stage;
+        if (validate_tlb_entry(cached_entry, flag, info)) {
             return NULL;
         }
         return cached_entry;
     }
 
     cached_entry = g_new0(SMMUTLBEntry, 1);
-    status = smmu_ptw(cfg, aligned_addr, flag, cached_entry, info);
+    status = smmu_ptw(cfg, aligned_addr, flag, cached_entry, info, bs);
     if (status) {
             g_free(cached_entry);
             return NULL;
diff --git a/hw/arm/trace-events b/hw/arm/trace-events
index cc12924a84..5f23f0b963 100644
--- a/hw/arm/trace-events
+++ b/hw/arm/trace-events
@@ -15,9 +15,9 @@  smmu_iotlb_inv_asid(uint16_t asid) "IOTLB invalidate asid=%d"
 smmu_iotlb_inv_vmid(uint16_t vmid) "IOTLB invalidate vmid=%d"
 smmu_iotlb_inv_iova(uint16_t asid, uint64_t addr) "IOTLB invalidate asid=%d addr=0x%"PRIx64
 smmu_inv_notifiers_mr(const char *name) "iommu mr=%s"
-smmu_iotlb_lookup_hit(uint16_t asid, uint16_t vmid, uint64_t addr, uint32_t hit, uint32_t miss, uint32_t p) "IOTLB cache HIT asid=%d vmid=%d addr=0x%"PRIx64" hit=%d miss=%d hit rate=%d"
-smmu_iotlb_lookup_miss(uint16_t asid, uint16_t vmid, uint64_t addr, uint32_t hit, uint32_t miss, uint32_t p) "IOTLB cache MISS asid=%d vmid=%d addr=0x%"PRIx64" hit=%d miss=%d hit rate=%d"
-smmu_iotlb_insert(uint16_t asid, uint16_t vmid, uint64_t addr, uint8_t tg, uint8_t level) "IOTLB ++ asid=%d vmid=%d addr=0x%"PRIx64" tg=%d level=%d"
+smmu_iotlb_lookup_hit(int asid, uint16_t vmid, uint64_t addr, uint64_t mask, uint32_t hit, uint32_t miss, uint32_t p) "IOTLB cache HIT asid=%d vmid=%d addr=0x%"PRIx64" mask=0x%"PRIx64" hit=%d miss=%d hit rate=%d"
+smmu_iotlb_lookup_miss(int asid, uint16_t vmid, uint64_t addr, uint32_t hit, uint32_t miss, uint32_t p) "IOTLB cache MISS asid=%d vmid=%d addr=0x%"PRIx64" hit=%d miss=%d hit rate=%d"
+smmu_iotlb_insert(int asid, uint16_t vmid, uint64_t addr, uint8_t tg, uint8_t level, uint64_t translate_addr) "IOTLB ++ asid=%d vmid=%d addr=0x%"PRIx64" tg=%d level=%d translate_addr=0x%"PRIx64
 
 # smmuv3.c
 smmuv3_read_mmio(uint64_t addr, uint64_t val, unsigned size, uint32_t r) "addr: 0x%"PRIx64" val:0x%"PRIx64" size: 0x%x(%d)"
diff --git a/include/hw/arm/smmu-common.h b/include/hw/arm/smmu-common.h
index 2772175115..03ff0f02ba 100644
--- a/include/hw/arm/smmu-common.h
+++ b/include/hw/arm/smmu-common.h
@@ -91,6 +91,7 @@  typedef struct SMMUTLBEntry {
     IOMMUTLBEntry entry;
     uint8_t level;
     uint8_t granule;
+    IOMMUAccessFlags parent_perm;
 } SMMUTLBEntry;
 
 /* Stage-2 configuration. */
@@ -198,7 +199,7 @@  static inline uint16_t smmu_get_sid(SMMUDevice *sdev)
  * pair, according to @cfg translation config
  */
 int smmu_ptw(SMMUTransCfg *cfg, dma_addr_t iova, IOMMUAccessFlags perm,
-             SMMUTLBEntry *tlbe, SMMUPTWEventInfo *info);
+             SMMUTLBEntry *tlbe, SMMUPTWEventInfo *info, SMMUState *bs);
 
 
 /*