
[RFC,04/10] intel_iommu: Second Stage Access Dirty bit support

Message ID 20220428211351.3897-5-joao.m.martins@oracle.com (mailing list archive)
State New, archived
Series hw/vfio, x86/iommu: IOMMUFD Dirty Tracking

Commit Message

Joao Martins April 28, 2022, 9:13 p.m. UTC
The IOMMU advertises Access/Dirty bits if the extended capability
DMAR register reports it (ECAP, mnemonic ECAP.SSADS, previously
known as SLADS). The first-stage table, though, has no bit for
advertising Access/Dirty unless referenced via a scalable-mode
PASID entry. Relevant Intel IOMMU SDM references: "3.6.2 Accessed,
Extended Accessed, and Dirty Flags" for the first-stage table and
"3.7.2 Accessed and Dirty Flags" for the second-stage table.

Enabling it depends on scalable mode for the second-stage table, so we
limit use of the dirty bit to scalable mode. To use SSADS, we set bit 9
(SSADE) in the scalable-mode PASID table entry. When we do so, we must
flush the context/pasid-table caches and the IOTLB, much like on AMD.
Relevant SDM refs:

"3.7.2 Accessed and Dirty Flags"
"6.5.3.3 Guidance to Software for Invalidations,
 Table 23. Guidance to Software for Invalidations"
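
As an illustration, enabling it on the vIOMMU side amounts to setting
that bit in the PASID entry -- a minimal sketch using the
VTD_SM_PASID_ENTRY_SLADE define added below (the patch itself only
tests the bit, via vtd_pe_slad_enabled()):

/* Hypothetical helper (not in this patch): flag second-stage
 * Access/Dirty tracking as enabled in a scalable-mode PASID entry. */
static inline void vtd_pe_set_slade(VTDPASIDEntry *pe)
{
    pe->val[0] |= VTD_SM_PASID_ENTRY_SLADE;    /* bit 9, SSADE */
}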

To read out what has been dirtied, we do the same as past
implementations; dirty bit support sits in the same location (bit 9).
The IOTLB caches some attributes and the dirtiness information when
SSADE is enabled, so we also need to flush the IOTLB to make sure the
IOMMU attempts to set the dirty bit again. The relevant manual chapter
for hardware translation is chapter 6, with special mention of:

"6.2.3.1 Scalable-Mode PASID-Table Entry Programming Considerations"
"6.2.4 IOTLB"

The first 12 bits of the PTE are already cached via the SLPTE
pointer, similar to how it is done in amd-iommu. We also use the
previously added PASID entry cache to fetch whether the Dirty bit
was enabled in the SM second-stage table.
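
For reference, the IOTLB entry then caches roughly the following (a
sketch: the fields other than sm_pe_flags follow QEMU's existing
VTDIOTLBEntry, but treat the exact layout as an assumption):

struct VTDIOTLBEntry {
    uint64_t gfn;
    uint16_t domain_id;
    uint64_t slpte;         /* also carries the low 12 PTE bits */
    uint64_t mask;
    uint8_t  access_flags;
    uint16_t sm_pe_flags;   /* PASID-entry flags, e.g. SSADE; added
                             * earlier in this series */
};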

This is useful for prototyping and exercising IOMMU support for
access/dirty bits.

Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
---
 hw/i386/intel_iommu.c          | 85 ++++++++++++++++++++++++++++++----
 hw/i386/intel_iommu_internal.h |  4 ++
 hw/i386/trace-events           |  2 +
 3 files changed, 82 insertions(+), 9 deletions(-)

Comments

Jason Wang April 29, 2022, 2:26 a.m. UTC | #1
On Fri, Apr 29, 2022 at 5:14 AM Joao Martins <joao.m.martins@oracle.com> wrote:
>
> The IOMMU advertises Access/Dirty bits if the extended capability
> DMAR register reports it (ECAP, mnemonic ECAP.SSADS, previously
> known as SLADS). The first-stage table, though, has no bit for
> advertising Access/Dirty unless referenced via a scalable-mode
> PASID entry. Relevant Intel IOMMU SDM references: "3.6.2 Accessed,
> Extended Accessed, and Dirty Flags" for the first-stage table and
> "3.7.2 Accessed and Dirty Flags" for the second-stage table.
>
> Enabling it depends on scalable mode for the second-stage table, so we
> limit use of the dirty bit to scalable mode. To use SSADS, we set bit 9
> (SSADE) in the scalable-mode PASID table entry. When we do so, we must
> flush the context/pasid-table caches and the IOTLB, much like on AMD.
> Relevant SDM refs:
>
> "3.7.2 Accessed and Dirty Flags"
> "6.5.3.3 Guidance to Software for Invalidations,
>  Table 23. Guidance to Software for Invalidations"
>
> To read out what has been dirtied, we do the same as past
> implementations; dirty bit support sits in the same location (bit 9).
> The IOTLB caches some attributes and the dirtiness information when
> SSADE is enabled, so we also need to flush the IOTLB to make sure the
> IOMMU attempts to set the dirty bit again. The relevant manual chapter
> for hardware translation is chapter 6, with special mention of:
>
> "6.2.3.1 Scalable-Mode PASID-Table Entry Programming Considerations"
> "6.2.4 IOTLB"
>
> The first 12 bits of the PTE are already cached via the SLPTE
> pointer, similar to how it is done in amd-iommu. We also use the
> previously added PASID entry cache to fetch whether the Dirty bit
> was enabled in the SM second-stage table.
>
> This is useful for prototyping and exercising IOMMU support for
> access/dirty bits.
>
> Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
> ---
>  hw/i386/intel_iommu.c          | 85 ++++++++++++++++++++++++++++++----
>  hw/i386/intel_iommu_internal.h |  4 ++
>  hw/i386/trace-events           |  2 +
>  3 files changed, 82 insertions(+), 9 deletions(-)
>
> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
> index 752940fa4c0e..e946f793a968 100644
> --- a/hw/i386/intel_iommu.c
> +++ b/hw/i386/intel_iommu.c
> @@ -651,6 +651,21 @@ static uint64_t vtd_get_slpte(dma_addr_t base_addr, uint32_t index)
>      return slpte;
>  }
>
> +/* Set the content of an slpte located at @base_addr[@index] */
> +static uint64_t vtd_set_slpte(dma_addr_t base_addr, uint32_t index,
> +                              uint64_t slpte)
> +{
> +
> +    if (dma_memory_write(&address_space_memory,
> +                         base_addr + index * sizeof(slpte), &slpte,
> +                         sizeof(slpte), MEMTXATTRS_UNSPECIFIED)) {
> +        slpte = (uint64_t)-1;
> +        return slpte;
> +    }
> +
> +    return vtd_get_slpte(base_addr, index);
> +}
> +
>  /* Given an iova and the level of paging structure, return the offset
>   * of current level.
>   */
> @@ -720,6 +735,11 @@ static inline bool vtd_pe_present(VTDPASIDEntry *pe)
>      return pe->val[0] & VTD_PASID_ENTRY_P;
>  }
>
> +static inline bool vtd_pe_slad_enabled(VTDPASIDEntry *pe)
> +{
> +    return pe->val[0] & VTD_SM_PASID_ENTRY_SLADE;
> +}
> +
>  static int vtd_get_pe_in_pasid_leaf_table(IntelIOMMUState *s,
>                                            uint32_t pasid,
>                                            dma_addr_t addr,
> @@ -1026,6 +1046,33 @@ static VTDBus *vtd_find_as_from_bus_num(IntelIOMMUState *s, uint8_t bus_num)
>      return NULL;
>  }
>
> +static inline bool vtd_ssad_update(IntelIOMMUState *s, uint16_t pe_flags,
> +                                   uint64_t *slpte, bool is_write,
> +                                   bool reads, bool writes)
> +{
> +    bool dirty, access = reads;
> +
> +    if (!(pe_flags & VTD_SM_PASID_ENTRY_SLADE)) {
> +        return false;
> +    }
> +
> +    dirty = access = false;
> +
> +    if (is_write && writes && !(*slpte & VTD_SL_D)) {
> +        *slpte |= VTD_SL_D;
> +        trace_vtd_dirty_update(*slpte);
> +        dirty = true;
> +    }
> +
> +    if (((!is_write && reads) || dirty) && !(*slpte & VTD_SL_A)) {
> +        *slpte |= VTD_SL_A;
> +        trace_vtd_access_update(*slpte);
> +        access = true;
> +    }
> +
> +    return dirty || access;
> +}
> +
>  /* Given the @iova, get relevant @slptep. @slpte_level will be the last level
>   * of the translation, can be used for deciding the size of large page.
>   */
> @@ -1039,6 +1086,7 @@ static int vtd_iova_to_slpte(IntelIOMMUState *s, VTDContextEntry *ce,
>      uint32_t offset;
>      uint64_t slpte;
>      uint64_t access_right_check;
> +    uint16_t pe_flags;
>
>      if (!vtd_iova_range_check(s, iova, ce, aw_bits)) {
>          error_report_once("%s: detected IOVA overflow (iova=0x%" PRIx64 ")",
> @@ -1054,14 +1102,7 @@ static int vtd_iova_to_slpte(IntelIOMMUState *s, VTDContextEntry *ce,
>          slpte = vtd_get_slpte(addr, offset);
>
>          if (slpte == (uint64_t)-1) {
> -            error_report_once("%s: detected read error on DMAR slpte "
> -                              "(iova=0x%" PRIx64 ")", __func__, iova);
> -            if (level == vtd_get_iova_level(s, ce)) {
> -                /* Invalid programming of context-entry */
> -                return -VTD_FR_CONTEXT_ENTRY_INV;
> -            } else {
> -                return -VTD_FR_PAGING_ENTRY_INV;
> -            }
> +            goto inv_slpte;
>          }
>          *reads = (*reads) && (slpte & VTD_SL_R);
>          *writes = (*writes) && (slpte & VTD_SL_W);
> @@ -1081,6 +1122,14 @@ static int vtd_iova_to_slpte(IntelIOMMUState *s, VTDContextEntry *ce,
>          }
>
>          if (vtd_is_last_slpte(slpte, level)) {
> +            pe_flags = vtd_sm_pasid_entry_flags(s, ce);
> +            if (vtd_ssad_update(s, pe_flags, &slpte, is_write,
> +                                *reads, *writes)) {
> +                slpte = vtd_set_slpte(addr, offset, slpte);
> +                if (slpte == (uint64_t)-1) {
> +                    goto inv_slpte;
> +                }
> +            }
>              *slptep = slpte;
>              *slpte_level = level;
>              return 0;
> @@ -1088,6 +1137,16 @@ static int vtd_iova_to_slpte(IntelIOMMUState *s, VTDContextEntry *ce,
>          addr = vtd_get_slpte_addr(slpte, aw_bits);
>          level--;
>      }
> +
> +inv_slpte:
> +    error_report_once("%s: detected read error on DMAR slpte "
> +                      "(iova=0x%" PRIx64 ")", __func__, iova);
> +    if (level == vtd_get_iova_level(s, ce)) {
> +        /* Invalid programming of context-entry */
> +        return -VTD_FR_CONTEXT_ENTRY_INV;
> +    } else {
> +        return -VTD_FR_PAGING_ENTRY_INV;
> +    }
>  }
>
>  typedef int (*vtd_page_walk_hook)(IOMMUTLBEvent *event, void *private);
> @@ -1742,6 +1801,13 @@ static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
>          slpte = iotlb_entry->slpte;
>          access_flags = iotlb_entry->access_flags;
>          page_mask = iotlb_entry->mask;
> +        if (vtd_ssad_update(s, iotlb_entry->sm_pe_flags, &slpte, is_write,
> +                            access_flags & IOMMU_RO, access_flags & IOMMU_WO)) {
> +                uint32_t offset;
> +
> +                offset = vtd_iova_level_offset(addr, vtd_get_iova_level(s, &ce));
> +                vtd_set_slpte(addr, offset, slpte);
> +        }
>          goto out;
>      }
>
> @@ -3693,7 +3759,8 @@ static void vtd_init(IntelIOMMUState *s)
>
>      /* TODO: read cap/ecap from host to decide which cap to be exposed. */
>      if (s->scalable_mode) {
> -        s->ecap |= VTD_ECAP_SMTS | VTD_ECAP_SRS | VTD_ECAP_SLTS;
> +        s->ecap |= VTD_ECAP_SMTS | VTD_ECAP_SRS | VTD_ECAP_SLTS |
> +                   VTD_ECAP_SLADS;
>      }

We probably need a dedicated command line parameter and make it compat
for pre 7.1 machines.

Otherwise we may break migration.

Thanks

>
>      if (s->snoop_control) {
> diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
> index 1ff13b40f9bb..c00f6e7b4a72 100644
> --- a/hw/i386/intel_iommu_internal.h
> +++ b/hw/i386/intel_iommu_internal.h
> @@ -192,6 +192,7 @@
>  #define VTD_ECAP_MHMV               (15ULL << 20)
>  #define VTD_ECAP_SRS                (1ULL << 31)
>  #define VTD_ECAP_SMTS               (1ULL << 43)
> +#define VTD_ECAP_SLADS              (1ULL << 45)
>  #define VTD_ECAP_SLTS               (1ULL << 46)
>
>  /* CAP_REG */
> @@ -492,6 +493,7 @@ typedef struct VTDRootEntry VTDRootEntry;
>
>  #define VTD_SM_PASID_ENTRY_AW          7ULL /* Adjusted guest-address-width */
>  #define VTD_SM_PASID_ENTRY_DID(val)    ((val) & VTD_DOMAIN_ID_MASK)
> +#define VTD_SM_PASID_ENTRY_SLADE       (1ULL << 9)
>
>  /* Second Level Page Translation Pointer*/
>  #define VTD_SM_PASID_ENTRY_SLPTPTR     (~0xfffULL)
> @@ -515,5 +517,7 @@ typedef struct VTDRootEntry VTDRootEntry;
>  #define VTD_SL_PT_BASE_ADDR_MASK(aw) (~(VTD_PAGE_SIZE - 1) & VTD_HAW_MASK(aw))
>  #define VTD_SL_IGN_COM              0xbff0000000000000ULL
>  #define VTD_SL_TM                   (1ULL << 62)
> +#define VTD_SL_A                    (1ULL << 8)
> +#define VTD_SL_D                    (1ULL << 9)
>
>  #endif
> diff --git a/hw/i386/trace-events b/hw/i386/trace-events
> index eb5f075873cd..e4122ee8a999 100644
> --- a/hw/i386/trace-events
> +++ b/hw/i386/trace-events
> @@ -66,6 +66,8 @@ vtd_frr_new(int index, uint64_t hi, uint64_t lo) "index %d high 0x%"PRIx64" low
>  vtd_warn_invalid_qi_tail(uint16_t tail) "tail 0x%"PRIx16
>  vtd_warn_ir_vector(uint16_t sid, int index, int vec, int target) "sid 0x%"PRIx16" index %d vec %d (should be: %d)"
>  vtd_warn_ir_trigger(uint16_t sid, int index, int trig, int target) "sid 0x%"PRIx16" index %d trigger %d (should be: %d)"
> +vtd_access_update(uint64_t slpte) "slpte 0x%"PRIx64
> +vtd_dirty_update(uint64_t slpte) "slpte 0x%"PRIx64
>
>  # amd_iommu.c
>  amdvi_evntlog_fail(uint64_t addr, uint32_t head) "error: fail to write at addr 0x%"PRIx64" +  offset 0x%"PRIx32
> --
> 2.17.2
>
Joao Martins April 29, 2022, 9:12 a.m. UTC | #2
On 4/29/22 03:26, Jason Wang wrote:
> On Fri, Apr 29, 2022 at 5:14 AM Joao Martins <joao.m.martins@oracle.com> wrote:
>> @@ -3693,7 +3759,8 @@ static void vtd_init(IntelIOMMUState *s)
>>
>>      /* TODO: read cap/ecap from host to decide which cap to be exposed. */
>>      if (s->scalable_mode) {
>> -        s->ecap |= VTD_ECAP_SMTS | VTD_ECAP_SRS | VTD_ECAP_SLTS;
>> +        s->ecap |= VTD_ECAP_SMTS | VTD_ECAP_SRS | VTD_ECAP_SLTS |
>> +                   VTD_ECAP_SLADS;
>>      }
> 
> We probably need a dedicated command line parameter and make it compat
> for pre 7.1 machines.
> 
> Otherwise we may break migration.

I can gate it behind an 'x-ssads' option (default disabled). Which reminds me
that I should probably rename it to the most recent mnemonic (as SLADS no
longer exists in the manuals).

If we all want it enabled by default, I can add a separate patch to do so.

	Joao
Peter Xu April 29, 2022, 6:21 p.m. UTC | #3
On Fri, Apr 29, 2022 at 10:12:01AM +0100, Joao Martins wrote:
> On 4/29/22 03:26, Jason Wang wrote:
> > On Fri, Apr 29, 2022 at 5:14 AM Joao Martins <joao.m.martins@oracle.com> wrote:
> >> @@ -3693,7 +3759,8 @@ static void vtd_init(IntelIOMMUState *s)
> >>
> >>      /* TODO: read cap/ecap from host to decide which cap to be exposed. */
> >>      if (s->scalable_mode) {
> >> -        s->ecap |= VTD_ECAP_SMTS | VTD_ECAP_SRS | VTD_ECAP_SLTS;
> >> +        s->ecap |= VTD_ECAP_SMTS | VTD_ECAP_SRS | VTD_ECAP_SLTS |
> >> +                   VTD_ECAP_SLADS;
> >>      }
> > 
> > We probably need a dedicated command line parameter and make it compat
> > for pre 7.1 machines.
> > 
> > Otherwise we may break migration.
> 
> I can gate over an 'x-ssads' option (default disabled). Which reminds me that I probably
> should rename to the most recent mnemonic (as SLADS no longer exists in manuals).
> 
> If we all want by default enabled I can add a separate patch to do so.

The new option sounds good.

Jason, per our previous discussion, shall we not worry about the
compatibility issues per machine-type until the whole feature reaches a
mostly-complete stage?

There seem to be a bunch of sub-features for scalable mode and it's a
large project as a whole.  I'm worried that trying to maintain
compatibility for all the small sub-features could be an unnecessary
burden on the code base.

Thanks,
Joao Martins May 3, 2022, 11:54 a.m. UTC | #4
On 4/29/22 19:21, Peter Xu wrote:
> On Fri, Apr 29, 2022 at 10:12:01AM +0100, Joao Martins wrote:
>> On 4/29/22 03:26, Jason Wang wrote:
>>> On Fri, Apr 29, 2022 at 5:14 AM Joao Martins <joao.m.martins@oracle.com> wrote:
>>>> @@ -3693,7 +3759,8 @@ static void vtd_init(IntelIOMMUState *s)
>>>>
>>>>      /* TODO: read cap/ecap from host to decide which cap to be exposed. */
>>>>      if (s->scalable_mode) {
>>>> -        s->ecap |= VTD_ECAP_SMTS | VTD_ECAP_SRS | VTD_ECAP_SLTS;
>>>> +        s->ecap |= VTD_ECAP_SMTS | VTD_ECAP_SRS | VTD_ECAP_SLTS |
>>>> +                   VTD_ECAP_SLADS;
>>>>      }
>>>
>>> We probably need a dedicated command line parameter and make it compat
>>> for pre 7.1 machines.
>>>
>>> Otherwise we may break migration.
>>
>> I can gate over an 'x-ssads' option (default disabled). Which reminds me that I probably
>> should rename to the most recent mnemonic (as SLADS no longer exists in manuals).
>>
>> If we all want by default enabled I can add a separate patch to do so.
> 
> The new option sounds good.
> 

OK, I'll fix it then for the next iteration.

Also, perhaps I might take the emulated iommu patches out of the iommufd stuff into a
separate series. There might be a place for them in the realm of testing/prototyping.

> Jason, per our previous discussion, shall we not worry about the
> compatibility issues per machine-type until the whole feature reaches a
> mostly-complete stage?
> 
> There seem to be a bunch of sub-features for scalable mode and it's a
> large project as a whole.  I'm worried that trying to maintain
> compatibility for all the small sub-features could be an unnecessary
> burden on the code base.

Perhaps the best way to see how close we are to the spec is to check what we
support in intel-iommu in terms of VT-d revision versus how many buckets we
fill in. I think SLADS/SSADS was introduced in 3.0, IIRC.

I can take the compat stuff out if it's too early for that -- but I take it
these are questions for Jason.
Peter Xu May 4, 2022, 8:11 p.m. UTC | #5
Hi, Joao,

On Thu, Apr 28, 2022 at 10:13:45PM +0100, Joao Martins wrote:
> +/* Set the content of an slpte located at @base_addr[@index] */
> +static uint64_t vtd_set_slpte(dma_addr_t base_addr, uint32_t index,
> +                              uint64_t slpte)
> +{
> +
> +    if (dma_memory_write(&address_space_memory,
> +                         base_addr + index * sizeof(slpte), &slpte,
> +                         sizeof(slpte), MEMTXATTRS_UNSPECIFIED)) {
> +        slpte = (uint64_t)-1;
> +        return slpte;
> +    }
> +
> +    return vtd_get_slpte(base_addr, index);
> +}

Could I ask: when the write succeeds, why do we need to read the slpte again?

Thanks,
Jason Wang May 5, 2022, 7:41 a.m. UTC | #6
On Wed, May 4, 2022 at 4:47 AM Joao Martins <joao.m.martins@oracle.com> wrote:
>
> On 4/29/22 19:21, Peter Xu wrote:
> > On Fri, Apr 29, 2022 at 10:12:01AM +0100, Joao Martins wrote:
> >> On 4/29/22 03:26, Jason Wang wrote:
> >>> On Fri, Apr 29, 2022 at 5:14 AM Joao Martins <joao.m.martins@oracle.com> wrote:
> >>>> @@ -3693,7 +3759,8 @@ static void vtd_init(IntelIOMMUState *s)
> >>>>
> >>>>      /* TODO: read cap/ecap from host to decide which cap to be exposed. */
> >>>>      if (s->scalable_mode) {
> >>>> -        s->ecap |= VTD_ECAP_SMTS | VTD_ECAP_SRS | VTD_ECAP_SLTS;
> >>>> +        s->ecap |= VTD_ECAP_SMTS | VTD_ECAP_SRS | VTD_ECAP_SLTS |
> >>>> +                   VTD_ECAP_SLADS;
> >>>>      }
> >>>
> >>> We probably need a dedicated command line parameter and make it compat
> >>> for pre 7.1 machines.
> >>>
> >>> Otherwise we may break migration.
> >>
> >> I can gate over an 'x-ssads' option (default disabled). Which reminds me that I probably
> >> should rename to the most recent mnemonic (as SLADS no longer exists in manuals).
> >>
> >> If we all want by default enabled I can add a separate patch to do so.
> >
> > The new option sounds good.
> >
>
> OK, I'll fix it then for the next iteration.
>
> Also, perhaps I might take the emulated iommu patches out of the iommufd stuff into a
> separate series. There might be a place for them in the realm of testing/prototyping.

That would be better.

>
> > Jason, per our previous discussion, shall we not worry about the
> > compatibility issues per machine-type until the whole feature reaches a
> > mostly-complete stage?
> >
> > There seem to be a bunch of sub-features for scalable mode and it's a
> > large project as a whole.  I'm worried that trying to maintain
> > compatibility for all the small sub-features could be an unnecessary
> > burden on the code base.

My understanding is that, if it's not too hard, it's better for each
sub-feature to try its best for compatibility. In this case, having
a dedicated option might help with debugging as well.

> Perhaps best to see how close we are to spec is to check what we support in intel-iommu
> in terms of VT-d revision versus how many buckets we fill in. I think SLADS/SSADS was in
> 3.0 IIRC.
>
> I can take the compat stuff out if it's too early for that -- But I take it
> these are questions for Jason.
>

There's probably no need for the compat stuff; having a dedicated
option and making it disabled by default should be fine.

Thanks
Joao Martins May 5, 2022, 9:54 a.m. UTC | #7
On 5/4/22 21:11, Peter Xu wrote:
> Hi, Joao,
> 
> On Thu, Apr 28, 2022 at 10:13:45PM +0100, Joao Martins wrote:
>> +/* Set the content of an slpte located at @base_addr[@index] */
>> +static uint64_t vtd_set_slpte(dma_addr_t base_addr, uint32_t index,
>> +                              uint64_t slpte)
>> +{
>> +
>> +    if (dma_memory_write(&address_space_memory,
>> +                         base_addr + index * sizeof(slpte), &slpte,
>> +                         sizeof(slpte), MEMTXATTRS_UNSPECIFIED)) {
>> +        slpte = (uint64_t)-1;
>> +        return slpte;
>> +    }
>> +
>> +    return vtd_get_slpte(base_addr, index);
>> +}
> 
> Could I ask: when the write succeeds, why do we need to read the slpte again?

We don't, I should delete this.

Perhaps I was obsessed with making sure that what I set is what I get
in the end, and this was a remnant of it.
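
For reference, the agreed simplification would look roughly like this
(a sketch, keeping the existing convention of returning (uint64_t)-1
on a failed DMA write):

/* Set the content of an slpte located at @base_addr[@index] */
static uint64_t vtd_set_slpte(dma_addr_t base_addr, uint32_t index,
                              uint64_t slpte)
{
    if (dma_memory_write(&address_space_memory,
                         base_addr + index * sizeof(slpte), &slpte,
                         sizeof(slpte), MEMTXATTRS_UNSPECIFIED)) {
        return (uint64_t)-1;
    }
    /* No read-back needed: the value written is the value in effect. */
    return slpte;
}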
Joao Martins May 5, 2022, 9:57 a.m. UTC | #8
On 5/5/22 08:41, Jason Wang wrote:
> On Wed, May 4, 2022 at 4:47 AM Joao Martins <joao.m.martins@oracle.com> wrote:
>>
>> On 4/29/22 19:21, Peter Xu wrote:
>>> On Fri, Apr 29, 2022 at 10:12:01AM +0100, Joao Martins wrote:
>>>> On 4/29/22 03:26, Jason Wang wrote:
>>>>> On Fri, Apr 29, 2022 at 5:14 AM Joao Martins <joao.m.martins@oracle.com> wrote:
>>>>>> @@ -3693,7 +3759,8 @@ static void vtd_init(IntelIOMMUState *s)
>>>>>>
>>>>>>      /* TODO: read cap/ecap from host to decide which cap to be exposed. */
>>>>>>      if (s->scalable_mode) {
>>>>>> -        s->ecap |= VTD_ECAP_SMTS | VTD_ECAP_SRS | VTD_ECAP_SLTS;
>>>>>> +        s->ecap |= VTD_ECAP_SMTS | VTD_ECAP_SRS | VTD_ECAP_SLTS |
>>>>>> +                   VTD_ECAP_SLADS;
>>>>>>      }
>>>>>
>>>>> We probably need a dedicated command line parameter and make it compat
>>>>> for pre 7.1 machines.
>>>>>
>>>>> Otherwise we may break migration.
>>>>
>>>> I can gate over an 'x-ssads' option (default disabled). Which reminds me that I probably
>>>> should rename to the most recent mnemonic (as SLADS no longer exists in manuals).
>>>>
>>>> If we all want by default enabled I can add a separate patch to do so.
>>>
>>> The new option sounds good.
>>>
>>
>> OK, I'll fix it then for the next iteration.
>>
>> Also, perhaps I might take the emulated iommu patches out of the iommufd stuff into a
>> separate series. There might be a place for them in the realm of testing/prototyping.
> 
> That would be better.
> 
OK, I'll do that then.

>> Perhaps best to see how close we are to spec is to check what we support in intel-iommu
>> in terms of VT-d revision versus how many buckets we fill in. I think SLADS/SSADS was in
>> 3.0 IIRC.
>>
>> I can take the compat stuff out if it's too early for that -- But I take it
>> these are questions for Jason.
>>
> 
> There's probably no need for the compat stuff; having a dedicated
> option and making it disabled by default should be fine.

/me nods
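
For reference, the dedicated off-by-default option agreed on above
could be wired up roughly as follows -- a sketch only: the "x-ssads"
name comes from the thread, the ssads field and its placement are
assumptions, and it keeps this patch's VTD_ECAP_SLADS name (which the
thread suggests renaming to SSADS):

/* In vtd_properties[] (hw/i386/intel_iommu.c): */
DEFINE_PROP_BOOL("x-ssads", IntelIOMMUState, ssads, false),

/* ... and in vtd_init(), gate the capability on the new option: */
if (s->scalable_mode) {
    s->ecap |= VTD_ECAP_SMTS | VTD_ECAP_SRS | VTD_ECAP_SLTS;
    if (s->ssads) {
        s->ecap |= VTD_ECAP_SLADS;
    }
}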

Patch

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 752940fa4c0e..e946f793a968 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -651,6 +651,21 @@  static uint64_t vtd_get_slpte(dma_addr_t base_addr, uint32_t index)
     return slpte;
 }
 
+/* Set the content of an slpte located at @base_addr[@index] */
+static uint64_t vtd_set_slpte(dma_addr_t base_addr, uint32_t index,
+                              uint64_t slpte)
+{
+
+    if (dma_memory_write(&address_space_memory,
+                         base_addr + index * sizeof(slpte), &slpte,
+                         sizeof(slpte), MEMTXATTRS_UNSPECIFIED)) {
+        slpte = (uint64_t)-1;
+        return slpte;
+    }
+
+    return vtd_get_slpte(base_addr, index);
+}
+
 /* Given an iova and the level of paging structure, return the offset
  * of current level.
  */
@@ -720,6 +735,11 @@  static inline bool vtd_pe_present(VTDPASIDEntry *pe)
     return pe->val[0] & VTD_PASID_ENTRY_P;
 }
 
+static inline bool vtd_pe_slad_enabled(VTDPASIDEntry *pe)
+{
+    return pe->val[0] & VTD_SM_PASID_ENTRY_SLADE;
+}
+
 static int vtd_get_pe_in_pasid_leaf_table(IntelIOMMUState *s,
                                           uint32_t pasid,
                                           dma_addr_t addr,
@@ -1026,6 +1046,33 @@  static VTDBus *vtd_find_as_from_bus_num(IntelIOMMUState *s, uint8_t bus_num)
     return NULL;
 }
 
+static inline bool vtd_ssad_update(IntelIOMMUState *s, uint16_t pe_flags,
+                                   uint64_t *slpte, bool is_write,
+                                   bool reads, bool writes)
+{
+    bool dirty, access = reads;
+
+    if (!(pe_flags & VTD_SM_PASID_ENTRY_SLADE)) {
+        return false;
+    }
+
+    dirty = access = false;
+
+    if (is_write && writes && !(*slpte & VTD_SL_D)) {
+        *slpte |= VTD_SL_D;
+        trace_vtd_dirty_update(*slpte);
+        dirty = true;
+    }
+
+    if (((!is_write && reads) || dirty) && !(*slpte & VTD_SL_A)) {
+        *slpte |= VTD_SL_A;
+        trace_vtd_access_update(*slpte);
+        access = true;
+    }
+
+    return dirty || access;
+}
+
 /* Given the @iova, get relevant @slptep. @slpte_level will be the last level
  * of the translation, can be used for deciding the size of large page.
  */
@@ -1039,6 +1086,7 @@  static int vtd_iova_to_slpte(IntelIOMMUState *s, VTDContextEntry *ce,
     uint32_t offset;
     uint64_t slpte;
     uint64_t access_right_check;
+    uint16_t pe_flags;
 
     if (!vtd_iova_range_check(s, iova, ce, aw_bits)) {
         error_report_once("%s: detected IOVA overflow (iova=0x%" PRIx64 ")",
@@ -1054,14 +1102,7 @@  static int vtd_iova_to_slpte(IntelIOMMUState *s, VTDContextEntry *ce,
         slpte = vtd_get_slpte(addr, offset);
 
         if (slpte == (uint64_t)-1) {
-            error_report_once("%s: detected read error on DMAR slpte "
-                              "(iova=0x%" PRIx64 ")", __func__, iova);
-            if (level == vtd_get_iova_level(s, ce)) {
-                /* Invalid programming of context-entry */
-                return -VTD_FR_CONTEXT_ENTRY_INV;
-            } else {
-                return -VTD_FR_PAGING_ENTRY_INV;
-            }
+            goto inv_slpte;
         }
         *reads = (*reads) && (slpte & VTD_SL_R);
         *writes = (*writes) && (slpte & VTD_SL_W);
@@ -1081,6 +1122,14 @@  static int vtd_iova_to_slpte(IntelIOMMUState *s, VTDContextEntry *ce,
         }
 
         if (vtd_is_last_slpte(slpte, level)) {
+            pe_flags = vtd_sm_pasid_entry_flags(s, ce);
+            if (vtd_ssad_update(s, pe_flags, &slpte, is_write,
+                                *reads, *writes)) {
+                slpte = vtd_set_slpte(addr, offset, slpte);
+                if (slpte == (uint64_t)-1) {
+                    goto inv_slpte;
+                }
+            }
             *slptep = slpte;
             *slpte_level = level;
             return 0;
@@ -1088,6 +1137,16 @@  static int vtd_iova_to_slpte(IntelIOMMUState *s, VTDContextEntry *ce,
         addr = vtd_get_slpte_addr(slpte, aw_bits);
         level--;
     }
+
+inv_slpte:
+    error_report_once("%s: detected read error on DMAR slpte "
+                      "(iova=0x%" PRIx64 ")", __func__, iova);
+    if (level == vtd_get_iova_level(s, ce)) {
+        /* Invalid programming of context-entry */
+        return -VTD_FR_CONTEXT_ENTRY_INV;
+    } else {
+        return -VTD_FR_PAGING_ENTRY_INV;
+    }
 }
 
 typedef int (*vtd_page_walk_hook)(IOMMUTLBEvent *event, void *private);
@@ -1742,6 +1801,13 @@  static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
         slpte = iotlb_entry->slpte;
         access_flags = iotlb_entry->access_flags;
         page_mask = iotlb_entry->mask;
+        if (vtd_ssad_update(s, iotlb_entry->sm_pe_flags, &slpte, is_write,
+                            access_flags & IOMMU_RO, access_flags & IOMMU_WO)) {
+                uint32_t offset;
+
+                offset = vtd_iova_level_offset(addr, vtd_get_iova_level(s, &ce));
+                vtd_set_slpte(addr, offset, slpte);
+        }
         goto out;
     }
 
@@ -3693,7 +3759,8 @@  static void vtd_init(IntelIOMMUState *s)
 
     /* TODO: read cap/ecap from host to decide which cap to be exposed. */
     if (s->scalable_mode) {
-        s->ecap |= VTD_ECAP_SMTS | VTD_ECAP_SRS | VTD_ECAP_SLTS;
+        s->ecap |= VTD_ECAP_SMTS | VTD_ECAP_SRS | VTD_ECAP_SLTS |
+                   VTD_ECAP_SLADS;
     }
 
     if (s->snoop_control) {
diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
index 1ff13b40f9bb..c00f6e7b4a72 100644
--- a/hw/i386/intel_iommu_internal.h
+++ b/hw/i386/intel_iommu_internal.h
@@ -192,6 +192,7 @@ 
 #define VTD_ECAP_MHMV               (15ULL << 20)
 #define VTD_ECAP_SRS                (1ULL << 31)
 #define VTD_ECAP_SMTS               (1ULL << 43)
+#define VTD_ECAP_SLADS              (1ULL << 45)
 #define VTD_ECAP_SLTS               (1ULL << 46)
 
 /* CAP_REG */
@@ -492,6 +493,7 @@  typedef struct VTDRootEntry VTDRootEntry;
 
 #define VTD_SM_PASID_ENTRY_AW          7ULL /* Adjusted guest-address-width */
 #define VTD_SM_PASID_ENTRY_DID(val)    ((val) & VTD_DOMAIN_ID_MASK)
+#define VTD_SM_PASID_ENTRY_SLADE       (1ULL << 9)
 
 /* Second Level Page Translation Pointer*/
 #define VTD_SM_PASID_ENTRY_SLPTPTR     (~0xfffULL)
@@ -515,5 +517,7 @@  typedef struct VTDRootEntry VTDRootEntry;
 #define VTD_SL_PT_BASE_ADDR_MASK(aw) (~(VTD_PAGE_SIZE - 1) & VTD_HAW_MASK(aw))
 #define VTD_SL_IGN_COM              0xbff0000000000000ULL
 #define VTD_SL_TM                   (1ULL << 62)
+#define VTD_SL_A                    (1ULL << 8)
+#define VTD_SL_D                    (1ULL << 9)
 
 #endif
diff --git a/hw/i386/trace-events b/hw/i386/trace-events
index eb5f075873cd..e4122ee8a999 100644
--- a/hw/i386/trace-events
+++ b/hw/i386/trace-events
@@ -66,6 +66,8 @@  vtd_frr_new(int index, uint64_t hi, uint64_t lo) "index %d high 0x%"PRIx64" low
 vtd_warn_invalid_qi_tail(uint16_t tail) "tail 0x%"PRIx16
 vtd_warn_ir_vector(uint16_t sid, int index, int vec, int target) "sid 0x%"PRIx16" index %d vec %d (should be: %d)"
 vtd_warn_ir_trigger(uint16_t sid, int index, int trig, int target) "sid 0x%"PRIx16" index %d trigger %d (should be: %d)"
+vtd_access_update(uint64_t slpte) "slpte 0x%"PRIx64
+vtd_dirty_update(uint64_t slpte) "slpte 0x%"PRIx64
 
 # amd_iommu.c
 amdvi_evntlog_fail(uint64_t addr, uint32_t head) "error: fail to write at addr 0x%"PRIx64" +  offset 0x%"PRIx32