Message ID | 20231020222804.21850-13-joao.m.martins@oracle.com (mailing list archive)
---|---
State | New, archived
Series | IOMMUFD Dirty Tracking
On 2023/10/21 06:27, Joao Martins wrote: > IOMMU advertises Access/Dirty bits for second-stage page table if the > extended capability DMAR register reports it (ECAP, mnemonic ECAP.SSADS). > The first stage table is compatible with CPU page table thus A/D bits are > implicitly supported. Relevant Intel IOMMU SDM ref for first stage table > "3.6.2 Accessed, Extended Accessed, and Dirty Flags" and second stage table > "3.7.2 Accessed and Dirty Flags". > > First stage page table is enabled by default so it's allowed to set dirty > tracking and no control bits needed, it just returns 0. To use SSADS, set > bit 9 (SSADE) in the scalable-mode PASID table entry and flush the IOTLB > via pasid_flush_caches() following the manual. Relevant SDM refs: > > "3.7.2 Accessed and Dirty Flags" > "6.5.3.3 Guidance to Software for Invalidations, > Table 23. Guidance to Software for Invalidations" > > PTE dirty bit is located in bit 9 and it's cached in the IOTLB so flush > IOTLB to make sure IOMMU attempts to set the dirty bit again. Note that > iommu_dirty_bitmap_record() will add the IOVA to iotlb_gather and thus the > caller of the iommu op will flush the IOTLB. Relevant manuals over the > hardware translation is chapter 6 with some special mention to: > > "6.2.3.1 Scalable-Mode PASID-Table Entry Programming Considerations" > "6.2.4 IOTLB" > > Select IOMMUFD_DRIVER only if IOMMUFD is enabled, given that IOMMU dirty > tracking requires IOMMUFD. > > Signed-off-by: Joao Martins <joao.m.martins@oracle.com> > Reviewed-by: Lu Baolu <baolu.lu@linux.intel.com> > Reviewed-by: Kevin Tian <kevin.tian@intel.com> > --- > drivers/iommu/intel/Kconfig | 1 + > drivers/iommu/intel/iommu.c | 104 +++++++++++++++++++++++++++++++++- > drivers/iommu/intel/iommu.h | 17 ++++++ > drivers/iommu/intel/pasid.c | 108 ++++++++++++++++++++++++++++++++++++ > drivers/iommu/intel/pasid.h | 4 ++ > 5 files changed, 233 insertions(+), 1 deletion(-) normally, the subject of commits to intel iommu driver is started with 'iommu/vt-d'. So if there is a new version, please rename it. 
Also, SL is a bit eld naming, please use SS (second stage) s/iommu/intel: Access/Dirty bit support for SL domains/iommu/vt-d: Access/Dirty bit support for SS domains > diff --git a/drivers/iommu/intel/Kconfig b/drivers/iommu/intel/Kconfig > index 2e56bd79f589..f5348b80652b 100644 > --- a/drivers/iommu/intel/Kconfig > +++ b/drivers/iommu/intel/Kconfig > @@ -15,6 +15,7 @@ config INTEL_IOMMU > select DMA_OPS > select IOMMU_API > select IOMMU_IOVA > + select IOMMUFD_DRIVER if IOMMUFD > select NEED_DMA_MAP_STATE > select DMAR_TABLE > select SWIOTLB > diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c > index 017aed5813d8..d7ba1732130b 100644 > --- a/drivers/iommu/intel/iommu.c > +++ b/drivers/iommu/intel/iommu.c > @@ -300,6 +300,7 @@ static int iommu_skip_te_disable; > #define IDENTMAP_AZALIA 4 > > const struct iommu_ops intel_iommu_ops; > +const struct iommu_dirty_ops intel_dirty_ops; > > static bool translation_pre_enabled(struct intel_iommu *iommu) > { > @@ -4079,8 +4080,10 @@ intel_iommu_domain_alloc_user(struct device *dev, u32 flags) > { > struct iommu_domain *domain; > struct intel_iommu *iommu; > + bool dirty_tracking; > > - if (flags & (~IOMMU_HWPT_ALLOC_NEST_PARENT)) > + if (flags & (~(IOMMU_HWPT_ALLOC_NEST_PARENT| > + IOMMU_HWPT_ALLOC_DIRTY_TRACKING))) > return ERR_PTR(-EOPNOTSUPP); > > iommu = device_to_iommu(dev, NULL, NULL); > @@ -4090,6 +4093,10 @@ intel_iommu_domain_alloc_user(struct device *dev, u32 flags) > if ((flags & IOMMU_HWPT_ALLOC_NEST_PARENT) && !ecap_nest(iommu->ecap)) > return ERR_PTR(-EOPNOTSUPP); > > + dirty_tracking = (flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING); > + if (dirty_tracking && !slads_supported(iommu)) > + return ERR_PTR(-EOPNOTSUPP); > + > /* > * domain_alloc_user op needs to fully initialize a domain > * before return, so uses iommu_domain_alloc() here for > @@ -4098,6 +4105,15 @@ intel_iommu_domain_alloc_user(struct device *dev, u32 flags) > domain = iommu_domain_alloc(dev->bus); > if (!domain) > domain = ERR_PTR(-ENOMEM); > + > + if (!IS_ERR(domain) && dirty_tracking) { > + if (to_dmar_domain(domain)->use_first_level) { > + iommu_domain_free(domain); > + return ERR_PTR(-EOPNOTSUPP); > + } > + domain->dirty_ops = &intel_dirty_ops; > + } > + > return domain; > } > > @@ -4121,6 +4137,9 @@ static int prepare_domain_attach_device(struct iommu_domain *domain, > if (dmar_domain->force_snooping && !ecap_sc_support(iommu->ecap)) > return -EINVAL; > > + if (domain->dirty_ops && !slads_supported(iommu)) > + return -EINVAL; > + > /* check if this iommu agaw is sufficient for max mapped address */ > addr_width = agaw_to_width(iommu->agaw); > if (addr_width > cap_mgaw(iommu->cap)) > @@ -4375,6 +4394,8 @@ static bool intel_iommu_capable(struct device *dev, enum iommu_cap cap) > return dmar_platform_optin(); > case IOMMU_CAP_ENFORCE_CACHE_COHERENCY: > return ecap_sc_support(info->iommu->ecap); > + case IOMMU_CAP_DIRTY_TRACKING: > + return slads_supported(info->iommu); > default: > return false; > } > @@ -4772,6 +4793,9 @@ static int intel_iommu_set_dev_pasid(struct iommu_domain *domain, > if (!pasid_supported(iommu) || dev_is_real_dma_subdevice(dev)) > return -EOPNOTSUPP; > > + if (domain->dirty_ops) > + return -EINVAL; > + > if (context_copied(iommu, info->bus, info->devfn)) > return -EBUSY; > > @@ -4830,6 +4854,84 @@ static void *intel_iommu_hw_info(struct device *dev, u32 *length, u32 *type) > return vtd; > } > > +static int intel_iommu_set_dirty_tracking(struct iommu_domain *domain, > + bool enable) > +{ > + struct dmar_domain *dmar_domain = 
to_dmar_domain(domain); > + struct device_domain_info *info; > + int ret; > + > + spin_lock(&dmar_domain->lock); > + if (dmar_domain->dirty_tracking == enable) > + goto out_unlock; > + > + list_for_each_entry(info, &dmar_domain->devices, link) { > + ret = intel_pasid_setup_dirty_tracking(info->iommu, info->domain, > + info->dev, IOMMU_NO_PASID, > + enable); > + if (ret) > + goto err_unwind; > + > + } > + > + dmar_domain->dirty_tracking = enable; > +out_unlock: > + spin_unlock(&dmar_domain->lock); > + > + return 0; > + > +err_unwind: > + list_for_each_entry(info, &dmar_domain->devices, link) > + intel_pasid_setup_dirty_tracking(info->iommu, dmar_domain, > + info->dev, IOMMU_NO_PASID, > + dmar_domain->dirty_tracking); > + spin_unlock(&dmar_domain->lock); > + return ret; > +} > + > +static int intel_iommu_read_and_clear_dirty(struct iommu_domain *domain, > + unsigned long iova, size_t size, > + unsigned long flags, > + struct iommu_dirty_bitmap *dirty) > +{ > + struct dmar_domain *dmar_domain = to_dmar_domain(domain); > + unsigned long end = iova + size - 1; > + unsigned long pgsize; > + > + /* > + * IOMMUFD core calls into a dirty tracking disabled domain without an > + * IOVA bitmap set in order to clean dirty bits in all PTEs that might > + * have occurred when we stopped dirty tracking. This ensures that we > + * never inherit dirtied bits from a previous cycle. > + */ > + if (!dmar_domain->dirty_tracking && dirty->bitmap) > + return -EINVAL; > + > + do { > + struct dma_pte *pte; > + int lvl = 0; > + > + pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &lvl, > + GFP_ATOMIC); > + pgsize = level_size(lvl) << VTD_PAGE_SHIFT; > + if (!pte || !dma_pte_present(pte)) { > + iova += pgsize; > + continue; > + } > + > + if (dma_sl_pte_test_and_clear_dirty(pte, flags)) > + iommu_dirty_bitmap_record(dirty, iova, pgsize); > + iova += pgsize; > + } while (iova < end); > + > + return 0; > +} > + > +const struct iommu_dirty_ops intel_dirty_ops = { > + .set_dirty_tracking = intel_iommu_set_dirty_tracking, > + .read_and_clear_dirty = intel_iommu_read_and_clear_dirty, > +}; > + > const struct iommu_ops intel_iommu_ops = { > .capable = intel_iommu_capable, > .hw_info = intel_iommu_hw_info, > diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h > index c18fb699c87a..27bcfd3bacdd 100644 > --- a/drivers/iommu/intel/iommu.h > +++ b/drivers/iommu/intel/iommu.h > @@ -48,6 +48,9 @@ > #define DMA_FL_PTE_DIRTY BIT_ULL(6) > #define DMA_FL_PTE_XD BIT_ULL(63) > > +#define DMA_SL_PTE_DIRTY_BIT 9 > +#define DMA_SL_PTE_DIRTY BIT_ULL(DMA_SL_PTE_DIRTY_BIT) > + > #define ADDR_WIDTH_5LEVEL (57) > #define ADDR_WIDTH_4LEVEL (48) > > @@ -539,6 +542,9 @@ enum { > #define sm_supported(iommu) (intel_iommu_sm && ecap_smts((iommu)->ecap)) > #define pasid_supported(iommu) (sm_supported(iommu) && \ > ecap_pasid((iommu)->ecap)) > +#define slads_supported(iommu) (sm_supported(iommu) && \ how about ssads_supporte. > + ecap_slads((iommu)->ecap)) > + remove this empty line. > > struct pasid_entry; > struct pasid_state_entry; Regards, Yi Liu > @@ -592,6 +598,7 @@ struct dmar_domain { > * otherwise, goes through the second > * level. 
> */ > + u8 dirty_tracking:1; /* Dirty tracking is enabled */ > > spinlock_t lock; /* Protect device tracking lists */ > struct list_head devices; /* all devices' list */ > @@ -781,6 +788,16 @@ static inline bool dma_pte_present(struct dma_pte *pte) > return (pte->val & 3) != 0; > } > > +static inline bool dma_sl_pte_test_and_clear_dirty(struct dma_pte *pte, > + unsigned long flags) > +{ > + if (flags & IOMMU_DIRTY_NO_CLEAR) > + return (pte->val & DMA_SL_PTE_DIRTY) != 0; > + > + return test_and_clear_bit(DMA_SL_PTE_DIRTY_BIT, > + (unsigned long *)&pte->val); > +} > + > static inline bool dma_pte_superpage(struct dma_pte *pte) > { > return (pte->val & DMA_PTE_LARGE_PAGE); > diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c > index 8f92b92f3d2a..9a01d46a56e1 100644 > --- a/drivers/iommu/intel/pasid.c > +++ b/drivers/iommu/intel/pasid.c > @@ -277,6 +277,11 @@ static inline void pasid_set_bits(u64 *ptr, u64 mask, u64 bits) > WRITE_ONCE(*ptr, (old & ~mask) | bits); > } > > +static inline u64 pasid_get_bits(u64 *ptr) > +{ > + return READ_ONCE(*ptr); > +} > + > /* > * Setup the DID(Domain Identifier) field (Bit 64~79) of scalable mode > * PASID entry. > @@ -335,6 +340,36 @@ static inline void pasid_set_fault_enable(struct pasid_entry *pe) > pasid_set_bits(&pe->val[0], 1 << 1, 0); > } > > +/* > + * Enable second level A/D bits by setting the SLADE (Second Level > + * Access Dirty Enable) field (Bit 9) of a scalable mode PASID > + * entry. > + */ > +static inline void pasid_set_ssade(struct pasid_entry *pe) > +{ > + pasid_set_bits(&pe->val[0], 1 << 9, 1 << 9); > +} > + > +/* > + * Disable second level A/D bits by clearing the SLADE (Second Level > + * Access Dirty Enable) field (Bit 9) of a scalable mode PASID > + * entry. > + */ > +static inline void pasid_clear_ssade(struct pasid_entry *pe) > +{ > + pasid_set_bits(&pe->val[0], 1 << 9, 0); > +} > + > +/* > + * Checks if second level A/D bits specifically the SLADE (Second Level > + * Access Dirty Enable) field (Bit 9) of a scalable mode PASID > + * entry is set. > + */ > +static inline bool pasid_get_ssade(struct pasid_entry *pe) > +{ > + return pasid_get_bits(&pe->val[0]) & (1 << 9); > +} > + > /* > * Setup the WPE(Write Protect Enable) field (Bit 132) of a > * scalable mode PASID entry. > @@ -627,6 +662,8 @@ int intel_pasid_setup_second_level(struct intel_iommu *iommu, > pasid_set_translation_type(pte, PASID_ENTRY_PGTT_SL_ONLY); > pasid_set_fault_enable(pte); > pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap)); > + if (domain->dirty_tracking) > + pasid_set_ssade(pte); > > pasid_set_present(pte); > spin_unlock(&iommu->lock); > @@ -636,6 +673,77 @@ int intel_pasid_setup_second_level(struct intel_iommu *iommu, > return 0; > } > > +/* > + * Set up dirty tracking on a second only or nested translation type. 
> + */ > +int intel_pasid_setup_dirty_tracking(struct intel_iommu *iommu, > + struct dmar_domain *domain, > + struct device *dev, u32 pasid, > + bool enabled) > +{ > + struct pasid_entry *pte; > + u16 did, pgtt; > + > + spin_lock(&iommu->lock); > + > + pte = intel_pasid_get_entry(dev, pasid); > + if (!pte) { > + spin_unlock(&iommu->lock); > + dev_err_ratelimited(dev, > + "Failed to get pasid entry of PASID %d\n", > + pasid); > + return -ENODEV; > + } > + > + did = domain_id_iommu(domain, iommu); > + pgtt = pasid_pte_get_pgtt(pte); > + if (pgtt != PASID_ENTRY_PGTT_SL_ONLY && pgtt != PASID_ENTRY_PGTT_NESTED) { > + spin_unlock(&iommu->lock); > + dev_err_ratelimited(dev, > + "Dirty tracking not supported on translation type %d\n", > + pgtt); > + return -EOPNOTSUPP; > + } > + > + if (pasid_get_ssade(pte) == enabled) { > + spin_unlock(&iommu->lock); > + return 0; > + } > + > + if (enabled) > + pasid_set_ssade(pte); > + else > + pasid_clear_ssade(pte); > + spin_unlock(&iommu->lock); > + > + if (!ecap_coherent(iommu->ecap)) > + clflush_cache_range(pte, sizeof(*pte)); > + > + /* > + * From VT-d spec table 25 "Guidance to Software for Invalidations": > + * > + * - PASID-selective-within-Domain PASID-cache invalidation > + * If (PGTT=SS or Nested) > + * - Domain-selective IOTLB invalidation > + * Else > + * - PASID-selective PASID-based IOTLB invalidation > + * - If (pasid is RID_PASID) > + * - Global Device-TLB invalidation to affected functions > + * Else > + * - PASID-based Device-TLB invalidation (with S=1 and > + * Addr[63:12]=0x7FFFFFFF_FFFFF) to affected functions > + */ > + pasid_cache_invalidation_with_pasid(iommu, did, pasid); > + > + iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH); > + > + /* Device IOTLB doesn't need to be flushed in caching mode. */ > + if (!cap_caching_mode(iommu->cap)) > + devtlb_invalidation_with_pasid(iommu, dev, pasid); > + > + return 0; > +} > + > /* > * Set up the scalable mode pasid entry for passthrough translation type. > */ > diff --git a/drivers/iommu/intel/pasid.h b/drivers/iommu/intel/pasid.h > index 4e9e68c3c388..958050b093aa 100644 > --- a/drivers/iommu/intel/pasid.h > +++ b/drivers/iommu/intel/pasid.h > @@ -106,6 +106,10 @@ int intel_pasid_setup_first_level(struct intel_iommu *iommu, > int intel_pasid_setup_second_level(struct intel_iommu *iommu, > struct dmar_domain *domain, > struct device *dev, u32 pasid); > +int intel_pasid_setup_dirty_tracking(struct intel_iommu *iommu, > + struct dmar_domain *domain, > + struct device *dev, u32 pasid, > + bool enabled); > int intel_pasid_setup_pass_through(struct intel_iommu *iommu, > struct dmar_domain *domain, > struct device *dev, u32 pasid)
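The core of the harvest path quoted above is dma_sl_pte_test_and_clear_dirty(): report the second-stage PTE dirty bit (bit 9) and clear it unless the caller passed IOMMU_DIRTY_NO_CLEAR, so the IOMMU will set the bit again on the next DMA write. Below is a minimal, self-contained sketch of just that semantic; the struct, macros and main() harness are simplified stand-ins for illustration, not the driver's real definitions (the kernel code uses an atomic test_and_clear_bit()).

/*
 * Standalone sketch of the second-stage PTE dirty-bit harvest semantics.
 * Names are illustrative stand-ins, not the driver's actual types.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define SL_PTE_DIRTY_BIT   9
#define SL_PTE_DIRTY       (1ULL << SL_PTE_DIRTY_BIT)
#define DIRTY_NO_CLEAR     (1UL << 0)   /* stands in for IOMMU_DIRTY_NO_CLEAR */

struct sl_pte { uint64_t val; };

/* Report whether the PTE is dirty; clear the bit unless NO_CLEAR is set. */
static bool sl_pte_test_and_clear_dirty(struct sl_pte *pte, unsigned long flags)
{
	bool dirty = (pte->val & SL_PTE_DIRTY) != 0;

	if (!(flags & DIRTY_NO_CLEAR))
		pte->val &= ~SL_PTE_DIRTY;   /* kernel uses atomic test_and_clear_bit() */
	return dirty;
}

int main(void)
{
	struct sl_pte pte = { .val = SL_PTE_DIRTY };

	printf("peek:  %d\n", sl_pte_test_and_clear_dirty(&pte, DIRTY_NO_CLEAR)); /* 1, bit kept */
	printf("clear: %d\n", sl_pte_test_and_clear_dirty(&pte, 0));              /* 1, bit cleared */
	printf("again: %d\n", sl_pte_test_and_clear_dirty(&pte, 0));              /* 0 */
	return 0;
}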
On 24/10/2023 13:34, Yi Liu wrote: > On 2023/10/21 06:27, Joao Martins wrote: >> IOMMU advertises Access/Dirty bits for second-stage page table if the >> extended capability DMAR register reports it (ECAP, mnemonic ECAP.SSADS). >> The first stage table is compatible with CPU page table thus A/D bits are >> implicitly supported. Relevant Intel IOMMU SDM ref for first stage table >> "3.6.2 Accessed, Extended Accessed, and Dirty Flags" and second stage table >> "3.7.2 Accessed and Dirty Flags". >> >> First stage page table is enabled by default so it's allowed to set dirty >> tracking and no control bits needed, it just returns 0. To use SSADS, set >> bit 9 (SSADE) in the scalable-mode PASID table entry and flush the IOTLB >> via pasid_flush_caches() following the manual. Relevant SDM refs: >> >> "3.7.2 Accessed and Dirty Flags" >> "6.5.3.3 Guidance to Software for Invalidations, >> Table 23. Guidance to Software for Invalidations" >> >> PTE dirty bit is located in bit 9 and it's cached in the IOTLB so flush >> IOTLB to make sure IOMMU attempts to set the dirty bit again. Note that >> iommu_dirty_bitmap_record() will add the IOVA to iotlb_gather and thus the >> caller of the iommu op will flush the IOTLB. Relevant manuals over the >> hardware translation is chapter 6 with some special mention to: >> >> "6.2.3.1 Scalable-Mode PASID-Table Entry Programming Considerations" >> "6.2.4 IOTLB" >> >> Select IOMMUFD_DRIVER only if IOMMUFD is enabled, given that IOMMU dirty >> tracking requires IOMMUFD. >> >> Signed-off-by: Joao Martins <joao.m.martins@oracle.com> >> Reviewed-by: Lu Baolu <baolu.lu@linux.intel.com> >> Reviewed-by: Kevin Tian <kevin.tian@intel.com> >> --- >> drivers/iommu/intel/Kconfig | 1 + >> drivers/iommu/intel/iommu.c | 104 +++++++++++++++++++++++++++++++++- >> drivers/iommu/intel/iommu.h | 17 ++++++ >> drivers/iommu/intel/pasid.c | 108 ++++++++++++++++++++++++++++++++++++ >> drivers/iommu/intel/pasid.h | 4 ++ >> 5 files changed, 233 insertions(+), 1 deletion(-) > > normally, the subject of commits to intel iommu driver is started > with 'iommu/vt-d'. So if there is a new version, please rename it. 
> Also, SL is a bit eld naming, please use SS (second stage) > > s/iommu/intel: Access/Dirty bit support for SL domains/iommu/vt-d: Access/Dirty > bit support for SS domains > OK >> diff --git a/drivers/iommu/intel/Kconfig b/drivers/iommu/intel/Kconfig >> index 2e56bd79f589..f5348b80652b 100644 >> --- a/drivers/iommu/intel/Kconfig >> +++ b/drivers/iommu/intel/Kconfig >> @@ -15,6 +15,7 @@ config INTEL_IOMMU >> select DMA_OPS >> select IOMMU_API >> select IOMMU_IOVA >> + select IOMMUFD_DRIVER if IOMMUFD >> select NEED_DMA_MAP_STATE >> select DMAR_TABLE >> select SWIOTLB >> diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c >> index 017aed5813d8..d7ba1732130b 100644 >> --- a/drivers/iommu/intel/iommu.c >> +++ b/drivers/iommu/intel/iommu.c >> @@ -300,6 +300,7 @@ static int iommu_skip_te_disable; >> #define IDENTMAP_AZALIA 4 >> const struct iommu_ops intel_iommu_ops; >> +const struct iommu_dirty_ops intel_dirty_ops; >> static bool translation_pre_enabled(struct intel_iommu *iommu) >> { >> @@ -4079,8 +4080,10 @@ intel_iommu_domain_alloc_user(struct device *dev, u32 >> flags) >> { >> struct iommu_domain *domain; >> struct intel_iommu *iommu; >> + bool dirty_tracking; >> - if (flags & (~IOMMU_HWPT_ALLOC_NEST_PARENT)) >> + if (flags & (~(IOMMU_HWPT_ALLOC_NEST_PARENT| >> + IOMMU_HWPT_ALLOC_DIRTY_TRACKING))) >> return ERR_PTR(-EOPNOTSUPP); >> iommu = device_to_iommu(dev, NULL, NULL); >> @@ -4090,6 +4093,10 @@ intel_iommu_domain_alloc_user(struct device *dev, u32 >> flags) >> if ((flags & IOMMU_HWPT_ALLOC_NEST_PARENT) && !ecap_nest(iommu->ecap)) >> return ERR_PTR(-EOPNOTSUPP); >> + dirty_tracking = (flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING); >> + if (dirty_tracking && !slads_supported(iommu)) >> + return ERR_PTR(-EOPNOTSUPP); >> + >> /* >> * domain_alloc_user op needs to fully initialize a domain >> * before return, so uses iommu_domain_alloc() here for >> @@ -4098,6 +4105,15 @@ intel_iommu_domain_alloc_user(struct device *dev, u32 >> flags) >> domain = iommu_domain_alloc(dev->bus); >> if (!domain) >> domain = ERR_PTR(-ENOMEM); >> + >> + if (!IS_ERR(domain) && dirty_tracking) { >> + if (to_dmar_domain(domain)->use_first_level) { >> + iommu_domain_free(domain); >> + return ERR_PTR(-EOPNOTSUPP); >> + } >> + domain->dirty_ops = &intel_dirty_ops; >> + } >> + >> return domain; >> } >> @@ -4121,6 +4137,9 @@ static int prepare_domain_attach_device(struct >> iommu_domain *domain, >> if (dmar_domain->force_snooping && !ecap_sc_support(iommu->ecap)) >> return -EINVAL; >> + if (domain->dirty_ops && !slads_supported(iommu)) >> + return -EINVAL; >> + >> /* check if this iommu agaw is sufficient for max mapped address */ >> addr_width = agaw_to_width(iommu->agaw); >> if (addr_width > cap_mgaw(iommu->cap)) >> @@ -4375,6 +4394,8 @@ static bool intel_iommu_capable(struct device *dev, enum >> iommu_cap cap) >> return dmar_platform_optin(); >> case IOMMU_CAP_ENFORCE_CACHE_COHERENCY: >> return ecap_sc_support(info->iommu->ecap); >> + case IOMMU_CAP_DIRTY_TRACKING: >> + return slads_supported(info->iommu); >> default: >> return false; >> } >> @@ -4772,6 +4793,9 @@ static int intel_iommu_set_dev_pasid(struct iommu_domain >> *domain, >> if (!pasid_supported(iommu) || dev_is_real_dma_subdevice(dev)) >> return -EOPNOTSUPP; >> + if (domain->dirty_ops) >> + return -EINVAL; >> + >> if (context_copied(iommu, info->bus, info->devfn)) >> return -EBUSY; >> @@ -4830,6 +4854,84 @@ static void *intel_iommu_hw_info(struct device *dev, >> u32 *length, u32 *type) >> return vtd; >> } >> +static int 
intel_iommu_set_dirty_tracking(struct iommu_domain *domain, >> + bool enable) >> +{ >> + struct dmar_domain *dmar_domain = to_dmar_domain(domain); >> + struct device_domain_info *info; >> + int ret; >> + >> + spin_lock(&dmar_domain->lock); >> + if (dmar_domain->dirty_tracking == enable) >> + goto out_unlock; >> + >> + list_for_each_entry(info, &dmar_domain->devices, link) { >> + ret = intel_pasid_setup_dirty_tracking(info->iommu, info->domain, >> + info->dev, IOMMU_NO_PASID, >> + enable); >> + if (ret) >> + goto err_unwind; >> + >> + } >> + >> + dmar_domain->dirty_tracking = enable; >> +out_unlock: >> + spin_unlock(&dmar_domain->lock); >> + >> + return 0; >> + >> +err_unwind: >> + list_for_each_entry(info, &dmar_domain->devices, link) >> + intel_pasid_setup_dirty_tracking(info->iommu, dmar_domain, >> + info->dev, IOMMU_NO_PASID, >> + dmar_domain->dirty_tracking); >> + spin_unlock(&dmar_domain->lock); >> + return ret; >> +} >> + >> +static int intel_iommu_read_and_clear_dirty(struct iommu_domain *domain, >> + unsigned long iova, size_t size, >> + unsigned long flags, >> + struct iommu_dirty_bitmap *dirty) >> +{ >> + struct dmar_domain *dmar_domain = to_dmar_domain(domain); >> + unsigned long end = iova + size - 1; >> + unsigned long pgsize; >> + >> + /* >> + * IOMMUFD core calls into a dirty tracking disabled domain without an >> + * IOVA bitmap set in order to clean dirty bits in all PTEs that might >> + * have occurred when we stopped dirty tracking. This ensures that we >> + * never inherit dirtied bits from a previous cycle. >> + */ >> + if (!dmar_domain->dirty_tracking && dirty->bitmap) >> + return -EINVAL; >> + >> + do { >> + struct dma_pte *pte; >> + int lvl = 0; >> + >> + pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &lvl, >> + GFP_ATOMIC); >> + pgsize = level_size(lvl) << VTD_PAGE_SHIFT; >> + if (!pte || !dma_pte_present(pte)) { >> + iova += pgsize; >> + continue; >> + } >> + >> + if (dma_sl_pte_test_and_clear_dirty(pte, flags)) >> + iommu_dirty_bitmap_record(dirty, iova, pgsize); >> + iova += pgsize; >> + } while (iova < end); >> + >> + return 0; >> +} >> + >> +const struct iommu_dirty_ops intel_dirty_ops = { >> + .set_dirty_tracking = intel_iommu_set_dirty_tracking, >> + .read_and_clear_dirty = intel_iommu_read_and_clear_dirty, >> +}; >> + >> const struct iommu_ops intel_iommu_ops = { >> .capable = intel_iommu_capable, >> .hw_info = intel_iommu_hw_info, >> diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h >> index c18fb699c87a..27bcfd3bacdd 100644 >> --- a/drivers/iommu/intel/iommu.h >> +++ b/drivers/iommu/intel/iommu.h >> @@ -48,6 +48,9 @@ >> #define DMA_FL_PTE_DIRTY BIT_ULL(6) >> #define DMA_FL_PTE_XD BIT_ULL(63) >> +#define DMA_SL_PTE_DIRTY_BIT 9 >> +#define DMA_SL_PTE_DIRTY BIT_ULL(DMA_SL_PTE_DIRTY_BIT) >> + >> #define ADDR_WIDTH_5LEVEL (57) >> #define ADDR_WIDTH_4LEVEL (48) >> @@ -539,6 +542,9 @@ enum { >> #define sm_supported(iommu) (intel_iommu_sm && ecap_smts((iommu)->ecap)) >> #define pasid_supported(iommu) (sm_supported(iommu) && \ >> ecap_pasid((iommu)->ecap)) >> +#define slads_supported(iommu) (sm_supported(iommu) && \ > > how about ssads_supporte. > I've changed it >> + ecap_slads((iommu)->ecap)) >> + > > remove this empty line. > OK >> struct pasid_entry; >> struct pasid_state_entry; > > Regards, > Yi Liu > >> @@ -592,6 +598,7 @@ struct dmar_domain { >> * otherwise, goes through the second >> * level. 
>> */ >> + u8 dirty_tracking:1; /* Dirty tracking is enabled */ >> spinlock_t lock; /* Protect device tracking lists */ >> struct list_head devices; /* all devices' list */ >> @@ -781,6 +788,16 @@ static inline bool dma_pte_present(struct dma_pte *pte) >> return (pte->val & 3) != 0; >> } >> +static inline bool dma_sl_pte_test_and_clear_dirty(struct dma_pte *pte, >> + unsigned long flags) >> +{ >> + if (flags & IOMMU_DIRTY_NO_CLEAR) >> + return (pte->val & DMA_SL_PTE_DIRTY) != 0; >> + >> + return test_and_clear_bit(DMA_SL_PTE_DIRTY_BIT, >> + (unsigned long *)&pte->val); >> +} >> + >> static inline bool dma_pte_superpage(struct dma_pte *pte) >> { >> return (pte->val & DMA_PTE_LARGE_PAGE); >> diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c >> index 8f92b92f3d2a..9a01d46a56e1 100644 >> --- a/drivers/iommu/intel/pasid.c >> +++ b/drivers/iommu/intel/pasid.c >> @@ -277,6 +277,11 @@ static inline void pasid_set_bits(u64 *ptr, u64 mask, u64 >> bits) >> WRITE_ONCE(*ptr, (old & ~mask) | bits); >> } >> +static inline u64 pasid_get_bits(u64 *ptr) >> +{ >> + return READ_ONCE(*ptr); >> +} >> + >> /* >> * Setup the DID(Domain Identifier) field (Bit 64~79) of scalable mode >> * PASID entry. >> @@ -335,6 +340,36 @@ static inline void pasid_set_fault_enable(struct >> pasid_entry *pe) >> pasid_set_bits(&pe->val[0], 1 << 1, 0); >> } >> +/* >> + * Enable second level A/D bits by setting the SLADE (Second Level >> + * Access Dirty Enable) field (Bit 9) of a scalable mode PASID >> + * entry. >> + */ >> +static inline void pasid_set_ssade(struct pasid_entry *pe) >> +{ >> + pasid_set_bits(&pe->val[0], 1 << 9, 1 << 9); >> +} >> + >> +/* >> + * Disable second level A/D bits by clearing the SLADE (Second Level >> + * Access Dirty Enable) field (Bit 9) of a scalable mode PASID >> + * entry. >> + */ >> +static inline void pasid_clear_ssade(struct pasid_entry *pe) >> +{ >> + pasid_set_bits(&pe->val[0], 1 << 9, 0); >> +} >> + >> +/* >> + * Checks if second level A/D bits specifically the SLADE (Second Level >> + * Access Dirty Enable) field (Bit 9) of a scalable mode PASID >> + * entry is set. >> + */ >> +static inline bool pasid_get_ssade(struct pasid_entry *pe) >> +{ >> + return pasid_get_bits(&pe->val[0]) & (1 << 9); >> +} >> + >> /* >> * Setup the WPE(Write Protect Enable) field (Bit 132) of a >> * scalable mode PASID entry. >> @@ -627,6 +662,8 @@ int intel_pasid_setup_second_level(struct intel_iommu *iommu, >> pasid_set_translation_type(pte, PASID_ENTRY_PGTT_SL_ONLY); >> pasid_set_fault_enable(pte); >> pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap)); >> + if (domain->dirty_tracking) >> + pasid_set_ssade(pte); >> pasid_set_present(pte); >> spin_unlock(&iommu->lock); >> @@ -636,6 +673,77 @@ int intel_pasid_setup_second_level(struct intel_iommu >> *iommu, >> return 0; >> } >> +/* >> + * Set up dirty tracking on a second only or nested translation type. 
>> + */ >> +int intel_pasid_setup_dirty_tracking(struct intel_iommu *iommu, >> + struct dmar_domain *domain, >> + struct device *dev, u32 pasid, >> + bool enabled) >> +{ >> + struct pasid_entry *pte; >> + u16 did, pgtt; >> + >> + spin_lock(&iommu->lock); >> + >> + pte = intel_pasid_get_entry(dev, pasid); >> + if (!pte) { >> + spin_unlock(&iommu->lock); >> + dev_err_ratelimited(dev, >> + "Failed to get pasid entry of PASID %d\n", >> + pasid); >> + return -ENODEV; >> + } >> + >> + did = domain_id_iommu(domain, iommu); >> + pgtt = pasid_pte_get_pgtt(pte); >> + if (pgtt != PASID_ENTRY_PGTT_SL_ONLY && pgtt != PASID_ENTRY_PGTT_NESTED) { >> + spin_unlock(&iommu->lock); >> + dev_err_ratelimited(dev, >> + "Dirty tracking not supported on translation type %d\n", >> + pgtt); >> + return -EOPNOTSUPP; >> + } >> + >> + if (pasid_get_ssade(pte) == enabled) { >> + spin_unlock(&iommu->lock); >> + return 0; >> + } >> + >> + if (enabled) >> + pasid_set_ssade(pte); >> + else >> + pasid_clear_ssade(pte); >> + spin_unlock(&iommu->lock); >> + >> + if (!ecap_coherent(iommu->ecap)) >> + clflush_cache_range(pte, sizeof(*pte)); >> + >> + /* >> + * From VT-d spec table 25 "Guidance to Software for Invalidations": >> + * >> + * - PASID-selective-within-Domain PASID-cache invalidation >> + * If (PGTT=SS or Nested) >> + * - Domain-selective IOTLB invalidation >> + * Else >> + * - PASID-selective PASID-based IOTLB invalidation >> + * - If (pasid is RID_PASID) >> + * - Global Device-TLB invalidation to affected functions >> + * Else >> + * - PASID-based Device-TLB invalidation (with S=1 and >> + * Addr[63:12]=0x7FFFFFFF_FFFFF) to affected functions >> + */ >> + pasid_cache_invalidation_with_pasid(iommu, did, pasid); >> + >> + iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH); >> + >> + /* Device IOTLB doesn't need to be flushed in caching mode. */ >> + if (!cap_caching_mode(iommu->cap)) >> + devtlb_invalidation_with_pasid(iommu, dev, pasid); >> + >> + return 0; >> +} >> + >> /* >> * Set up the scalable mode pasid entry for passthrough translation type. >> */ >> diff --git a/drivers/iommu/intel/pasid.h b/drivers/iommu/intel/pasid.h >> index 4e9e68c3c388..958050b093aa 100644 >> --- a/drivers/iommu/intel/pasid.h >> +++ b/drivers/iommu/intel/pasid.h >> @@ -106,6 +106,10 @@ int intel_pasid_setup_first_level(struct intel_iommu *iommu, >> int intel_pasid_setup_second_level(struct intel_iommu *iommu, >> struct dmar_domain *domain, >> struct device *dev, u32 pasid); >> +int intel_pasid_setup_dirty_tracking(struct intel_iommu *iommu, >> + struct dmar_domain *domain, >> + struct device *dev, u32 pasid, >> + bool enabled); >> int intel_pasid_setup_pass_through(struct intel_iommu *iommu, >> struct dmar_domain *domain, >> struct device *dev, u32 pasid)
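Similarly, intel_iommu_read_and_clear_dirty() in the patch walks the requested IOVA range one mapping at a time, stepping by the page size of the level where the PTE was found (4K, 2M or 1G), and records only present, dirty entries into the caller's bitmap. The sketch below models that superpage-aware walk under stated assumptions: the flat table[] and lookup() are toy stand-ins for pfn_to_dma_pte(), record() stands in for iommu_dirty_bitmap_record(), and holes are simply stepped over at 4K granularity.

/* Editor-style sketch of the superpage-aware dirty harvest walk. */
#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT	12
#define PTE_PRESENT	(1ULL << 0)
#define PTE_DIRTY	(1ULL << 9)

struct mapping { uint64_t iova; int level; uint64_t pte; };

/* Two toy mappings: a dirty 4K page and a dirty 2M superpage. */
static struct mapping table[] = {
	{ 0x000000, 1, PTE_PRESENT | PTE_DIRTY },
	{ 0x200000, 2, PTE_PRESENT | PTE_DIRTY },
};

static struct mapping *lookup(uint64_t iova, int *level)
{
	for (unsigned i = 0; i < sizeof(table) / sizeof(table[0]); i++)
		if (table[i].iova == iova) {
			*level = table[i].level;
			return &table[i];
		}
	return NULL;
}

static void record(uint64_t iova, uint64_t len)
{
	printf("dirty: 0x%llx + 0x%llx\n",
	       (unsigned long long)iova, (unsigned long long)len);
}

static void read_and_clear_dirty(uint64_t iova, uint64_t size)
{
	uint64_t end = iova + size - 1;

	do {
		int level = 1;
		struct mapping *m = lookup(iova, &level);
		/* level 1 = 4K, 2 = 2M, 3 = 1G: advance by the mapping size */
		uint64_t pgsize = 1ULL << (PAGE_SHIFT + 9 * (level - 1));

		if (m && (m->pte & PTE_PRESENT) && (m->pte & PTE_DIRTY)) {
			m->pte &= ~PTE_DIRTY;	/* clear so the IOMMU re-dirties it */
			record(iova, pgsize);
		}
		iova += pgsize;
	} while (iova < end);
}

int main(void)
{
	read_and_clear_dirty(0, 4ULL << 20);	/* walk the first 4M of IOVA */
	return 0;
}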
On 24/10/2023 13:42, Joao Martins wrote:
> On 24/10/2023 13:34, Yi Liu wrote:
>> On 2023/10/21 06:27, Joao Martins wrote:
>>> ---
>>>   drivers/iommu/intel/Kconfig |   1 +
>>>   drivers/iommu/intel/iommu.c | 104 +++++++++++++++++++++++++++++++++-
>>>   drivers/iommu/intel/iommu.h |  17 ++++++
>>>   drivers/iommu/intel/pasid.c | 108 ++++++++++++++++++++++++++++++++++++
>>>   drivers/iommu/intel/pasid.h |   4 ++
>>>   5 files changed, 233 insertions(+), 1 deletion(-)
>>
>> normally, the subject of commits to intel iommu driver is started
>> with 'iommu/vt-d'. So if there is a new version, please rename it.
>> Also, SL is a bit eld naming, please use SS (second stage)
>>
>> s/iommu/intel: Access/Dirty bit support for SL domains/iommu/vt-d: Access/Dirty
>> bit support for SS domains
>>
> OK
>
FYI, this is what I have staged in:

Subject: iommu/vt-d: Access/Dirty bit support for SS domains

diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 4e25faf573de..eb92a201cc0b 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -4094,7 +4094,7 @@ intel_iommu_domain_alloc_user(struct device *dev, u32 flags)
 		return ERR_PTR(-EOPNOTSUPP);
 
 	dirty_tracking = (flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING);
-	if (dirty_tracking && !slads_supported(iommu))
+	if (dirty_tracking && !ssads_supported(iommu))
 		return ERR_PTR(-EOPNOTSUPP);
 
 	/*
@@ -4137,7 +4137,7 @@ static int prepare_domain_attach_device(struct iommu_domain *domain,
 	if (dmar_domain->force_snooping && !ecap_sc_support(iommu->ecap))
 		return -EINVAL;
 
-	if (domain->dirty_ops && !slads_supported(iommu))
+	if (domain->dirty_ops && !ssads_supported(iommu))
 		return -EINVAL;
 
 	/* check if this iommu agaw is sufficient for max mapped address */
@@ -4395,7 +4395,7 @@ static bool intel_iommu_capable(struct device *dev, enum iommu_cap cap)
 	case IOMMU_CAP_ENFORCE_CACHE_COHERENCY:
 		return ecap_sc_support(info->iommu->ecap);
 	case IOMMU_CAP_DIRTY_TRACKING:
-		return slads_supported(info->iommu);
+		return ssads_supported(info->iommu);
 	default:
 		return false;
 	}
diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h
index 27bcfd3bacdd..3bb569146229 100644
--- a/drivers/iommu/intel/iommu.h
+++ b/drivers/iommu/intel/iommu.h
@@ -542,10 +542,9 @@ enum {
 #define sm_supported(iommu)	(intel_iommu_sm && ecap_smts((iommu)->ecap))
 #define pasid_supported(iommu)	(sm_supported(iommu) && \
 				 ecap_pasid((iommu)->ecap))
-#define slads_supported(iommu) (sm_supported(iommu) && \
+#define ssads_supported(iommu) (sm_supported(iommu) && \
 				 ecap_slads((iommu)->ecap))
 
-
 struct pasid_entry;
 struct pasid_state_entry;
 struct page_req_dsc;
On 2023/10/24 20:52, Joao Martins wrote: > On 24/10/2023 13:42, Joao Martins wrote: >> On 24/10/2023 13:34, Yi Liu wrote: >>> On 2023/10/21 06:27, Joao Martins wrote: >>>> --- >>>> drivers/iommu/intel/Kconfig | 1 + >>>> drivers/iommu/intel/iommu.c | 104 +++++++++++++++++++++++++++++++++- >>>> drivers/iommu/intel/iommu.h | 17 ++++++ >>>> drivers/iommu/intel/pasid.c | 108 ++++++++++++++++++++++++++++++++++++ >>>> drivers/iommu/intel/pasid.h | 4 ++ >>>> 5 files changed, 233 insertions(+), 1 deletion(-) >>> >>> normally, the subject of commits to intel iommu driver is started >>> with 'iommu/vt-d'. So if there is a new version, please rename it. >>> Also, SL is a bit eld naming, please use SS (second stage) >>> >>> s/iommu/intel: Access/Dirty bit support for SL domains/iommu/vt-d: Access/Dirty >>> bit support for SS domains >>> >> OK >> > FYI, this is what I have staged in: sure. > Subject: iommu/vt-d: Access/Dirty bit support for SS domains > > diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c > index 4e25faf573de..eb92a201cc0b 100644 > --- a/drivers/iommu/intel/iommu.c > +++ b/drivers/iommu/intel/iommu.c > @@ -4094,7 +4094,7 @@ intel_iommu_domain_alloc_user(struct device *dev, u32 flags) > return ERR_PTR(-EOPNOTSUPP); > > dirty_tracking = (flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING); > - if (dirty_tracking && !slads_supported(iommu)) > + if (dirty_tracking && !ssads_supported(iommu)) > return ERR_PTR(-EOPNOTSUPP); > > /* > @@ -4137,7 +4137,7 @@ static int prepare_domain_attach_device(struct > iommu_domain *domain, > if (dmar_domain->force_snooping && !ecap_sc_support(iommu->ecap)) > return -EINVAL; > > - if (domain->dirty_ops && !slads_supported(iommu)) > + if (domain->dirty_ops && !ssads_supported(iommu)) > return -EINVAL; > > /* check if this iommu agaw is sufficient for max mapped address */ > @@ -4395,7 +4395,7 @@ static bool intel_iommu_capable(struct device *dev, enum > iommu_cap cap) > case IOMMU_CAP_ENFORCE_CACHE_COHERENCY: > return ecap_sc_support(info->iommu->ecap); > case IOMMU_CAP_DIRTY_TRACKING: > - return slads_supported(info->iommu); > + return ssads_supported(info->iommu); > default: > return false; > } > diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h > index 27bcfd3bacdd..3bb569146229 100644 > --- a/drivers/iommu/intel/iommu.h > +++ b/drivers/iommu/intel/iommu.h > @@ -542,10 +542,9 @@ enum { > #define sm_supported(iommu) (intel_iommu_sm && ecap_smts((iommu)->ecap)) > #define pasid_supported(iommu) (sm_supported(iommu) && \ > ecap_pasid((iommu)->ecap)) > -#define slads_supported(iommu) (sm_supported(iommu) && \ > +#define ssads_supported(iommu) (sm_supported(iommu) && \ > ecap_slads((iommu)->ecap)) > > - > struct pasid_entry; > struct pasid_state_entry; > struct page_req_dsc;
diff --git a/drivers/iommu/intel/Kconfig b/drivers/iommu/intel/Kconfig index 2e56bd79f589..f5348b80652b 100644 --- a/drivers/iommu/intel/Kconfig +++ b/drivers/iommu/intel/Kconfig @@ -15,6 +15,7 @@ config INTEL_IOMMU select DMA_OPS select IOMMU_API select IOMMU_IOVA + select IOMMUFD_DRIVER if IOMMUFD select NEED_DMA_MAP_STATE select DMAR_TABLE select SWIOTLB diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 017aed5813d8..d7ba1732130b 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -300,6 +300,7 @@ static int iommu_skip_te_disable; #define IDENTMAP_AZALIA 4 const struct iommu_ops intel_iommu_ops; +const struct iommu_dirty_ops intel_dirty_ops; static bool translation_pre_enabled(struct intel_iommu *iommu) { @@ -4079,8 +4080,10 @@ intel_iommu_domain_alloc_user(struct device *dev, u32 flags) { struct iommu_domain *domain; struct intel_iommu *iommu; + bool dirty_tracking; - if (flags & (~IOMMU_HWPT_ALLOC_NEST_PARENT)) + if (flags & (~(IOMMU_HWPT_ALLOC_NEST_PARENT| + IOMMU_HWPT_ALLOC_DIRTY_TRACKING))) return ERR_PTR(-EOPNOTSUPP); iommu = device_to_iommu(dev, NULL, NULL); @@ -4090,6 +4093,10 @@ intel_iommu_domain_alloc_user(struct device *dev, u32 flags) if ((flags & IOMMU_HWPT_ALLOC_NEST_PARENT) && !ecap_nest(iommu->ecap)) return ERR_PTR(-EOPNOTSUPP); + dirty_tracking = (flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING); + if (dirty_tracking && !slads_supported(iommu)) + return ERR_PTR(-EOPNOTSUPP); + /* * domain_alloc_user op needs to fully initialize a domain * before return, so uses iommu_domain_alloc() here for @@ -4098,6 +4105,15 @@ intel_iommu_domain_alloc_user(struct device *dev, u32 flags) domain = iommu_domain_alloc(dev->bus); if (!domain) domain = ERR_PTR(-ENOMEM); + + if (!IS_ERR(domain) && dirty_tracking) { + if (to_dmar_domain(domain)->use_first_level) { + iommu_domain_free(domain); + return ERR_PTR(-EOPNOTSUPP); + } + domain->dirty_ops = &intel_dirty_ops; + } + return domain; } @@ -4121,6 +4137,9 @@ static int prepare_domain_attach_device(struct iommu_domain *domain, if (dmar_domain->force_snooping && !ecap_sc_support(iommu->ecap)) return -EINVAL; + if (domain->dirty_ops && !slads_supported(iommu)) + return -EINVAL; + /* check if this iommu agaw is sufficient for max mapped address */ addr_width = agaw_to_width(iommu->agaw); if (addr_width > cap_mgaw(iommu->cap)) @@ -4375,6 +4394,8 @@ static bool intel_iommu_capable(struct device *dev, enum iommu_cap cap) return dmar_platform_optin(); case IOMMU_CAP_ENFORCE_CACHE_COHERENCY: return ecap_sc_support(info->iommu->ecap); + case IOMMU_CAP_DIRTY_TRACKING: + return slads_supported(info->iommu); default: return false; } @@ -4772,6 +4793,9 @@ static int intel_iommu_set_dev_pasid(struct iommu_domain *domain, if (!pasid_supported(iommu) || dev_is_real_dma_subdevice(dev)) return -EOPNOTSUPP; + if (domain->dirty_ops) + return -EINVAL; + if (context_copied(iommu, info->bus, info->devfn)) return -EBUSY; @@ -4830,6 +4854,84 @@ static void *intel_iommu_hw_info(struct device *dev, u32 *length, u32 *type) return vtd; } +static int intel_iommu_set_dirty_tracking(struct iommu_domain *domain, + bool enable) +{ + struct dmar_domain *dmar_domain = to_dmar_domain(domain); + struct device_domain_info *info; + int ret; + + spin_lock(&dmar_domain->lock); + if (dmar_domain->dirty_tracking == enable) + goto out_unlock; + + list_for_each_entry(info, &dmar_domain->devices, link) { + ret = intel_pasid_setup_dirty_tracking(info->iommu, info->domain, + info->dev, IOMMU_NO_PASID, + enable); + if (ret) + goto err_unwind; + 
+ } + + dmar_domain->dirty_tracking = enable; +out_unlock: + spin_unlock(&dmar_domain->lock); + + return 0; + +err_unwind: + list_for_each_entry(info, &dmar_domain->devices, link) + intel_pasid_setup_dirty_tracking(info->iommu, dmar_domain, + info->dev, IOMMU_NO_PASID, + dmar_domain->dirty_tracking); + spin_unlock(&dmar_domain->lock); + return ret; +} + +static int intel_iommu_read_and_clear_dirty(struct iommu_domain *domain, + unsigned long iova, size_t size, + unsigned long flags, + struct iommu_dirty_bitmap *dirty) +{ + struct dmar_domain *dmar_domain = to_dmar_domain(domain); + unsigned long end = iova + size - 1; + unsigned long pgsize; + + /* + * IOMMUFD core calls into a dirty tracking disabled domain without an + * IOVA bitmap set in order to clean dirty bits in all PTEs that might + * have occurred when we stopped dirty tracking. This ensures that we + * never inherit dirtied bits from a previous cycle. + */ + if (!dmar_domain->dirty_tracking && dirty->bitmap) + return -EINVAL; + + do { + struct dma_pte *pte; + int lvl = 0; + + pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &lvl, + GFP_ATOMIC); + pgsize = level_size(lvl) << VTD_PAGE_SHIFT; + if (!pte || !dma_pte_present(pte)) { + iova += pgsize; + continue; + } + + if (dma_sl_pte_test_and_clear_dirty(pte, flags)) + iommu_dirty_bitmap_record(dirty, iova, pgsize); + iova += pgsize; + } while (iova < end); + + return 0; +} + +const struct iommu_dirty_ops intel_dirty_ops = { + .set_dirty_tracking = intel_iommu_set_dirty_tracking, + .read_and_clear_dirty = intel_iommu_read_and_clear_dirty, +}; + const struct iommu_ops intel_iommu_ops = { .capable = intel_iommu_capable, .hw_info = intel_iommu_hw_info, diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h index c18fb699c87a..27bcfd3bacdd 100644 --- a/drivers/iommu/intel/iommu.h +++ b/drivers/iommu/intel/iommu.h @@ -48,6 +48,9 @@ #define DMA_FL_PTE_DIRTY BIT_ULL(6) #define DMA_FL_PTE_XD BIT_ULL(63) +#define DMA_SL_PTE_DIRTY_BIT 9 +#define DMA_SL_PTE_DIRTY BIT_ULL(DMA_SL_PTE_DIRTY_BIT) + #define ADDR_WIDTH_5LEVEL (57) #define ADDR_WIDTH_4LEVEL (48) @@ -539,6 +542,9 @@ enum { #define sm_supported(iommu) (intel_iommu_sm && ecap_smts((iommu)->ecap)) #define pasid_supported(iommu) (sm_supported(iommu) && \ ecap_pasid((iommu)->ecap)) +#define slads_supported(iommu) (sm_supported(iommu) && \ + ecap_slads((iommu)->ecap)) + struct pasid_entry; struct pasid_state_entry; @@ -592,6 +598,7 @@ struct dmar_domain { * otherwise, goes through the second * level. 
*/ + u8 dirty_tracking:1; /* Dirty tracking is enabled */ spinlock_t lock; /* Protect device tracking lists */ struct list_head devices; /* all devices' list */ @@ -781,6 +788,16 @@ static inline bool dma_pte_present(struct dma_pte *pte) return (pte->val & 3) != 0; } +static inline bool dma_sl_pte_test_and_clear_dirty(struct dma_pte *pte, + unsigned long flags) +{ + if (flags & IOMMU_DIRTY_NO_CLEAR) + return (pte->val & DMA_SL_PTE_DIRTY) != 0; + + return test_and_clear_bit(DMA_SL_PTE_DIRTY_BIT, + (unsigned long *)&pte->val); +} + static inline bool dma_pte_superpage(struct dma_pte *pte) { return (pte->val & DMA_PTE_LARGE_PAGE); diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c index 8f92b92f3d2a..9a01d46a56e1 100644 --- a/drivers/iommu/intel/pasid.c +++ b/drivers/iommu/intel/pasid.c @@ -277,6 +277,11 @@ static inline void pasid_set_bits(u64 *ptr, u64 mask, u64 bits) WRITE_ONCE(*ptr, (old & ~mask) | bits); } +static inline u64 pasid_get_bits(u64 *ptr) +{ + return READ_ONCE(*ptr); +} + /* * Setup the DID(Domain Identifier) field (Bit 64~79) of scalable mode * PASID entry. @@ -335,6 +340,36 @@ static inline void pasid_set_fault_enable(struct pasid_entry *pe) pasid_set_bits(&pe->val[0], 1 << 1, 0); } +/* + * Enable second level A/D bits by setting the SLADE (Second Level + * Access Dirty Enable) field (Bit 9) of a scalable mode PASID + * entry. + */ +static inline void pasid_set_ssade(struct pasid_entry *pe) +{ + pasid_set_bits(&pe->val[0], 1 << 9, 1 << 9); +} + +/* + * Disable second level A/D bits by clearing the SLADE (Second Level + * Access Dirty Enable) field (Bit 9) of a scalable mode PASID + * entry. + */ +static inline void pasid_clear_ssade(struct pasid_entry *pe) +{ + pasid_set_bits(&pe->val[0], 1 << 9, 0); +} + +/* + * Checks if second level A/D bits specifically the SLADE (Second Level + * Access Dirty Enable) field (Bit 9) of a scalable mode PASID + * entry is set. + */ +static inline bool pasid_get_ssade(struct pasid_entry *pe) +{ + return pasid_get_bits(&pe->val[0]) & (1 << 9); +} + /* * Setup the WPE(Write Protect Enable) field (Bit 132) of a * scalable mode PASID entry. @@ -627,6 +662,8 @@ int intel_pasid_setup_second_level(struct intel_iommu *iommu, pasid_set_translation_type(pte, PASID_ENTRY_PGTT_SL_ONLY); pasid_set_fault_enable(pte); pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap)); + if (domain->dirty_tracking) + pasid_set_ssade(pte); pasid_set_present(pte); spin_unlock(&iommu->lock); @@ -636,6 +673,77 @@ int intel_pasid_setup_second_level(struct intel_iommu *iommu, return 0; } +/* + * Set up dirty tracking on a second only or nested translation type. 
+ */ +int intel_pasid_setup_dirty_tracking(struct intel_iommu *iommu, + struct dmar_domain *domain, + struct device *dev, u32 pasid, + bool enabled) +{ + struct pasid_entry *pte; + u16 did, pgtt; + + spin_lock(&iommu->lock); + + pte = intel_pasid_get_entry(dev, pasid); + if (!pte) { + spin_unlock(&iommu->lock); + dev_err_ratelimited(dev, + "Failed to get pasid entry of PASID %d\n", + pasid); + return -ENODEV; + } + + did = domain_id_iommu(domain, iommu); + pgtt = pasid_pte_get_pgtt(pte); + if (pgtt != PASID_ENTRY_PGTT_SL_ONLY && pgtt != PASID_ENTRY_PGTT_NESTED) { + spin_unlock(&iommu->lock); + dev_err_ratelimited(dev, + "Dirty tracking not supported on translation type %d\n", + pgtt); + return -EOPNOTSUPP; + } + + if (pasid_get_ssade(pte) == enabled) { + spin_unlock(&iommu->lock); + return 0; + } + + if (enabled) + pasid_set_ssade(pte); + else + pasid_clear_ssade(pte); + spin_unlock(&iommu->lock); + + if (!ecap_coherent(iommu->ecap)) + clflush_cache_range(pte, sizeof(*pte)); + + /* + * From VT-d spec table 25 "Guidance to Software for Invalidations": + * + * - PASID-selective-within-Domain PASID-cache invalidation + * If (PGTT=SS or Nested) + * - Domain-selective IOTLB invalidation + * Else + * - PASID-selective PASID-based IOTLB invalidation + * - If (pasid is RID_PASID) + * - Global Device-TLB invalidation to affected functions + * Else + * - PASID-based Device-TLB invalidation (with S=1 and + * Addr[63:12]=0x7FFFFFFF_FFFFF) to affected functions + */ + pasid_cache_invalidation_with_pasid(iommu, did, pasid); + + iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH); + + /* Device IOTLB doesn't need to be flushed in caching mode. */ + if (!cap_caching_mode(iommu->cap)) + devtlb_invalidation_with_pasid(iommu, dev, pasid); + + return 0; +} + /* * Set up the scalable mode pasid entry for passthrough translation type. */ diff --git a/drivers/iommu/intel/pasid.h b/drivers/iommu/intel/pasid.h index 4e9e68c3c388..958050b093aa 100644 --- a/drivers/iommu/intel/pasid.h +++ b/drivers/iommu/intel/pasid.h @@ -106,6 +106,10 @@ int intel_pasid_setup_first_level(struct intel_iommu *iommu, int intel_pasid_setup_second_level(struct intel_iommu *iommu, struct dmar_domain *domain, struct device *dev, u32 pasid); +int intel_pasid_setup_dirty_tracking(struct intel_iommu *iommu, + struct dmar_domain *domain, + struct device *dev, u32 pasid, + bool enabled); int intel_pasid_setup_pass_through(struct intel_iommu *iommu, struct dmar_domain *domain, struct device *dev, u32 pasid);
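For context on how this driver support gets exercised end to end: the IOMMUFD side of the series exposes dirty tracking to userspace through HWPT ioctls, where the domain is allocated with IOMMU_HWPT_ALLOC_DIRTY_TRACKING (the flag handled in intel_iommu_domain_alloc_user() above), tracking is toggled per HWPT, and the accumulated dirty bits are read back as a bitmap. The sketch below shows that flow; device binding/attach and error handling are omitted, and the ioctl and struct field names are taken from the series' uAPI as I understand it, so treat them as assumptions to be verified against include/uapi/linux/iommufd.h rather than as definitions made by this patch.

/*
 * Userspace flow sketch for IOMMUFD dirty tracking. uAPI names below are
 * assumed from the broader series; check include/uapi/linux/iommufd.h.
 */
#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/iommufd.h>

int track_dirty(int iommufd, uint32_t dev_id, uint32_t ioas_id,
		uint64_t iova, uint64_t length, uint64_t *bitmap)
{
	/* Allocate a second-stage HWPT with dirty-tracking ops attached. */
	struct iommu_hwpt_alloc alloc = {
		.size = sizeof(alloc),
		.flags = IOMMU_HWPT_ALLOC_DIRTY_TRACKING,
		.dev_id = dev_id,
		.pt_id = ioas_id,
	};
	if (ioctl(iommufd, IOMMU_HWPT_ALLOC, &alloc))
		return -1;

	/* Start tracking: on VT-d this sets SSADE in the PASID entries. */
	struct iommu_hwpt_set_dirty_tracking set = {
		.size = sizeof(set),
		.flags = IOMMU_HWPT_DIRTY_TRACKING_ENABLE,
		.hwpt_id = alloc.out_hwpt_id,
	};
	if (ioctl(iommufd, IOMMU_HWPT_SET_DIRTY_TRACKING, &set))
		return -1;

	/* Harvest dirty bits for [iova, iova + length) at 4K granularity. */
	struct iommu_hwpt_get_dirty_bitmap get = {
		.size = sizeof(get),
		.hwpt_id = alloc.out_hwpt_id,
		.iova = iova,
		.length = length,
		.page_size = 4096,
		.data = (uintptr_t)bitmap,
	};
	return ioctl(iommufd, IOMMU_HWPT_GET_DIRTY_BITMAP, &get);
}

In this flow the driver-side read_and_clear_dirty() shown in the patch is what services IOMMU_HWPT_GET_DIRTY_BITMAP, and the IOTLB flush after harvesting is handled by the IOMMUFD core via iotlb_gather, as the commit message notes.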