[v4,11/18] iommu/amd: Access/Dirty bit support in IOPTEs

Message ID	20231018202715.69734-12-joao.m.martins@oracle.com (mailing list archive)
State	New, archived
Headers	show Return-Path: <kvm-owner@vger.kernel.org> From: Joao Martins <joao.m.martins@oracle.com> To: iommu@lists.linux.dev Cc: Jason Gunthorpe <jgg@nvidia.com>, Kevin Tian <kevin.tian@intel.com>, Shameerali Kolothum Thodi <shameerali.kolothum.thodi@huawei.com>, Lu Baolu <baolu.lu@linux.intel.com>, Yi Liu <yi.l.liu@intel.com>, Yi Y Sun <yi.y.sun@intel.com>, Nicolin Chen <nicolinc@nvidia.com>, Joerg Roedel <joro@8bytes.org>, Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>, Will Deacon <will@kernel.org>, Robin Murphy <robin.murphy@arm.com>, Zhenzhong Duan <zhenzhong.duan@intel.com>, Alex Williamson <alex.williamson@redhat.com>, kvm@vger.kernel.org, Joao Martins <joao.m.martins@oracle.com> Subject: [PATCH v4 11/18] iommu/amd: Access/Dirty bit support in IOPTEs Date: Wed, 18 Oct 2023 21:27:08 +0100 Message-Id: <20231018202715.69734-12-joao.m.martins@oracle.com> In-Reply-To: <20231018202715.69734-1-joao.m.martins@oracle.com> References: <20231018202715.69734-1-joao.m.martins@oracle.com> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Precedence: bulk
Series	IOMMUFD Dirty Tracking \| expand [v4,00/18] IOMMUFD Dirty Tracking [v4,01/18] vfio/iova_bitmap: Export more API symbols [v4,02/18] vfio: Move iova_bitmap into iommufd [v4,03/18] iommufd/iova_bitmap: Move symbols to IOMMUFD namespace [v4,04/18] iommu: Add iommu_domain ops for dirty tracking [v4,05/18] iommufd: Add a flag to enforce dirty tracking on attach [v4,06/18] iommufd: Add IOMMU_HWPT_SET_DIRTY [v4,07/18] iommufd: Add IOMMU_HWPT_GET_DIRTY_IOVA [v4,08/18] iommufd: Add capabilities to IOMMU_GET_HW_INFO [v4,09/18] iommufd: Add a flag to skip clearing of IOPTE dirty [v4,10/18] iommu/amd: Add domain_alloc_user based domain allocation [v4,11/18] iommu/amd: Access/Dirty bit support in IOPTEs [v4,12/18] iommu/intel: Access/Dirty bit support for SL domains [v4,13/18] iommufd/selftest: Expand mock_domain with dev_flags [v4,14/18] iommufd/selftest: Test IOMMU_HWPT_ALLOC_ENFORCE_DIRTY [v4,15/18] iommufd/selftest: Test IOMMU_HWPT_SET_DIRTY [v4,16/18] iommufd/selftest: Test IOMMU_HWPT_GET_DIRTY_IOVA [v4,17/18] iommufd/selftest: Test out_capabilities in IOMMU_GET_HW_INFO [v4,18/18] iommufd/selftest: Test IOMMU_GET_DIRTY_IOVA_NO_CLEAR flag

diff --git a/drivers/iommu/amd/Kconfig b/drivers/iommu/amd/Kconfig index 9b5fc3356bf2..8bd4c3b183ec 100644 --- a/drivers/iommu/amd/Kconfig +++ b/drivers/iommu/amd/Kconfig @@ -10,6 +10,7 @@ config AMD_IOMMU select IOMMU_API select IOMMU_IOVA select IOMMU_IO_PGTABLE + select IOMMUFD_DRIVER if IOMMUFD depends on X86_64 && PCI && ACPI && HAVE_CMPXCHG_DOUBLE help With this option you can enable support for AMD IOMMU hardware in diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h index 7dc30c2b56b3..dec4e5c2b66b 100644 --- a/drivers/iommu/amd/amd_iommu_types.h +++ b/drivers/iommu/amd/amd_iommu_types.h @@ -97,7 +97,9 @@ #define FEATURE_GATS_MASK (3ULL) #define FEATURE_GAM_VAPIC BIT_ULL(21) #define FEATURE_GIOSUP BIT_ULL(48) +#define FEATURE_HASUP BIT_ULL(49) #define FEATURE_EPHSUP BIT_ULL(50) +#define FEATURE_HDSUP BIT_ULL(52) #define FEATURE_SNP BIT_ULL(63) #define FEATURE_PASID_SHIFT 32 @@ -212,6 +214,7 @@ /* macros and definitions for device table entries */ #define DEV_ENTRY_VALID 0x00 #define DEV_ENTRY_TRANSLATION 0x01 +#define DEV_ENTRY_HAD 0x07 #define DEV_ENTRY_PPR 0x34 #define DEV_ENTRY_IR 0x3d #define DEV_ENTRY_IW 0x3e @@ -370,10 +373,16 @@ #define PTE_LEVEL_PAGE_SIZE(level) \ (1ULL << (12 + (9 * (level)))) +/* + * The IOPTE dirty bit + */ +#define IOMMU_PTE_HD_BIT (6) + /* * Bit value definition for I/O PTE fields */ #define IOMMU_PTE_PR BIT_ULL(0) +#define IOMMU_PTE_HD BIT_ULL(IOMMU_PTE_HD_BIT) #define IOMMU_PTE_U BIT_ULL(59) #define IOMMU_PTE_FC BIT_ULL(60) #define IOMMU_PTE_IR BIT_ULL(61) @@ -384,6 +393,7 @@ */ #define DTE_FLAG_V BIT_ULL(0) #define DTE_FLAG_TV BIT_ULL(1) +#define DTE_FLAG_HAD (3ULL << 7) #define DTE_FLAG_GIOV BIT_ULL(54) #define DTE_FLAG_GV BIT_ULL(55) #define DTE_GLX_SHIFT (56) @@ -413,6 +423,7 @@ #define IOMMU_PAGE_MASK (((1ULL << 52) - 1) & ~0xfffULL) #define IOMMU_PTE_PRESENT(pte) ((pte) & IOMMU_PTE_PR) +#define IOMMU_PTE_DIRTY(pte) ((pte) & IOMMU_PTE_HD) #define IOMMU_PTE_PAGE(pte) (iommu_phys_to_virt((pte) & IOMMU_PAGE_MASK)) #define IOMMU_PTE_MODE(pte) (((pte) >> 9) & 0x07) @@ -563,6 +574,7 @@ struct protection_domain { int nid; /* Node ID */ u64 *gcr3_tbl; /* Guest CR3 table */ unsigned long flags; /* flags to find out type of domain */ + bool dirty_tracking; /* dirty tracking is enabled in the domain */ unsigned dev_cnt; /* devices assigned to this domain */ unsigned dev_iommu[MAX_IOMMUS]; /* per-IOMMU reference count */ }; diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c index 2892aa1b4dc1..953f867b4943 100644 --- a/drivers/iommu/amd/io_pgtable.c +++ b/drivers/iommu/amd/io_pgtable.c @@ -486,6 +486,74 @@ static phys_addr_t iommu_v1_iova_to_phys(struct io_pgtable_ops *ops, unsigned lo return (__pte & ~offset_mask) | (iova & offset_mask); } +static bool pte_test_and_clear_dirty(u64 *ptep, unsigned long size, + unsigned long flags) +{ + bool test_only = flags & IOMMU_DIRTY_NO_CLEAR; + bool dirty = false; + int i, count; + + /* + * 2.2.3.2 Host Dirty Support + * When a non-default page size is used , software must OR the + * Dirty bits in all of the replicated host PTEs used to map + * the page. The IOMMU does not guarantee the Dirty bits are + * set in all of the replicated PTEs. Any portion of the page + * may have been written even if the Dirty bit is set in only + * one of the replicated PTEs. + */ + count = PAGE_SIZE_PTE_COUNT(size); + for (i = 0; i < count && test_only; i++) { + if (test_bit(IOMMU_PTE_HD_BIT, + (unsigned long *) &ptep[i])) { + dirty = true; + break; + } + } + + for (i = 0; i < count && !test_only; i++) { + if (test_and_clear_bit(IOMMU_PTE_HD_BIT, + (unsigned long *) &ptep[i])) { + dirty = true; + } + } + + return dirty; +} + +static int iommu_v1_read_and_clear_dirty(struct io_pgtable_ops *ops, + unsigned long iova, size_t size, + unsigned long flags, + struct iommu_dirty_bitmap *dirty) +{ + struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops); + unsigned long end = iova + size - 1; + + do { + unsigned long pgsize = 0; + u64 *ptep, pte; + + ptep = fetch_pte(pgtable, iova, &pgsize); + if (ptep) + pte = READ_ONCE(*ptep); + if (!ptep || !IOMMU_PTE_PRESENT(pte)) { + pgsize = pgsize ?: PTE_LEVEL_PAGE_SIZE(0); + iova += pgsize; + continue; + } + + /* + * Mark the whole IOVA range as dirty even if only one of + * the replicated PTEs were marked dirty. + */ + if (pte_test_and_clear_dirty(ptep, pgsize, flags)) + iommu_dirty_bitmap_record(dirty, iova, pgsize); + iova += pgsize; + } while (iova < end); + + return 0; +} + /* * ---------------------------------------------------- */ @@ -527,6 +595,7 @@ static struct io_pgtable *v1_alloc_pgtable(struct io_pgtable_cfg *cfg, void *coo pgtable->iop.ops.map_pages = iommu_v1_map_pages; pgtable->iop.ops.unmap_pages = iommu_v1_unmap_pages; pgtable->iop.ops.iova_to_phys = iommu_v1_iova_to_phys; + pgtable->iop.ops.read_and_clear_dirty = iommu_v1_read_and_clear_dirty; return &pgtable->iop; } diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 292a09b2fbbf..e7e9982fdad6 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -66,6 +66,7 @@ LIST_HEAD(hpet_map); LIST_HEAD(acpihid_map); const struct iommu_ops amd_iommu_ops; +const struct iommu_dirty_ops amd_dirty_ops; static ATOMIC_NOTIFIER_HEAD(ppr_notifier); int amd_iommu_max_glx_val = -1; @@ -1611,6 +1612,9 @@ static void set_dte_entry(struct amd_iommu *iommu, u16 devid, pte_root |= 1ULL << DEV_ENTRY_PPR; } + if (domain->dirty_tracking) + pte_root |= DTE_FLAG_HAD; + if (domain->flags & PD_IOMMUV2_MASK) { u64 gcr3 = iommu_virt_to_phys(domain->gcr3_tbl); u64 glx = domain->glx; @@ -2156,10 +2160,16 @@ static inline u64 dma_max_address(void) return ((1ULL << PM_LEVEL_SHIFT(amd_iommu_gpt_level)) - 1); } +static bool amd_iommu_hd_support(struct amd_iommu *iommu) +{ + return iommu && (iommu->features & FEATURE_HDSUP); +} + static struct iommu_domain *do_iommu_domain_alloc(unsigned int type, struct device *dev, u32 flags) { + bool enforce_dirty = flags & IOMMU_HWPT_ALLOC_ENFORCE_DIRTY; struct protection_domain *domain; struct amd_iommu *iommu = NULL; @@ -2176,6 +2186,9 @@ static struct iommu_domain *do_iommu_domain_alloc(unsigned int type, if (amd_iommu_snp_en && (type == IOMMU_DOMAIN_IDENTITY)) return ERR_PTR(-EINVAL); + if (enforce_dirty && !amd_iommu_hd_support(iommu)) + return ERR_PTR(-EOPNOTSUPP); + domain = protection_domain_alloc(type); if (!domain) return ERR_PTR(-ENOMEM); @@ -2190,6 +2203,9 @@ static struct iommu_domain *do_iommu_domain_alloc(unsigned int type, iommu->iommu.ops->pgsize_bitmap; domain->domain.ops = iommu->iommu.ops->default_domain_ops; + + if (enforce_dirty) + domain->domain.dirty_ops = &amd_dirty_ops; } return &domain->domain; @@ -2254,6 +2270,13 @@ static int amd_iommu_attach_device(struct iommu_domain *dom, dev_data->defer_attach = false; + /* + * Restrict to devices with compatible IOMMU hardware support + * when enforcement of dirty tracking is enabled. + */ + if (dom->dirty_ops && !amd_iommu_hd_support(iommu)) + return -EINVAL; + if (dev_data->domain) detach_device(dev); @@ -2372,6 +2395,11 @@ static bool amd_iommu_capable(struct device *dev, enum iommu_cap cap) return true; case IOMMU_CAP_DEFERRED_FLUSH: return true; + case IOMMU_CAP_DIRTY: { + struct amd_iommu *iommu = rlookup_amd_iommu(dev); + + return amd_iommu_hd_support(iommu); + } default: break; } @@ -2379,6 +2407,69 @@ static bool amd_iommu_capable(struct device *dev, enum iommu_cap cap) return false; } +static int amd_iommu_set_dirty_tracking(struct iommu_domain *domain, + bool enable) +{ + struct protection_domain *pdomain = to_pdomain(domain); + struct dev_table_entry *dev_table; + struct iommu_dev_data *dev_data; + struct amd_iommu *iommu; + unsigned long flags; + u64 pte_root; + + spin_lock_irqsave(&pdomain->lock, flags); + if (!(pdomain->dirty_tracking ^ enable)) { + spin_unlock_irqrestore(&pdomain->lock, flags); + return 0; + } + + list_for_each_entry(dev_data, &pdomain->dev_list, list) { + iommu = rlookup_amd_iommu(dev_data->dev); + if (!iommu) + continue; + + dev_table = get_dev_table(iommu); + pte_root = dev_table[dev_data->devid].data[0]; + + pte_root = (enable ? + pte_root | DTE_FLAG_HAD : pte_root & ~DTE_FLAG_HAD); + + /* Flush device DTE */ + dev_table[dev_data->devid].data[0] = pte_root; + device_flush_dte(dev_data); + } + + /* Flush IOTLB to mark IOPTE dirty on the next translation(s) */ + amd_iommu_domain_flush_tlb_pde(pdomain); + amd_iommu_domain_flush_complete(pdomain); + pdomain->dirty_tracking = enable; + spin_unlock_irqrestore(&pdomain->lock, flags); + + return 0; +} + +static int amd_iommu_read_and_clear_dirty(struct iommu_domain *domain, + unsigned long iova, size_t size, + unsigned long flags, + struct iommu_dirty_bitmap *dirty) +{ + struct protection_domain *pdomain = to_pdomain(domain); + struct io_pgtable_ops *ops = &pdomain->iop.iop.ops; + unsigned long lflags; + + if (!ops || !ops->read_and_clear_dirty) + return -EOPNOTSUPP; + + spin_lock_irqsave(&pdomain->lock, lflags); + if (!pdomain->dirty_tracking && dirty->bitmap) { + spin_unlock_irqrestore(&pdomain->lock, lflags); + return -EINVAL; + } + spin_unlock_irqrestore(&pdomain->lock, lflags); + + return ops->read_and_clear_dirty(ops, iova, size, flags, dirty); +} + static void amd_iommu_get_resv_regions(struct device *dev, struct list_head *head) { @@ -2501,6 +2592,11 @@ static bool amd_iommu_enforce_cache_coherency(struct iommu_domain *domain) return true; } +const struct iommu_dirty_ops amd_dirty_ops = { + .set_dirty_tracking = amd_iommu_set_dirty_tracking, + .read_and_clear_dirty = amd_iommu_read_and_clear_dirty, +}; + const struct iommu_ops amd_iommu_ops = { .capable = amd_iommu_capable, .domain_alloc = amd_iommu_domain_alloc,

[v4,11/18] iommu/amd: Access/Dirty bit support in IOPTEs

Commit Message

Comments

Patch