diff mbox series

[v6,8/8] iommu/vt-d: Disallow read-only mappings to nest parent domain

Message ID 20231020093246.17015-9-yi.l.liu@intel.com (mailing list archive)
State New, archived
Headers show
Series Add Intel VT-d nested translation (part 1/2) | expand

Commit Message

Yi Liu Oct. 20, 2023, 9:32 a.m. UTC
From: Lu Baolu <baolu.lu@linux.intel.com>

When remapping hardware is configured by system software in scalable mode
as Nested (PGTT=011b) and with PWSNP field Set in the PASID-table-entry,
it may Set Accessed bit and Dirty bit (and Extended Access bit if enabled)
in first-stage page-table entries even when second-stage mappings indicate
that corresponding first-stage page-table is Read-Only.

As the result, contents of pages designated by VMM as Read-Only can be
modified by IOMMU via PML5E (PML4E for 4-level tables) access as part of
address translation process due to DMAs issued by Guest.

This disallows read-only mappings in the domain that is supposed to be used
as nested parent. Reference from Sapphire Rapids Specification Update [1],
errata details, SPR17. Userspace should know this limitation by checking
the IOMMU_HW_INFO_VTD_ERRATA_772415_SPR17 flag reported in the IOMMU_GET_HW_INFO
ioctl.

[1] https://www.intel.com/content/www/us/en/content-details/772415/content-details.html

Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Signed-off-by: Yi Liu <yi.l.liu@intel.com>
---
 drivers/iommu/intel/iommu.c  |  9 +++++++++
 drivers/iommu/intel/iommu.h  |  1 +
 include/uapi/linux/iommufd.h | 12 +++++++++++-
 3 files changed, 21 insertions(+), 1 deletion(-)

Comments

Baolu Lu Oct. 21, 2023, 3:24 a.m. UTC | #1
On 10/20/23 5:32 PM, Yi Liu wrote:
> From: Lu Baolu <baolu.lu@linux.intel.com>
> 
> When remapping hardware is configured by system software in scalable mode
> as Nested (PGTT=011b) and with PWSNP field Set in the PASID-table-entry,
> it may Set Accessed bit and Dirty bit (and Extended Access bit if enabled)
> in first-stage page-table entries even when second-stage mappings indicate
> that corresponding first-stage page-table is Read-Only.
> 
> As the result, contents of pages designated by VMM as Read-Only can be
> modified by IOMMU via PML5E (PML4E for 4-level tables) access as part of
> address translation process due to DMAs issued by Guest.
> 
> This disallows read-only mappings in the domain that is supposed to be used
> as nested parent. Reference from Sapphire Rapids Specification Update [1],
> errata details, SPR17. Userspace should know this limitation by checking
> the IOMMU_HW_INFO_VTD_ERRATA_772415_SPR17 flag reported in the IOMMU_GET_HW_INFO
> ioctl.
> 
> [1] https://www.intel.com/content/www/us/en/content-details/772415/content-details.html
> 
> Reviewed-by: Kevin Tian <kevin.tian@intel.com>
> Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
> Signed-off-by: Yi Liu <yi.l.liu@intel.com>
> ---
>   drivers/iommu/intel/iommu.c  |  9 +++++++++
>   drivers/iommu/intel/iommu.h  |  1 +
>   include/uapi/linux/iommufd.h | 12 +++++++++++-
>   3 files changed, 21 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
> index c7704e7efd4a..a0341a069fbf 100644
> --- a/drivers/iommu/intel/iommu.c
> +++ b/drivers/iommu/intel/iommu.c
> @@ -2193,6 +2193,11 @@ __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
>   	if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
>   		return -EINVAL;
>   
> +	if (!(prot & DMA_PTE_WRITE) && domain->is_nested_parent) {
> +		pr_err_ratelimited("Read-only mapping is disallowed on the domain which serves as the parent in a nested configuration, due to HW errata (ERRATA_772415_SPR17)\n");
> +		return -EINVAL;
> +	}
> +
>   	attr = prot & (DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP);
>   	attr |= DMA_FL_PTE_PRESENT;
>   	if (domain->use_first_level) {
> @@ -4101,6 +4106,9 @@ intel_iommu_domain_alloc_user(struct device *dev, u32 flags,
>   		domain = iommu_domain_alloc(dev->bus);
>   		if (!domain)
>   			return ERR_PTR(-ENOMEM);
> +		container_of(domain,
> +			     struct dmar_domain,
> +			     domain)->is_nested_parent = request_nest_parent;

How about
		to_dmar_domain(domain)->is_nested_parent = ...;
?

I would also prefer to introduce is_nested_parent_domain to the user
domain allocation patch (patch 7/8). This field should be checked when
allocating a nested user domain.

diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 8f81a5c9fcc0..d3f6bc1f6590 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -4121,6 +4121,8 @@ intel_iommu_domain_alloc_user(struct device *dev, 
u32 flags,
                 return ERR_PTR(-EINVAL);
         if (request_nest_parent)
                 return ERR_PTR(-EINVAL);
+       if (!to_dmar_domain(parent)->is_nested_parent)
+               return ERR_PTR(-EINVAL);

         return intel_nested_domain_alloc(parent, user_data);
  }

Best regards,
baolu
Yi Liu Oct. 23, 2023, 11:15 a.m. UTC | #2
> From: Baolu Lu <baolu.lu@linux.intel.com>
> Sent: Saturday, October 21, 2023 11:24 AM
> 
> On 10/20/23 5:32 PM, Yi Liu wrote:
> > From: Lu Baolu <baolu.lu@linux.intel.com>
> >
> > When remapping hardware is configured by system software in scalable mode
> > as Nested (PGTT=011b) and with PWSNP field Set in the PASID-table-entry,
> > it may Set Accessed bit and Dirty bit (and Extended Access bit if enabled)
> > in first-stage page-table entries even when second-stage mappings indicate
> > that corresponding first-stage page-table is Read-Only.
> >
> > As the result, contents of pages designated by VMM as Read-Only can be
> > modified by IOMMU via PML5E (PML4E for 4-level tables) access as part of
> > address translation process due to DMAs issued by Guest.
> >
> > This disallows read-only mappings in the domain that is supposed to be used
> > as nested parent. Reference from Sapphire Rapids Specification Update [1],
> > errata details, SPR17. Userspace should know this limitation by checking
> > the IOMMU_HW_INFO_VTD_ERRATA_772415_SPR17 flag reported in the
> IOMMU_GET_HW_INFO
> > ioctl.
> >
> > [1] https://www.intel.com/content/www/us/en/content-details/772415/content-
> details.html
> >
> > Reviewed-by: Kevin Tian <kevin.tian@intel.com>
> > Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
> > Signed-off-by: Yi Liu <yi.l.liu@intel.com>
> > ---
> >   drivers/iommu/intel/iommu.c  |  9 +++++++++
> >   drivers/iommu/intel/iommu.h  |  1 +
> >   include/uapi/linux/iommufd.h | 12 +++++++++++-
> >   3 files changed, 21 insertions(+), 1 deletion(-)
> >
> > diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
> > index c7704e7efd4a..a0341a069fbf 100644
> > --- a/drivers/iommu/intel/iommu.c
> > +++ b/drivers/iommu/intel/iommu.c
> > @@ -2193,6 +2193,11 @@ __domain_mapping(struct dmar_domain *domain,
> unsigned long iov_pfn,
> >   	if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
> >   		return -EINVAL;
> >
> > +	if (!(prot & DMA_PTE_WRITE) && domain->is_nested_parent) {
> > +		pr_err_ratelimited("Read-only mapping is disallowed on the domain
> which serves as the parent in a nested configuration, due to HW errata
> (ERRATA_772415_SPR17)\n");
> > +		return -EINVAL;
> > +	}
> > +
> >   	attr = prot & (DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP);
> >   	attr |= DMA_FL_PTE_PRESENT;
> >   	if (domain->use_first_level) {
> > @@ -4101,6 +4106,9 @@ intel_iommu_domain_alloc_user(struct device *dev, u32
> flags,
> >   		domain = iommu_domain_alloc(dev->bus);
> >   		if (!domain)
> >   			return ERR_PTR(-ENOMEM);
> > +		container_of(domain,
> > +			     struct dmar_domain,
> > +			     domain)->is_nested_parent = request_nest_parent;
> 
> How about
> 		to_dmar_domain(domain)->is_nested_parent = ...;
> ?

Yes.

> 
> I would also prefer to introduce is_nested_parent_domain to the user
> domain allocation patch (patch 7/8). This field should be checked when
> allocating a nested user domain.

A ctually, no need. This should be a common check, so iommufd core already
has the check. So the parent should be a nest parent domain, otherwise already
returned in iommufd.

+	if (!parent->nest_parent)
+		return ERR_PTR(-EINVAL);

https://lore.kernel.org/linux-iommu/20231020091946.12173-8-yi.l.liu@intel.com/

> diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
> index 8f81a5c9fcc0..d3f6bc1f6590 100644
> --- a/drivers/iommu/intel/iommu.c
> +++ b/drivers/iommu/intel/iommu.c
> @@ -4121,6 +4121,8 @@ intel_iommu_domain_alloc_user(struct device *dev,
> u32 flags,
>                  return ERR_PTR(-EINVAL);
>          if (request_nest_parent)
>                  return ERR_PTR(-EINVAL);
> +       if (!to_dmar_domain(parent)->is_nested_parent)
> +               return ERR_PTR(-EINVAL);
> 
>          return intel_nested_domain_alloc(parent, user_data);
>   }
> 
> Best regards,
> baolu
Baolu Lu Oct. 23, 2023, 12:18 p.m. UTC | #3
On 2023/10/23 19:15, Liu, Yi L wrote:
>> I would also prefer to introduce is_nested_parent_domain to the user
>> domain allocation patch (patch 7/8). This field should be checked when
>> allocating a nested user domain.
> A ctually, no need. This should be a common check, so iommufd core already
> has the check. So the parent should be a nest parent domain, otherwise already
> returned in iommufd.
> 
> +	if (!parent->nest_parent)
> +		return ERR_PTR(-EINVAL);

I know this will not cause errors in the code. But since you are
introducing is_parent property in the vt-d driver. The integrity of the
property should be ensured. In this way, it will make the code more
readable and maintainable.

Best regards,
baolu
Yi Liu Oct. 24, 2023, 2:06 a.m. UTC | #4
> From: Baolu Lu <baolu.lu@linux.intel.com>
> Sent: Monday, October 23, 2023 8:18 PM
> 
> On 2023/10/23 19:15, Liu, Yi L wrote:
> >> I would also prefer to introduce is_nested_parent_domain to the user
> >> domain allocation patch (patch 7/8). This field should be checked when
> >> allocating a nested user domain.
> > A ctually, no need. This should be a common check, so iommufd core already
> > has the check. So the parent should be a nest parent domain, otherwise already
> > returned in iommufd.
> >
> > +	if (!parent->nest_parent)
> > +		return ERR_PTR(-EINVAL);
> 
> I know this will not cause errors in the code. But since you are
> introducing is_parent property in the vt-d driver. The integrity of the
> property should be ensured. In this way, it will make the code more
> readable and maintainable.

Ok, if consider it as a property, then it's fine. At first, I just want to
make it as a special flag for this errata. But we cannot predict if there
will be more nested parent special stuffs, then this flag is also needed.

Regards,
Yi Liu
diff mbox series

Patch

diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index c7704e7efd4a..a0341a069fbf 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -2193,6 +2193,11 @@  __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
 	if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
 		return -EINVAL;
 
+	if (!(prot & DMA_PTE_WRITE) && domain->is_nested_parent) {
+		pr_err_ratelimited("Read-only mapping is disallowed on the domain which serves as the parent in a nested configuration, due to HW errata (ERRATA_772415_SPR17)\n");
+		return -EINVAL;
+	}
+
 	attr = prot & (DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP);
 	attr |= DMA_FL_PTE_PRESENT;
 	if (domain->use_first_level) {
@@ -4101,6 +4106,9 @@  intel_iommu_domain_alloc_user(struct device *dev, u32 flags,
 		domain = iommu_domain_alloc(dev->bus);
 		if (!domain)
 			return ERR_PTR(-ENOMEM);
+		container_of(domain,
+			     struct dmar_domain,
+			     domain)->is_nested_parent = request_nest_parent;
 		return domain;
 	}
 
@@ -4839,6 +4847,7 @@  static void *intel_iommu_hw_info(struct device *dev, u32 *length, u32 *type)
 	if (!vtd)
 		return ERR_PTR(-ENOMEM);
 
+	vtd->flags = IOMMU_HW_INFO_VTD_ERRATA_772415_SPR17;
 	vtd->cap_reg = iommu->cap;
 	vtd->ecap_reg = iommu->ecap;
 	*length = sizeof(*vtd);
diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h
index b4560983b8b9..0539a0f47557 100644
--- a/drivers/iommu/intel/iommu.h
+++ b/drivers/iommu/intel/iommu.h
@@ -593,6 +593,7 @@  struct dmar_domain {
 					 * otherwise, goes through the second
 					 * level.
 					 */
+	u8 is_nested_parent:1;		/* has other domains nested on it */
 
 	spinlock_t lock;		/* Protect device tracking lists */
 	struct list_head devices;	/* all devices' list */
diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h
index 9b843a197ea8..c8f523a7bc06 100644
--- a/include/uapi/linux/iommufd.h
+++ b/include/uapi/linux/iommufd.h
@@ -439,10 +439,20 @@  struct iommu_hwpt_alloc {
 };
 #define IOMMU_HWPT_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_HWPT_ALLOC)
 
+/**
+ * enum iommu_hw_info_vtd_flags - Flags for VT-d hw_info
+ * @IOMMU_HW_INFO_VTD_ERRATA_772415_SPR17: If set, disallow nesting on domains
+ *                                   with read-only mapping.
+ *                                   https://www.intel.com/content/www/us/en/content-details/772415/content-details.html
+ */
+enum iommu_hw_info_vtd_flags {
+	IOMMU_HW_INFO_VTD_ERRATA_772415_SPR17 = 1 << 0,
+};
+
 /**
  * struct iommu_hw_info_vtd - Intel VT-d hardware information
  *
- * @flags: Must be 0
+ * @flags: Combination of enum iommu_hw_info_vtd_flags
  * @__reserved: Must be 0
  *
  * @cap_reg: Value of Intel VT-d capability register defined in VT-d spec