diff mbox series

[RFC,3/4] iommu/vt-d: Map/unmap domain with mmmap/mmunmap

Message ID 20190923122454.9888-4-baolu.lu@linux.intel.com (mailing list archive)
State New, archived
Headers show
Series Use 1st-level for DMA remapping in guest | expand

Commit Message

Baolu Lu Sept. 23, 2019, 12:24 p.m. UTC
If a dmar domain has the DOMAIN_FLAG_FIRST_LEVEL_TRANS bit set
in its flags, the IOMMU will use the first-level page table for
translation. Hence, we need to map or unmap addresses in the
first-level page table.

Cc: Ashok Raj <ashok.raj@intel.com>
Cc: Jacob Pan <jacob.jun.pan@linux.intel.com>
Cc: Kevin Tian <kevin.tian@intel.com>
Cc: Liu Yi L <yi.l.liu@intel.com>
Cc: Yi Sun <yi.y.sun@linux.intel.com>
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
---
 drivers/iommu/intel-iommu.c | 94 ++++++++++++++++++++++++++++++++-----
 1 file changed, 82 insertions(+), 12 deletions(-)

Comments

Tian, Kevin Sept. 25, 2019, 5 a.m. UTC | #1
> From: Lu Baolu [mailto:baolu.lu@linux.intel.com]
> Sent: Monday, September 23, 2019 8:25 PM
> 
> If a dmar domain has DOMAIN_FLAG_FIRST_LEVEL_TRANS bit set
> in its flags, IOMMU will use the first level page table for
> translation. Hence, we need to map or unmap addresses in the
> first level page table.
> 
> Cc: Ashok Raj <ashok.raj@intel.com>
> Cc: Jacob Pan <jacob.jun.pan@linux.intel.com>
> Cc: Kevin Tian <kevin.tian@intel.com>
> Cc: Liu Yi L <yi.l.liu@intel.com>
> Cc: Yi Sun <yi.y.sun@linux.intel.com>
> Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
> ---
>  drivers/iommu/intel-iommu.c | 94 ++++++++++++++++++++++++++++++++-
> ----
>  1 file changed, 82 insertions(+), 12 deletions(-)
> 
> diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
> index 9cfe8098d993..103480016010 100644
> --- a/drivers/iommu/intel-iommu.c
> +++ b/drivers/iommu/intel-iommu.c
> @@ -168,6 +168,11 @@ static inline unsigned long virt_to_dma_pfn(void
> *p)
>  	return page_to_dma_pfn(virt_to_page(p));
>  }
> 
> +static inline unsigned long dma_pfn_to_addr(unsigned long pfn)
> +{
> +	return pfn << VTD_PAGE_SHIFT;
> +}
> +
>  /* global iommu list, set NULL for ignored DMAR units */
>  static struct intel_iommu **g_iommus;
> 
> @@ -307,6 +312,9 @@ static int hw_pass_through = 1;
>   */
>  #define DOMAIN_FLAG_LOSE_CHILDREN		BIT(1)
> 
> +/* Domain uses first level translation for DMA remapping. */
> +#define DOMAIN_FLAG_FIRST_LEVEL_TRANS		BIT(2)
> +
>  #define for_each_domain_iommu(idx, domain)			\
>  	for (idx = 0; idx < g_num_of_iommus; idx++)		\
>  		if (domain->iommu_refcnt[idx])
> @@ -552,6 +560,11 @@ static inline int domain_type_is_si(struct
> dmar_domain *domain)
>  	return domain->flags & DOMAIN_FLAG_STATIC_IDENTITY;
>  }
> 
> +static inline int domain_type_is_flt(struct dmar_domain *domain)
> +{
> +	return domain->flags & DOMAIN_FLAG_FIRST_LEVEL_TRANS;
> +}
> +
>  static inline int domain_pfn_supported(struct dmar_domain *domain,
>  				       unsigned long pfn)
>  {
> @@ -1147,8 +1160,15 @@ static struct page *domain_unmap(struct
> dmar_domain *domain,
>  	BUG_ON(start_pfn > last_pfn);
> 
>  	/* we don't need lock here; nobody else touches the iova range */
> -	freelist = dma_pte_clear_level(domain, agaw_to_level(domain-
> >agaw),
> -				       domain->pgd, 0, start_pfn, last_pfn,
> NULL);
> +	if (domain_type_is_flt(domain))
> +		freelist = intel_mmunmap_range(domain,
> +					       dma_pfn_to_addr(start_pfn),
> +					       dma_pfn_to_addr(last_pfn + 1));
> +	else
> +		freelist = dma_pte_clear_level(domain,
> +					       agaw_to_level(domain->agaw),
> +					       domain->pgd, 0, start_pfn,
> +					       last_pfn, NULL);

what about providing a unified interface at the caller side, then having 
the level differentiated within the interface?

> 
>  	/* free pgd */
>  	if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw))
> {
> @@ -2213,9 +2233,10 @@ static inline int hardware_largepage_caps(struct
> dmar_domain *domain,
>  	return level;
>  }
> 
> -static int __domain_mapping(struct dmar_domain *domain, unsigned long
> iov_pfn,
> -			    struct scatterlist *sg, unsigned long phys_pfn,
> -			    unsigned long nr_pages, int prot)
> +static int
> +__domain_mapping_dma(struct dmar_domain *domain, unsigned long
> iov_pfn,
> +		     struct scatterlist *sg, unsigned long phys_pfn,
> +		     unsigned long nr_pages, int prot)
>  {
>  	struct dma_pte *first_pte = NULL, *pte = NULL;
>  	phys_addr_t uninitialized_var(pteval);
> @@ -2223,13 +2244,6 @@ static int __domain_mapping(struct
> dmar_domain *domain, unsigned long iov_pfn,
>  	unsigned int largepage_lvl = 0;
>  	unsigned long lvl_pages = 0;
> 
> -	BUG_ON(!domain_pfn_supported(domain, iov_pfn + nr_pages - 1));
> -
> -	if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
> -		return -EINVAL;
> -
> -	prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;
> -
>  	if (!sg) {
>  		sg_res = nr_pages;
>  		pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) |
> prot;
> @@ -2328,6 +2342,62 @@ static int __domain_mapping(struct
> dmar_domain *domain, unsigned long iov_pfn,
>  	return 0;
>  }
> 
> +static int
> +__domain_mapping_mm(struct dmar_domain *domain, unsigned long
> iov_pfn,
> +		    struct scatterlist *sg, unsigned long phys_pfn,
> +		    unsigned long nr_pages, int prot)
> +{
> +	int ret = 0;
> +
> +	if (!sg)
> +		return intel_mmmap_range(domain,
> dma_pfn_to_addr(iov_pfn),
> +					 dma_pfn_to_addr(iov_pfn +
> nr_pages),
> +					 dma_pfn_to_addr(phys_pfn), prot);
> +
> +	while (nr_pages > 0) {
> +		unsigned long sg_pages, phys;
> +		unsigned long pgoff = sg->offset & ~PAGE_MASK;
> +
> +		sg_pages = aligned_nrpages(sg->offset, sg->length);
> +		phys = sg_phys(sg) - pgoff;
> +
> +		ret = intel_mmmap_range(domain,
> dma_pfn_to_addr(iov_pfn),
> +					dma_pfn_to_addr(iov_pfn +
> sg_pages),
> +					phys, prot);
> +		if (ret)
> +			break;
> +
> +		sg->dma_address =
> ((dma_addr_t)dma_pfn_to_addr(iov_pfn)) + pgoff;
> +		sg->dma_length = sg->length;
> +
> +		nr_pages -= sg_pages;
> +		iov_pfn += sg_pages;
> +		sg = sg_next(sg);
> +	}
> +
> +	return ret;
> +}
> +
> +static int
> +__domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
> +		 struct scatterlist *sg, unsigned long phys_pfn,
> +		 unsigned long nr_pages, int prot)
> +{
> +	BUG_ON(!domain_pfn_supported(domain, iov_pfn + nr_pages - 1));
> +
> +	if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
> +		return -EINVAL;
> +
> +	prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;
> +
> +	if (domain_type_is_flt(domain))
> +		return __domain_mapping_mm(domain, iov_pfn, sg,
> +					   phys_pfn, nr_pages, prot);
> +	else
> +		return __domain_mapping_dma(domain, iov_pfn, sg,
> +					    phys_pfn, nr_pages, prot);
> +}
> +
>  static int domain_mapping(struct dmar_domain *domain, unsigned long
> iov_pfn,
>  			  struct scatterlist *sg, unsigned long phys_pfn,
>  			  unsigned long nr_pages, int prot)
> --
> 2.17.1
Baolu Lu Sept. 25, 2019, 7:06 a.m. UTC | #2
Hi,

On 9/25/19 1:00 PM, Tian, Kevin wrote:
>> From: Lu Baolu [mailto:baolu.lu@linux.intel.com]
>> Sent: Monday, September 23, 2019 8:25 PM
>>
>> If a dmar domain has DOMAIN_FLAG_FIRST_LEVEL_TRANS bit set
>> in its flags, IOMMU will use the first level page table for
>> translation. Hence, we need to map or unmap addresses in the
>> first level page table.
>>
>> Cc: Ashok Raj <ashok.raj@intel.com>
>> Cc: Jacob Pan <jacob.jun.pan@linux.intel.com>
>> Cc: Kevin Tian <kevin.tian@intel.com>
>> Cc: Liu Yi L <yi.l.liu@intel.com>
>> Cc: Yi Sun <yi.y.sun@linux.intel.com>
>> Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
>> ---
>>   drivers/iommu/intel-iommu.c | 94 ++++++++++++++++++++++++++++++++-
>> ----
>>   1 file changed, 82 insertions(+), 12 deletions(-)
>>
>> diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
>> index 9cfe8098d993..103480016010 100644
>> --- a/drivers/iommu/intel-iommu.c
>> +++ b/drivers/iommu/intel-iommu.c
>> @@ -168,6 +168,11 @@ static inline unsigned long virt_to_dma_pfn(void
>> *p)
>>   	return page_to_dma_pfn(virt_to_page(p));
>>   }
>>
>> +static inline unsigned long dma_pfn_to_addr(unsigned long pfn)
>> +{
>> +	return pfn << VTD_PAGE_SHIFT;
>> +}
>> +
>>   /* global iommu list, set NULL for ignored DMAR units */
>>   static struct intel_iommu **g_iommus;
>>
>> @@ -307,6 +312,9 @@ static int hw_pass_through = 1;
>>    */
>>   #define DOMAIN_FLAG_LOSE_CHILDREN		BIT(1)
>>
>> +/* Domain uses first level translation for DMA remapping. */
>> +#define DOMAIN_FLAG_FIRST_LEVEL_TRANS		BIT(2)
>> +
>>   #define for_each_domain_iommu(idx, domain)			\
>>   	for (idx = 0; idx < g_num_of_iommus; idx++)		\
>>   		if (domain->iommu_refcnt[idx])
>> @@ -552,6 +560,11 @@ static inline int domain_type_is_si(struct
>> dmar_domain *domain)
>>   	return domain->flags & DOMAIN_FLAG_STATIC_IDENTITY;
>>   }
>>
>> +static inline int domain_type_is_flt(struct dmar_domain *domain)
>> +{
>> +	return domain->flags & DOMAIN_FLAG_FIRST_LEVEL_TRANS;
>> +}
>> +
>>   static inline int domain_pfn_supported(struct dmar_domain *domain,
>>   				       unsigned long pfn)
>>   {
>> @@ -1147,8 +1160,15 @@ static struct page *domain_unmap(struct
>> dmar_domain *domain,
>>   	BUG_ON(start_pfn > last_pfn);
>>
>>   	/* we don't need lock here; nobody else touches the iova range */
>> -	freelist = dma_pte_clear_level(domain, agaw_to_level(domain-
>>> agaw),
>> -				       domain->pgd, 0, start_pfn, last_pfn,
>> NULL);
>> +	if (domain_type_is_flt(domain))
>> +		freelist = intel_mmunmap_range(domain,
>> +					       dma_pfn_to_addr(start_pfn),
>> +					       dma_pfn_to_addr(last_pfn + 1));
>> +	else
>> +		freelist = dma_pte_clear_level(domain,
>> +					       agaw_to_level(domain->agaw),
>> +					       domain->pgd, 0, start_pfn,
>> +					       last_pfn, NULL);
> 
> what about providing a unified interface at the caller side, then having
> the level differentiated within the interface?

Good point! I once thought about adding some ops to struct dmar_domain,
something like:

diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index ed11ef594378..1dd184f76bfb 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -489,7 +489,14 @@ struct dmar_domain {
         struct list_head auxd;          /* link to device's auxiliary 
list */
         struct iova_domain iovad;       /* iova's that belong to this 
domain */

+       /* per domain page table and manipulation ops */
         struct dma_pte  *pgd;           /* virtual address */
+       int (*map)(struct dmar_domain *domain,
+                  unsigned long addr, unsigned long end,
+                  phys_addr_t phys_addr, int dma_prot);
+       struct page *(*unmap)(struct dmar_domain *domain,
+                             unsigned long addr, unsigned long end);
+
         int             gaw;            /* max guest address width */

         /* adjusted guest address width, 0 is level 2 30-bit */

So that this code could be simplified to:

	freelist = domain->unmap(...);

Best regards,
Baolu
diff mbox series

Patch

diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index 9cfe8098d993..103480016010 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -168,6 +168,11 @@  static inline unsigned long virt_to_dma_pfn(void *p)
 	return page_to_dma_pfn(virt_to_page(p));
 }
 
+static inline unsigned long dma_pfn_to_addr(unsigned long pfn)
+{
+	return pfn << VTD_PAGE_SHIFT;
+}
+
 /* global iommu list, set NULL for ignored DMAR units */
 static struct intel_iommu **g_iommus;
 
@@ -307,6 +312,9 @@  static int hw_pass_through = 1;
  */
 #define DOMAIN_FLAG_LOSE_CHILDREN		BIT(1)
 
+/* Domain uses first level translation for DMA remapping. */
+#define DOMAIN_FLAG_FIRST_LEVEL_TRANS		BIT(2)
+
 #define for_each_domain_iommu(idx, domain)			\
 	for (idx = 0; idx < g_num_of_iommus; idx++)		\
 		if (domain->iommu_refcnt[idx])
@@ -552,6 +560,11 @@  static inline int domain_type_is_si(struct dmar_domain *domain)
 	return domain->flags & DOMAIN_FLAG_STATIC_IDENTITY;
 }
 
+static inline int domain_type_is_flt(struct dmar_domain *domain)
+{
+	return domain->flags & DOMAIN_FLAG_FIRST_LEVEL_TRANS;
+}
+
 static inline int domain_pfn_supported(struct dmar_domain *domain,
 				       unsigned long pfn)
 {
@@ -1147,8 +1160,15 @@  static struct page *domain_unmap(struct dmar_domain *domain,
 	BUG_ON(start_pfn > last_pfn);
 
 	/* we don't need lock here; nobody else touches the iova range */
-	freelist = dma_pte_clear_level(domain, agaw_to_level(domain->agaw),
-				       domain->pgd, 0, start_pfn, last_pfn, NULL);
+	if (domain_type_is_flt(domain))
+		freelist = intel_mmunmap_range(domain,
+					       dma_pfn_to_addr(start_pfn),
+					       dma_pfn_to_addr(last_pfn + 1));
+	else
+		freelist = dma_pte_clear_level(domain,
+					       agaw_to_level(domain->agaw),
+					       domain->pgd, 0, start_pfn,
+					       last_pfn, NULL);
 
 	/* free pgd */
 	if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
@@ -2213,9 +2233,10 @@  static inline int hardware_largepage_caps(struct dmar_domain *domain,
 	return level;
 }
 
-static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
-			    struct scatterlist *sg, unsigned long phys_pfn,
-			    unsigned long nr_pages, int prot)
+static int
+__domain_mapping_dma(struct dmar_domain *domain, unsigned long iov_pfn,
+		     struct scatterlist *sg, unsigned long phys_pfn,
+		     unsigned long nr_pages, int prot)
 {
 	struct dma_pte *first_pte = NULL, *pte = NULL;
 	phys_addr_t uninitialized_var(pteval);
@@ -2223,13 +2244,6 @@  static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
 	unsigned int largepage_lvl = 0;
 	unsigned long lvl_pages = 0;
 
-	BUG_ON(!domain_pfn_supported(domain, iov_pfn + nr_pages - 1));
-
-	if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
-		return -EINVAL;
-
-	prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;
-
 	if (!sg) {
 		sg_res = nr_pages;
 		pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
@@ -2328,6 +2342,62 @@  static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
 	return 0;
 }
 
+static int
+__domain_mapping_mm(struct dmar_domain *domain, unsigned long iov_pfn,
+		    struct scatterlist *sg, unsigned long phys_pfn,
+		    unsigned long nr_pages, int prot)
+{
+	int ret = 0;
+
+	if (!sg)
+		return intel_mmmap_range(domain, dma_pfn_to_addr(iov_pfn),
+					 dma_pfn_to_addr(iov_pfn + nr_pages),
+					 dma_pfn_to_addr(phys_pfn), prot);
+
+	while (nr_pages > 0) {
+		unsigned long sg_pages, phys;
+		unsigned long pgoff = sg->offset & ~PAGE_MASK;
+
+		sg_pages = aligned_nrpages(sg->offset, sg->length);
+		phys = sg_phys(sg) - pgoff;
+
+		ret = intel_mmmap_range(domain, dma_pfn_to_addr(iov_pfn),
+					dma_pfn_to_addr(iov_pfn + sg_pages),
+					phys, prot);
+		if (ret)
+			break;
+
+		sg->dma_address = ((dma_addr_t)dma_pfn_to_addr(iov_pfn)) + pgoff;
+		sg->dma_length = sg->length;
+
+		nr_pages -= sg_pages;
+		iov_pfn += sg_pages;
+		sg = sg_next(sg);
+	}
+
+	return ret;
+}
+
+static int
+__domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
+		 struct scatterlist *sg, unsigned long phys_pfn,
+		 unsigned long nr_pages, int prot)
+{
+	BUG_ON(!domain_pfn_supported(domain, iov_pfn + nr_pages - 1));
+
+	if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
+		return -EINVAL;
+
+	prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;
+
+	if (domain_type_is_flt(domain))
+		return __domain_mapping_mm(domain, iov_pfn, sg,
+					   phys_pfn, nr_pages, prot);
+	else
+		return __domain_mapping_dma(domain, iov_pfn, sg,
+					    phys_pfn, nr_pages, prot);
+}
+
 static int domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
 			  struct scatterlist *sg, unsigned long phys_pfn,
 			  unsigned long nr_pages, int prot)