diff mbox series

[v3,5/6] iommu/vt-d: Flush PASID-based iotlb for iova over first level

Message ID 20191211021219.8997-6-baolu.lu@linux.intel.com (mailing list archive)
State New, archived
Headers show
Series Use 1st-level for IOVA translation | expand

Commit Message

Baolu Lu Dec. 11, 2019, 2:12 a.m. UTC
When software has changed first-level tables, it should invalidate
the affected IOTLB and the paging-structure-caches using the PASID-
based-IOTLB Invalidate Descriptor defined in spec 6.5.2.4.

Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
---
 drivers/iommu/dmar.c        | 41 ++++++++++++++++++++++++++++++++++
 drivers/iommu/intel-iommu.c | 44 ++++++++++++++++++++++++-------------
 include/linux/intel-iommu.h |  2 ++
 3 files changed, 72 insertions(+), 15 deletions(-)

Comments

Yi Liu Dec. 13, 2019, 11:42 a.m. UTC | #1
Hi Allen,

> From: kvm-owner@vger.kernel.org [mailto:kvm-owner@vger.kernel.org] On Behalf
> Of Lu Baolu
> Sent: Wednesday, December 11, 2019 10:12 AM
> To: Joerg Roedel <joro@8bytes.org>; David Woodhouse <dwmw2@infradead.org>;
> Subject: [PATCH v3 5/6] iommu/vt-d: Flush PASID-based iotlb for iova over first level
> 
> When software has changed first-level tables, it should invalidate
> the affected IOTLB and the paging-structure-caches using the PASID-
> based-IOTLB Invalidate Descriptor defined in spec 6.5.2.4.
> 
> Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
> ---
>  drivers/iommu/dmar.c        | 41 ++++++++++++++++++++++++++++++++++
>  drivers/iommu/intel-iommu.c | 44 ++++++++++++++++++++++++-------------
>  include/linux/intel-iommu.h |  2 ++
>  3 files changed, 72 insertions(+), 15 deletions(-)
> 
> diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c
> index 3acfa6a25fa2..fb30d5053664 100644
> --- a/drivers/iommu/dmar.c
> +++ b/drivers/iommu/dmar.c
> @@ -1371,6 +1371,47 @@ void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16
> sid, u16 pfsid,
>  	qi_submit_sync(&desc, iommu);
>  }
> 
> +/* PASID-based IOTLB invalidation */
> +void qi_flush_piotlb(struct intel_iommu *iommu, u16 did, u32 pasid, u64 addr,
> +		     unsigned long npages, bool ih)
> +{
> +	struct qi_desc desc = {.qw2 = 0, .qw3 = 0};
> +
> +	/*
> +	 * npages == -1 means a PASID-selective invalidation, otherwise,
> +	 * a positive value for Page-selective-within-PASID invalidation.
> +	 * 0 is not a valid input.
> +	 */
> +	if (WARN_ON(!npages)) {
> +		pr_err("Invalid input npages = %ld\n", npages);
> +		return;
> +	}
> +
> +	if (npages == -1) {
> +		desc.qw0 = QI_EIOTLB_PASID(pasid) |
> +				QI_EIOTLB_DID(did) |
> +				QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) |
> +				QI_EIOTLB_TYPE;
> +		desc.qw1 = 0;
> +	} else {
> +		int mask = ilog2(__roundup_pow_of_two(npages));
> +		unsigned long align = (1ULL << (VTD_PAGE_SHIFT + mask));
> +
> +		if (WARN_ON_ONCE(!ALIGN(addr, align)))
> +			addr &= ~(align - 1);
> +
> +		desc.qw0 = QI_EIOTLB_PASID(pasid) |
> +				QI_EIOTLB_DID(did) |
> +				QI_EIOTLB_GRAN(QI_GRAN_PSI_PASID) |
> +				QI_EIOTLB_TYPE;
> +		desc.qw1 = QI_EIOTLB_ADDR(addr) |
> +				QI_EIOTLB_IH(ih) |
> +				QI_EIOTLB_AM(mask);
> +	}
> +
> +	qi_submit_sync(&desc, iommu);
> +}
> +
>  /*
>   * Disable Queued Invalidation interface.
>   */
> diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
> index 83a7abf0c4f0..e47f5fe37b59 100644
> --- a/drivers/iommu/intel-iommu.c
> +++ b/drivers/iommu/intel-iommu.c
> @@ -1520,18 +1520,24 @@ static void iommu_flush_iotlb_psi(struct intel_iommu
> *iommu,
> 
>  	if (ih)
>  		ih = 1 << 6;
> -	/*
> -	 * Fallback to domain selective flush if no PSI support or the size is
> -	 * too big.
> -	 * PSI requires page size to be 2 ^ x, and the base address is naturally
> -	 * aligned to the size
> -	 */
> -	if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu-
> >cap))
> -		iommu->flush.flush_iotlb(iommu, did, 0, 0,
> -						DMA_TLB_DSI_FLUSH);
> -	else
> -		iommu->flush.flush_iotlb(iommu, did, addr | ih, mask,
> -						DMA_TLB_PSI_FLUSH);
> +
> +	if (domain_use_first_level(domain)) {
> +		qi_flush_piotlb(iommu, did, domain->default_pasid,
> +				addr, pages, ih);

I'm not sure if my understanding is correct. But let me tell a story.
Assuming we assign a mdev and a PF/VF to a single VM, then there
will be p_iotlb tagged with PASID_RID2PASID and p_iotlb tagged with
default_pasid. We may want to flush both... If this operation is
invoked per-device, then need to pass in a hint to indicate whether
to use PASID_RID2PASID or default_pasid, or you may just issue two
flush with the two PASID values. Thoughts?

Regards,
Yi Liu
Baolu Lu Dec. 14, 2019, 3:24 a.m. UTC | #2
Hi Liu Yi,

On 12/13/19 7:42 PM, Liu, Yi L wrote:
>> From: kvm-owner@vger.kernel.org [mailto:kvm-owner@vger.kernel.org] On Behalf
>> Of Lu Baolu
>> Sent: Wednesday, December 11, 2019 10:12 AM
>> To: Joerg Roedel <joro@8bytes.org>; David Woodhouse <dwmw2@infradead.org>;
>> Subject: [PATCH v3 5/6] iommu/vt-d: Flush PASID-based iotlb for iova over first level
>>
>> When software has changed first-level tables, it should invalidate
>> the affected IOTLB and the paging-structure-caches using the PASID-
>> based-IOTLB Invalidate Descriptor defined in spec 6.5.2.4.
>>
>> Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
>> ---
>>   drivers/iommu/dmar.c        | 41 ++++++++++++++++++++++++++++++++++
>>   drivers/iommu/intel-iommu.c | 44 ++++++++++++++++++++++++-------------
>>   include/linux/intel-iommu.h |  2 ++
>>   3 files changed, 72 insertions(+), 15 deletions(-)
>>
>> diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c
>> index 3acfa6a25fa2..fb30d5053664 100644
>> --- a/drivers/iommu/dmar.c
>> +++ b/drivers/iommu/dmar.c
>> @@ -1371,6 +1371,47 @@ void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16
>> sid, u16 pfsid,
>>   	qi_submit_sync(&desc, iommu);
>>   }
>>
>> +/* PASID-based IOTLB invalidation */
>> +void qi_flush_piotlb(struct intel_iommu *iommu, u16 did, u32 pasid, u64 addr,
>> +		     unsigned long npages, bool ih)
>> +{
>> +	struct qi_desc desc = {.qw2 = 0, .qw3 = 0};
>> +
>> +	/*
>> +	 * npages == -1 means a PASID-selective invalidation, otherwise,
>> +	 * a positive value for Page-selective-within-PASID invalidation.
>> +	 * 0 is not a valid input.
>> +	 */
>> +	if (WARN_ON(!npages)) {
>> +		pr_err("Invalid input npages = %ld\n", npages);
>> +		return;
>> +	}
>> +
>> +	if (npages == -1) {
>> +		desc.qw0 = QI_EIOTLB_PASID(pasid) |
>> +				QI_EIOTLB_DID(did) |
>> +				QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) |
>> +				QI_EIOTLB_TYPE;
>> +		desc.qw1 = 0;
>> +	} else {
>> +		int mask = ilog2(__roundup_pow_of_two(npages));
>> +		unsigned long align = (1ULL << (VTD_PAGE_SHIFT + mask));
>> +
>> +		if (WARN_ON_ONCE(!ALIGN(addr, align)))
>> +			addr &= ~(align - 1);
>> +
>> +		desc.qw0 = QI_EIOTLB_PASID(pasid) |
>> +				QI_EIOTLB_DID(did) |
>> +				QI_EIOTLB_GRAN(QI_GRAN_PSI_PASID) |
>> +				QI_EIOTLB_TYPE;
>> +		desc.qw1 = QI_EIOTLB_ADDR(addr) |
>> +				QI_EIOTLB_IH(ih) |
>> +				QI_EIOTLB_AM(mask);
>> +	}
>> +
>> +	qi_submit_sync(&desc, iommu);
>> +}
>> +
>>   /*
>>    * Disable Queued Invalidation interface.
>>    */
>> diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
>> index 83a7abf0c4f0..e47f5fe37b59 100644
>> --- a/drivers/iommu/intel-iommu.c
>> +++ b/drivers/iommu/intel-iommu.c
>> @@ -1520,18 +1520,24 @@ static void iommu_flush_iotlb_psi(struct intel_iommu
>> *iommu,
>>
>>   	if (ih)
>>   		ih = 1 << 6;
>> -	/*
>> -	 * Fallback to domain selective flush if no PSI support or the size is
>> -	 * too big.
>> -	 * PSI requires page size to be 2 ^ x, and the base address is naturally
>> -	 * aligned to the size
>> -	 */
>> -	if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu-
>>> cap))
>> -		iommu->flush.flush_iotlb(iommu, did, 0, 0,
>> -						DMA_TLB_DSI_FLUSH);
>> -	else
>> -		iommu->flush.flush_iotlb(iommu, did, addr | ih, mask,
>> -						DMA_TLB_PSI_FLUSH);
>> +
>> +	if (domain_use_first_level(domain)) {
>> +		qi_flush_piotlb(iommu, did, domain->default_pasid,
>> +				addr, pages, ih);
> 
> I'm not sure if my understanding is correct. But let me tell a story.
> Assuming we assign a mdev and a PF/VF to a single VM, then there
> will be p_iotlb tagged with PASID_RID2PASID and p_iotlb tagged with
> default_pasid. We may want to flush both... If this operation is

I assume that SRIOV and SIOV are exclusive. You can't enable both SRIOV
and SIOV on a single device. So the mdev and PF/VF are from different
devices, right?

Or, in SRIOV case, you can wrap a PF or VF as a mediated device. But
this mdev still be backed with a pasid of RID2PASID.

> invoked per-device, then need to pass in a hint to indicate whether
> to use PASID_RID2PASID or default_pasid, or you may just issue two
> flush with the two PASID values. Thoughts?

This is per-domain and each domain has specific domain id and default
pasid (assume default domain is 0 in RID2PASID case).

Best regards,
baolu
Yi Liu Dec. 15, 2019, 9:22 a.m. UTC | #3
Hi Baolu,

Please check replies below:

> From: Lu Baolu [mailto:baolu.lu@linux.intel.com]
> Sent: Saturday, December 14, 2019 11:24 AM
> To: Liu, Yi L <yi.l.liu@intel.com>; Joerg Roedel <joro@8bytes.org>; David
> Woodhouse <dwmw2@infradead.org>; Alex Williamson
> <alex.williamson@redhat.com>
> Subject: Re: [PATCH v3 5/6] iommu/vt-d: Flush PASID-based iotlb for iova over first
> level
> 
> Hi Liu Yi,
> 
> On 12/13/19 7:42 PM, Liu, Yi L wrote:
> >> From: kvm-owner@vger.kernel.org [mailto:kvm-owner@vger.kernel.org] On
> Behalf
> >> Of Lu Baolu
> >> Sent: Wednesday, December 11, 2019 10:12 AM
> >> To: Joerg Roedel <joro@8bytes.org>; David Woodhouse
> <dwmw2@infradead.org>;
> >> Subject: [PATCH v3 5/6] iommu/vt-d: Flush PASID-based iotlb for iova over first
> level
> >>
> >> When software has changed first-level tables, it should invalidate
> >> the affected IOTLB and the paging-structure-caches using the PASID-
> >> based-IOTLB Invalidate Descriptor defined in spec 6.5.2.4.
> >>
> >> Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
> >> ---
> >>   drivers/iommu/dmar.c        | 41 ++++++++++++++++++++++++++++++++++
> >>   drivers/iommu/intel-iommu.c | 44 ++++++++++++++++++++++++-------------
> >>   include/linux/intel-iommu.h |  2 ++
> >>   3 files changed, 72 insertions(+), 15 deletions(-)
> >>
> >> diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c
> >> index 3acfa6a25fa2..fb30d5053664 100644
> >> --- a/drivers/iommu/dmar.c
> >> +++ b/drivers/iommu/dmar.c
> >> @@ -1371,6 +1371,47 @@ void qi_flush_dev_iotlb(struct intel_iommu *iommu,
> u16
> >> sid, u16 pfsid,
> >>   	qi_submit_sync(&desc, iommu);
> >>   }
> >>
> >> +/* PASID-based IOTLB invalidation */
> >> +void qi_flush_piotlb(struct intel_iommu *iommu, u16 did, u32 pasid, u64 addr,
> >> +		     unsigned long npages, bool ih)
> >> +{
> >> +	struct qi_desc desc = {.qw2 = 0, .qw3 = 0};
> >> +
> >> +	/*
> >> +	 * npages == -1 means a PASID-selective invalidation, otherwise,
> >> +	 * a positive value for Page-selective-within-PASID invalidation.
> >> +	 * 0 is not a valid input.
> >> +	 */
> >> +	if (WARN_ON(!npages)) {
> >> +		pr_err("Invalid input npages = %ld\n", npages);
> >> +		return;
> >> +	}
> >> +
> >> +	if (npages == -1) {
> >> +		desc.qw0 = QI_EIOTLB_PASID(pasid) |
> >> +				QI_EIOTLB_DID(did) |
> >> +				QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) |
> >> +				QI_EIOTLB_TYPE;
> >> +		desc.qw1 = 0;
> >> +	} else {
> >> +		int mask = ilog2(__roundup_pow_of_two(npages));
> >> +		unsigned long align = (1ULL << (VTD_PAGE_SHIFT + mask));
> >> +
> >> +		if (WARN_ON_ONCE(!ALIGN(addr, align)))
> >> +			addr &= ~(align - 1);
> >> +
> >> +		desc.qw0 = QI_EIOTLB_PASID(pasid) |
> >> +				QI_EIOTLB_DID(did) |
> >> +				QI_EIOTLB_GRAN(QI_GRAN_PSI_PASID) |
> >> +				QI_EIOTLB_TYPE;
> >> +		desc.qw1 = QI_EIOTLB_ADDR(addr) |
> >> +				QI_EIOTLB_IH(ih) |
> >> +				QI_EIOTLB_AM(mask);
> >> +	}
> >> +
> >> +	qi_submit_sync(&desc, iommu);
> >> +}
> >> +
> >>   /*
> >>    * Disable Queued Invalidation interface.
> >>    */
> >> diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
> >> index 83a7abf0c4f0..e47f5fe37b59 100644
> >> --- a/drivers/iommu/intel-iommu.c
> >> +++ b/drivers/iommu/intel-iommu.c
> >> @@ -1520,18 +1520,24 @@ static void iommu_flush_iotlb_psi(struct
> intel_iommu
> >> *iommu,
> >>
> >>   	if (ih)
> >>   		ih = 1 << 6;
> >> -	/*
> >> -	 * Fallback to domain selective flush if no PSI support or the size is
> >> -	 * too big.
> >> -	 * PSI requires page size to be 2 ^ x, and the base address is naturally
> >> -	 * aligned to the size
> >> -	 */
> >> -	if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu-
> >>> cap))
> >> -		iommu->flush.flush_iotlb(iommu, did, 0, 0,
> >> -						DMA_TLB_DSI_FLUSH);
> >> -	else
> >> -		iommu->flush.flush_iotlb(iommu, did, addr | ih, mask,
> >> -						DMA_TLB_PSI_FLUSH);
> >> +
> >> +	if (domain_use_first_level(domain)) {
> >> +		qi_flush_piotlb(iommu, did, domain->default_pasid,
> >> +				addr, pages, ih);
> >
> > I'm not sure if my understanding is correct. But let me tell a story.
> > Assuming we assign a mdev and a PF/VF to a single VM, then there
> > will be p_iotlb tagged with PASID_RID2PASID and p_iotlb tagged with
> > default_pasid. We may want to flush both... If this operation is
> 
> I assume that SRIOV and SIOV are exclusive. You can't enable both SRIOV
> and SIOV on a single device.

yes, but I'm not talking use them on a single device...

> So the mdev and PF/VF are from different
> devices, right?

yes, the case I mentioned above is: a mdev from a device (say devA),
and another device (say devB). Create mdev on devA and assign it to
a VM together with devB.

> 
> Or, in SRIOV case, you can wrap a PF or VF as a mediated device. But
> this mdev still be backed with a pasid of RID2PASID.

My comment has no business with wrapping PF/VR as mdev...

> 
> > invoked per-device, then need to pass in a hint to indicate whether
> > to use PASID_RID2PASID or default_pasid, or you may just issue two
> > flush with the two PASID values. Thoughts?
> 
> This is per-domain and each domain has specific domain id and default
> pasid (assume default domain is 0 in RID2PASID case).

Ok, let me explain more... default pasid is meaningful only when
the domain has been attached to a device as an aux-domain. right?
If a domain only has one device, and it is attached to this device as
normal domain (normal domain means non aux-domain here). Then
you should flush cache with domain-id and RID2PASID value.
If a domain has one device, and it is attached to this device as
aux-domain. Then you may want to flush cache with domain-id
and default pasid. right?
Then let's come to the case I mentioned in previous email. a mdev
and another device assigned to a single VM. In host, you will have
a domain which has two devices, one device(deva) is attached as
normal domain, another one (devB) is attached as aux-domain. Then
which pasid should be used when the mapping in IOVA page table is
modified? RID2PASID or default pasid? I think both should be used
since the domain means differently to the two devices. If you just
use default pasid, then deva may still be able to use stale caches.

Regards,
Yi Liu
Baolu Lu Dec. 17, 2019, 1:19 a.m. UTC | #4
Hi Yi,

On 12/15/19 5:22 PM, Liu, Yi L wrote:
> Ok, let me explain more... default pasid is meaningful only when
> the domain has been attached to a device as an aux-domain. right?

No exactly. Each domain has a specific default pasid, no matter normal
domain (RID based) or aux-domain (PASID based). The difference is for a
normal domain RID2PASID value is used, for an aux-domain the pasid is
allocated from a global pool.

The same concept used in VT-d 3.x scalable mode. For RID based DMA
translation RID2PASID value is used when walking the tables; For PASID
based DMA translation a real pasid in the transaction is used.

> If a domain only has one device, and it is attached to this device as
> normal domain (normal domain means non aux-domain here). Then
> you should flush cache with domain-id and RID2PASID value.
> If a domain has one device, and it is attached to this device as
> aux-domain. Then you may want to flush cache with domain-id
> and default pasid. right?

A domain's counterpart is IOMMU group. So we say attach/detach domain
to/from devices in a group. We don't allow devices with different
default pasid sitting in a same group, right?

> Then let's come to the case I mentioned in previous email. a mdev
> and another device assigned to a single VM. In host, you will have
> a domain which has two devices, one device(deva) is attached as

No. We will have two IOMMU groups and two domains. Correct me if my
understanding is not right.

Best regards,
baolu

> normal domain, another one (devB) is attached as aux-domain. Then
> which pasid should be used when the mapping in IOVA page table is
> modified? RID2PASID or default pasid? I think both should be used
> since the domain means differently to the two devices. If you just
> use default pasid, then deva may still be able to use stale caches.
> 
> Regards,
> Yi Liu
Baolu Lu Dec. 17, 2019, 1:37 a.m. UTC | #5
Hi again,

On 12/17/19 9:19 AM, Lu Baolu wrote:
> Hi Yi,
> 
> On 12/15/19 5:22 PM, Liu, Yi L wrote:
>> Ok, let me explain more... default pasid is meaningful only when
>> the domain has been attached to a device as an aux-domain. right?
> 
> No exactly. Each domain has a specific default pasid, no matter normal
> domain (RID based) or aux-domain (PASID based). The difference is for a
> normal domain RID2PASID value is used, for an aux-domain the pasid is
> allocated from a global pool.
> 
> The same concept used in VT-d 3.x scalable mode. For RID based DMA
> translation RID2PASID value is used when walking the tables; For PASID
> based DMA translation a real pasid in the transaction is used.
> 
>> If a domain only has one device, and it is attached to this device as
>> normal domain (normal domain means non aux-domain here). Then
>> you should flush cache with domain-id and RID2PASID value.
>> If a domain has one device, and it is attached to this device as
>> aux-domain. Then you may want to flush cache with domain-id
>> and default pasid. right?
> 
> A domain's counterpart is IOMMU group. So we say attach/detach domain
> to/from devices in a group. We don't allow devices with different
> default pasid sitting in a same group, right?
> 
>> Then let's come to the case I mentioned in previous email. a mdev
>> and another device assigned to a single VM. In host, you will have
>> a domain which has two devices, one device(deva) is attached as
> 
> No. We will have two IOMMU groups and two domains. Correct me if my
> understanding is not right.

Reconsidered this. Unfortunately, my understanding is not right. :-(

A single domain could be attached to multiple IOMMU groups. So it
comes to the issue you concerned. Do I understand it right?

> 
>> normal domain, another one (devB) is attached as aux-domain. Then
>> which pasid should be used when the mapping in IOVA page table is
>> modified? RID2PASID or default pasid? I think both should be used
>> since the domain means differently to the two devices. If you just
>> use default pasid, then deva may still be able to use stale caches.

You are right. I will change it accordingly. The logic should look
like:

if (domain attached to physical device)
	flush_piotlb_with_RID2PASID()
else if (domain_attached_to_mdev_device)
	flush_piotlb_with_default_pasid()

Does this work for you? Thanks for catching this!

Best regards,
baolu
Baolu Lu Dec. 17, 2019, 1:39 a.m. UTC | #6
Hi,

On 12/17/19 9:37 AM, Lu Baolu wrote:
> You are right. I will change it accordingly. The logic should look
> like:
> 
> if (domain attached to physical device)
>      flush_piotlb_with_RID2PASID()
> else if (domain_attached_to_mdev_device)
>      flush_piotlb_with_default_pasid()
> 

Both! so no "else" here.

Best regards,
baolu

> Does this work for you? Thanks for catching this!
Yi Liu Dec. 17, 2019, 2:26 a.m. UTC | #7
> From: Lu Baolu [mailto:baolu.lu@linux.intel.com]
> Sent: Tuesday, December 17, 2019 9:37 AM
> To: Liu, Yi L <yi.l.liu@intel.com>; Joerg Roedel <joro@8bytes.org>; David
> Woodhouse <dwmw2@infradead.org>; Alex Williamson
> <alex.williamson@redhat.com>
> Subject: Re: [PATCH v3 5/6] iommu/vt-d: Flush PASID-based iotlb for iova over first
> level
> 
> Hi again,
> 
> On 12/17/19 9:19 AM, Lu Baolu wrote:
> > Hi Yi,
> >
> > On 12/15/19 5:22 PM, Liu, Yi L wrote:
> >> Ok, let me explain more... default pasid is meaningful only when
> >> the domain has been attached to a device as an aux-domain. right?
> >
> > No exactly. Each domain has a specific default pasid, no matter normal
> > domain (RID based) or aux-domain (PASID based). The difference is for a
> > normal domain RID2PASID value is used, for an aux-domain the pasid is
> > allocated from a global pool.
> >
> > The same concept used in VT-d 3.x scalable mode. For RID based DMA
> > translation RID2PASID value is used when walking the tables; For PASID
> > based DMA translation a real pasid in the transaction is used.
> >
> >> If a domain only has one device, and it is attached to this device as
> >> normal domain (normal domain means non aux-domain here). Then
> >> you should flush cache with domain-id and RID2PASID value.
> >> If a domain has one device, and it is attached to this device as
> >> aux-domain. Then you may want to flush cache with domain-id
> >> and default pasid. right?
> >
> > A domain's counterpart is IOMMU group. So we say attach/detach domain
> > to/from devices in a group. We don't allow devices with different
> > default pasid sitting in a same group, right?
> >
> >> Then let's come to the case I mentioned in previous email. a mdev
> >> and another device assigned to a single VM. In host, you will have
> >> a domain which has two devices, one device(deva) is attached as
> >
> > No. We will have two IOMMU groups and two domains. Correct me if my
> > understanding is not right.
> 
> Reconsidered this. Unfortunately, my understanding is not right. :-(
> 
> A single domain could be attached to multiple IOMMU groups. So it
> comes to the issue you concerned. Do I understand it right?

yes. Device within the same group has no such issue since such
devices are not able to enabled aux-domain. Now our understanding
are aligned. :-)

> >
> >> normal domain, another one (devB) is attached as aux-domain. Then
> >> which pasid should be used when the mapping in IOVA page table is
> >> modified? RID2PASID or default pasid? I think both should be used
> >> since the domain means differently to the two devices. If you just
> >> use default pasid, then deva may still be able to use stale caches.
> 
> You are right. I will change it accordingly. The logic should look
> like:
> 
> if (domain attached to physical device)
> 	flush_piotlb_with_RID2PASID()
> else if (domain_attached_to_mdev_device)
> 	flush_piotlb_with_default_pasid()
> 
> Does this work for you? Thanks for catching this!

If no else, it would work for scalable mode. ^_^ I noticed you've
already corrected by yourself in another reply. :-) Look forward to
your next version.

Regards,
Yi Liu
Yi Liu Dec. 17, 2019, 2:36 a.m. UTC | #8
> From: Liu, Yi L <yi.l.liu@intel.com>
> Sent: Tuesday, December 17, 2019 10:26 AM
> To: Lu Baolu <baolu.lu@linux.intel.com>; Joerg Roedel <joro@8bytes.org>; David
> Woodhouse <dwmw2@infradead.org>; Alex Williamson
> <alex.williamson@redhat.com>
> Subject: RE: [PATCH v3 5/6] iommu/vt-d: Flush PASID-based iotlb for iova over first
> level
> 
> > From: Lu Baolu [mailto:baolu.lu@linux.intel.com]
> > Sent: Tuesday, December 17, 2019 9:37 AM
> > To: Liu, Yi L <yi.l.liu@intel.com>; Joerg Roedel <joro@8bytes.org>; David
> > Woodhouse <dwmw2@infradead.org>; Alex Williamson
> > <alex.williamson@redhat.com>
> > Subject: Re: [PATCH v3 5/6] iommu/vt-d: Flush PASID-based iotlb for iova over first
> > level
> >
> > Hi again,
> >
> > On 12/17/19 9:19 AM, Lu Baolu wrote:
> > > Hi Yi,
> > >
> > > On 12/15/19 5:22 PM, Liu, Yi L wrote:
> > >> Ok, let me explain more... default pasid is meaningful only when
> > >> the domain has been attached to a device as an aux-domain. right?
> > >
> > > No exactly. Each domain has a specific default pasid, no matter normal
> > > domain (RID based) or aux-domain (PASID based). The difference is for a
> > > normal domain RID2PASID value is used, for an aux-domain the pasid is
> > > allocated from a global pool.
> > >
> > > The same concept used in VT-d 3.x scalable mode. For RID based DMA
> > > translation RID2PASID value is used when walking the tables; For PASID
> > > based DMA translation a real pasid in the transaction is used.
> > >
> > >> If a domain only has one device, and it is attached to this device as
> > >> normal domain (normal domain means non aux-domain here). Then
> > >> you should flush cache with domain-id and RID2PASID value.
> > >> If a domain has one device, and it is attached to this device as
> > >> aux-domain. Then you may want to flush cache with domain-id
> > >> and default pasid. right?
> > >
> > > A domain's counterpart is IOMMU group. So we say attach/detach domain
> > > to/from devices in a group. We don't allow devices with different
> > > default pasid sitting in a same group, right?
> > >
> > >> Then let's come to the case I mentioned in previous email. a mdev
> > >> and another device assigned to a single VM. In host, you will have
> > >> a domain which has two devices, one device(deva) is attached as
> > >
> > > No. We will have two IOMMU groups and two domains. Correct me if my
> > > understanding is not right.
> >
> > Reconsidered this. Unfortunately, my understanding is not right. :-(
> >
> > A single domain could be attached to multiple IOMMU groups. So it
> > comes to the issue you concerned. Do I understand it right?
> 
> yes. Device within the same group has no such issue since such
> devices are not able to enabled aux-domain. Now our understanding
> are aligned. :-)
> 
> > >
> > >> normal domain, another one (devB) is attached as aux-domain. Then
> > >> which pasid should be used when the mapping in IOVA page table is
> > >> modified? RID2PASID or default pasid? I think both should be used
> > >> since the domain means differently to the two devices. If you just
> > >> use default pasid, then deva may still be able to use stale caches.
> >
> > You are right. I will change it accordingly. The logic should look
> > like:
> >
> > if (domain attached to physical device)
> > 	flush_piotlb_with_RID2PASID()
> > else if (domain_attached_to_mdev_device)
> > 	flush_piotlb_with_default_pasid()
> >
> > Does this work for you? Thanks for catching this!
> 
> If no else, it would work for scalable mode. ^_^ I noticed you've
> already corrected by yourself in another reply. :-) Look forward to
> your next version.

BTW. The discussion in this thread may apply to other cache flush
in your series. Please have a check. At least, there are two places which
need to be updated in this single patch.
 
Regards,
Yi Liu
Yi Liu Dec. 17, 2019, 2:44 a.m. UTC | #9
> From: Lu Baolu [mailto:baolu.lu@linux.intel.com]
> Sent: Tuesday, December 17, 2019 9:39 AM
> To: Liu, Yi L <yi.l.liu@intel.com>; Joerg Roedel <joro@8bytes.org>; David
> Woodhouse <dwmw2@infradead.org>; Alex Williamson
> <alex.williamson@redhat.com>
> Subject: Re: [PATCH v3 5/6] iommu/vt-d: Flush PASID-based iotlb for iova over first
> level
> 
> Hi,
> 
> On 12/17/19 9:37 AM, Lu Baolu wrote:
> > You are right. I will change it accordingly. The logic should look
> > like:
> >
> > if (domain attached to physical device)
> >      flush_piotlb_with_RID2PASID()
> > else if (domain_attached_to_mdev_device)
> >      flush_piotlb_with_default_pasid()
> >
> 
> Both! so no "else" here.

aha, if we want to flush more precisely, we may check whether
domain->default_pasid is allocated. If not, it means no mdev is
involved, then we may save a p_iotlb_inv_dsc submission and also
save VT-d hardware circles from doing useless flush. This is just
optimization, it's up to you to pick it or not in this patchset. I'm
fine with flush "both" since it guarantees correctness.

Regards,
Yi Liu
Baolu Lu Dec. 17, 2019, 4:13 a.m. UTC | #10
Hi,

On 2019/12/17 10:36, Liu, Yi L wrote:
>> From: Liu, Yi L <yi.l.liu@intel.com>
>> Sent: Tuesday, December 17, 2019 10:26 AM
>> To: Lu Baolu <baolu.lu@linux.intel.com>; Joerg Roedel <joro@8bytes.org>; David
>> Woodhouse <dwmw2@infradead.org>; Alex Williamson
>> <alex.williamson@redhat.com>
>> Subject: RE: [PATCH v3 5/6] iommu/vt-d: Flush PASID-based iotlb for iova over first
>> level
>>
>>> From: Lu Baolu [mailto:baolu.lu@linux.intel.com]
>>> Sent: Tuesday, December 17, 2019 9:37 AM
>>> To: Liu, Yi L <yi.l.liu@intel.com>; Joerg Roedel <joro@8bytes.org>; David
>>> Woodhouse <dwmw2@infradead.org>; Alex Williamson
>>> <alex.williamson@redhat.com>
>>> Subject: Re: [PATCH v3 5/6] iommu/vt-d: Flush PASID-based iotlb for iova over first
>>> level
>>>
>>> Hi again,
>>>
>>> On 12/17/19 9:19 AM, Lu Baolu wrote:
>>>> Hi Yi,
>>>>
>>>> On 12/15/19 5:22 PM, Liu, Yi L wrote:
>>>>> Ok, let me explain more... default pasid is meaningful only when
>>>>> the domain has been attached to a device as an aux-domain. right?
>>>> No exactly. Each domain has a specific default pasid, no matter normal
>>>> domain (RID based) or aux-domain (PASID based). The difference is for a
>>>> normal domain RID2PASID value is used, for an aux-domain the pasid is
>>>> allocated from a global pool.
>>>>
>>>> The same concept used in VT-d 3.x scalable mode. For RID based DMA
>>>> translation RID2PASID value is used when walking the tables; For PASID
>>>> based DMA translation a real pasid in the transaction is used.
>>>>
>>>>> If a domain only has one device, and it is attached to this device as
>>>>> normal domain (normal domain means non aux-domain here). Then
>>>>> you should flush cache with domain-id and RID2PASID value.
>>>>> If a domain has one device, and it is attached to this device as
>>>>> aux-domain. Then you may want to flush cache with domain-id
>>>>> and default pasid. right?
>>>> A domain's counterpart is IOMMU group. So we say attach/detach domain
>>>> to/from devices in a group. We don't allow devices with different
>>>> default pasid sitting in a same group, right?
>>>>
>>>>> Then let's come to the case I mentioned in previous email. a mdev
>>>>> and another device assigned to a single VM. In host, you will have
>>>>> a domain which has two devices, one device(deva) is attached as
>>>> No. We will have two IOMMU groups and two domains. Correct me if my
>>>> understanding is not right.
>>> Reconsidered this. Unfortunately, my understanding is not right. :-(
>>>
>>> A single domain could be attached to multiple IOMMU groups. So it
>>> comes to the issue you concerned. Do I understand it right?
>> yes. Device within the same group has no such issue since such
>> devices are not able to enabled aux-domain. Now our understanding
>> are aligned. :-)
>>
>>>>> normal domain, another one (devB) is attached as aux-domain. Then
>>>>> which pasid should be used when the mapping in IOVA page table is
>>>>> modified? RID2PASID or default pasid? I think both should be used
>>>>> since the domain means differently to the two devices. If you just
>>>>> use default pasid, then deva may still be able to use stale caches.
>>> You are right. I will change it accordingly. The logic should look
>>> like:
>>>
>>> if (domain attached to physical device)
>>> 	flush_piotlb_with_RID2PASID()
>>> else if (domain_attached_to_mdev_device)
>>> 	flush_piotlb_with_default_pasid()
>>>
>>> Does this work for you? Thanks for catching this!
>> If no else, it would work for scalable mode. ^_^ I noticed you've
>> already corrected by yourself in another reply. :-) Look forward to
>> your next version.
> BTW. The discussion in this thread may apply to other cache flush
> in your series. Please have a check. At least, there are two places which
> need to be updated in this single patch.

Sure. I will.

Best regards,

baolu
>   
> Regards,
> Yi Liu
diff mbox series

Patch

diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c
index 3acfa6a25fa2..fb30d5053664 100644
--- a/drivers/iommu/dmar.c
+++ b/drivers/iommu/dmar.c
@@ -1371,6 +1371,47 @@  void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid,
 	qi_submit_sync(&desc, iommu);
 }
 
+/* PASID-based IOTLB invalidation */
+void qi_flush_piotlb(struct intel_iommu *iommu, u16 did, u32 pasid, u64 addr,
+		     unsigned long npages, bool ih)
+{
+	struct qi_desc desc = {.qw2 = 0, .qw3 = 0};
+
+	/*
+	 * npages == -1 means a PASID-selective invalidation, otherwise,
+	 * a positive value for Page-selective-within-PASID invalidation.
+	 * 0 is not a valid input.
+	 */
+	if (WARN_ON(!npages)) {
+		pr_err("Invalid input npages = %ld\n", npages);
+		return;
+	}
+
+	if (npages == -1) {
+		desc.qw0 = QI_EIOTLB_PASID(pasid) |
+				QI_EIOTLB_DID(did) |
+				QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) |
+				QI_EIOTLB_TYPE;
+		desc.qw1 = 0;
+	} else {
+		int mask = ilog2(__roundup_pow_of_two(npages));
+		unsigned long align = (1ULL << (VTD_PAGE_SHIFT + mask));
+
+		if (WARN_ON_ONCE(!ALIGN(addr, align)))
+			addr &= ~(align - 1);
+
+		desc.qw0 = QI_EIOTLB_PASID(pasid) |
+				QI_EIOTLB_DID(did) |
+				QI_EIOTLB_GRAN(QI_GRAN_PSI_PASID) |
+				QI_EIOTLB_TYPE;
+		desc.qw1 = QI_EIOTLB_ADDR(addr) |
+				QI_EIOTLB_IH(ih) |
+				QI_EIOTLB_AM(mask);
+	}
+
+	qi_submit_sync(&desc, iommu);
+}
+
 /*
  * Disable Queued Invalidation interface.
  */
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index 83a7abf0c4f0..e47f5fe37b59 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -1520,18 +1520,24 @@  static void iommu_flush_iotlb_psi(struct intel_iommu *iommu,
 
 	if (ih)
 		ih = 1 << 6;
-	/*
-	 * Fallback to domain selective flush if no PSI support or the size is
-	 * too big.
-	 * PSI requires page size to be 2 ^ x, and the base address is naturally
-	 * aligned to the size
-	 */
-	if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap))
-		iommu->flush.flush_iotlb(iommu, did, 0, 0,
-						DMA_TLB_DSI_FLUSH);
-	else
-		iommu->flush.flush_iotlb(iommu, did, addr | ih, mask,
-						DMA_TLB_PSI_FLUSH);
+
+	if (domain_use_first_level(domain)) {
+		qi_flush_piotlb(iommu, did, domain->default_pasid,
+				addr, pages, ih);
+	} else {
+		/*
+		 * Fallback to domain selective flush if no PSI support or
+		 * the size is too big. PSI requires page size to be 2 ^ x,
+		 * and the base address is naturally aligned to the size.
+		 */
+		if (!cap_pgsel_inv(iommu->cap) ||
+		    mask > cap_max_amask_val(iommu->cap))
+			iommu->flush.flush_iotlb(iommu, did, 0, 0,
+							DMA_TLB_DSI_FLUSH);
+		else
+			iommu->flush.flush_iotlb(iommu, did, addr | ih, mask,
+							DMA_TLB_PSI_FLUSH);
+	}
 
 	/*
 	 * In caching mode, changes of pages from non-present to present require
@@ -1546,8 +1552,11 @@  static inline void __mapping_notify_one(struct intel_iommu *iommu,
 					struct dmar_domain *domain,
 					unsigned long pfn, unsigned int pages)
 {
-	/* It's a non-present to present mapping. Only flush if caching mode */
-	if (cap_caching_mode(iommu->cap))
+	/*
+	 * It's a non-present to present mapping. Only flush if caching mode
+	 * and second level.
+	 */
+	if (cap_caching_mode(iommu->cap) && !domain_use_first_level(domain))
 		iommu_flush_iotlb_psi(iommu, domain, pfn, pages, 0, 1);
 	else
 		iommu_flush_write_buffer(iommu);
@@ -1564,7 +1573,12 @@  static void iommu_flush_iova(struct iova_domain *iovad)
 		struct intel_iommu *iommu = g_iommus[idx];
 		u16 did = domain->iommu_did[iommu->seq_id];
 
-		iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
+		if (domain_use_first_level(domain))
+			qi_flush_piotlb(iommu, did,
+					domain->default_pasid, 0, -1, 0);
+		else
+			iommu->flush.flush_iotlb(iommu, did, 0, 0,
+						 DMA_TLB_DSI_FLUSH);
 
 		if (!cap_caching_mode(iommu->cap))
 			iommu_flush_dev_iotlb(get_iommu_domain(iommu, did),
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index 66b525bad434..b693540e4b4f 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -648,6 +648,8 @@  extern void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
 			  unsigned int size_order, u64 type);
 extern void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid,
 			u16 qdep, u64 addr, unsigned mask);
+void qi_flush_piotlb(struct intel_iommu *iommu, u16 did, u32 pasid, u64 addr,
+		     unsigned long npages, bool ih);
 extern int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu);
 
 extern int dmar_ir_support(void);