diff mbox series

[v4,14/32] iommu: introduce iommu_domain_alloc_type and the KVM type

Message ID 20220314194451.58266-15-mjrosato@linux.ibm.com (mailing list archive)
State New, archived
Headers show
Series KVM: s390: enable zPCI for interpretive execution | expand

Commit Message

Matthew Rosato March 14, 2022, 7:44 p.m. UTC
s390x will introduce an additional domain type that is used for
managing IOMMU owned by KVM.  Define the type here and add an
interface for allocating a specified type vs the default type.

Signed-off-by: Matthew Rosato <mjrosato@linux.ibm.com>
---
 drivers/iommu/iommu.c |  7 +++++++
 include/linux/iommu.h | 12 ++++++++++++
 2 files changed, 19 insertions(+)

Comments

Jason Gunthorpe March 14, 2022, 9:36 p.m. UTC | #1
On Mon, Mar 14, 2022 at 03:44:33PM -0400, Matthew Rosato wrote:
> s390x will introduce an additional domain type that is used for
> managing IOMMU owned by KVM.  Define the type here and add an
> interface for allocating a specified type vs the default type.
> 
> Signed-off-by: Matthew Rosato <mjrosato@linux.ibm.com>
> ---
>  drivers/iommu/iommu.c |  7 +++++++
>  include/linux/iommu.h | 12 ++++++++++++
>  2 files changed, 19 insertions(+)

I think the general idea is right, but I'm not keen on this as an
interface at all.

We are trying to build in iommufd a generic interface for an IOMMU
driver to expose IOMMU-device-specific domains such as this in a
general purpose way so all the platforms can get what they need.

Jason
Robin Murphy March 15, 2022, 10:49 a.m. UTC | #2
On 2022-03-14 19:44, Matthew Rosato wrote:
> s390x will introduce an additional domain type that is used for
> managing IOMMU owned by KVM.  Define the type here and add an
> interface for allocating a specified type vs the default type.

I'm also not a huge fan of adding a new domain_alloc interface like 
this, however if it is justifiable, then please make it take struct 
device rather than struct bus_type as an argument.

It also sounds like there may be a degree of conceptual overlap here 
with what Jean-Philippe is working on for sharing pagetables between KVM 
and SMMU for Android pKVM, so it's probably worth some thought over 
whether there's any scope for common interfaces in terms of actual 
implementation.

Thanks,
Robin.

> Signed-off-by: Matthew Rosato <mjrosato@linux.ibm.com>
> ---
>   drivers/iommu/iommu.c |  7 +++++++
>   include/linux/iommu.h | 12 ++++++++++++
>   2 files changed, 19 insertions(+)
> 
> diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
> index f2c45b85b9fc..8bb57e0e3945 100644
> --- a/drivers/iommu/iommu.c
> +++ b/drivers/iommu/iommu.c
> @@ -1976,6 +1976,13 @@ void iommu_domain_free(struct iommu_domain *domain)
>   }
>   EXPORT_SYMBOL_GPL(iommu_domain_free);
>   
> +struct iommu_domain *iommu_domain_alloc_type(struct bus_type *bus,
> +					     unsigned int t)
> +{
> +	return __iommu_domain_alloc(bus, t);
> +}
> +EXPORT_SYMBOL_GPL(iommu_domain_alloc_type);
> +
>   static int __iommu_attach_device(struct iommu_domain *domain,
>   				 struct device *dev)
>   {
> diff --git a/include/linux/iommu.h b/include/linux/iommu.h
> index 9208eca4b0d1..b427bbb9f387 100644
> --- a/include/linux/iommu.h
> +++ b/include/linux/iommu.h
> @@ -63,6 +63,7 @@ struct iommu_domain_geometry {
>   					      implementation              */
>   #define __IOMMU_DOMAIN_PT	(1U << 2)  /* Domain is identity mapped   */
>   #define __IOMMU_DOMAIN_DMA_FQ	(1U << 3)  /* DMA-API uses flush queue    */
> +#define __IOMMU_DOMAIN_KVM	(1U << 4)  /* Domain is controlled by KVM */
>   
>   /*
>    * This are the possible domain-types
> @@ -77,6 +78,7 @@ struct iommu_domain_geometry {
>    *				  certain optimizations for these domains
>    *	IOMMU_DOMAIN_DMA_FQ	- As above, but definitely using batched TLB
>    *				  invalidation.
> + *	IOMMU_DOMAIN_KVM	- DMA mappings managed by KVM, used for VMs
>    */
>   #define IOMMU_DOMAIN_BLOCKED	(0U)
>   #define IOMMU_DOMAIN_IDENTITY	(__IOMMU_DOMAIN_PT)
> @@ -86,6 +88,8 @@ struct iommu_domain_geometry {
>   #define IOMMU_DOMAIN_DMA_FQ	(__IOMMU_DOMAIN_PAGING |	\
>   				 __IOMMU_DOMAIN_DMA_API |	\
>   				 __IOMMU_DOMAIN_DMA_FQ)
> +#define IOMMU_DOMAIN_KVM	(__IOMMU_DOMAIN_PAGING |	\
> +				 __IOMMU_DOMAIN_KVM)
>   
>   struct iommu_domain {
>   	unsigned type;
> @@ -421,6 +425,8 @@ extern bool iommu_capable(struct bus_type *bus, enum iommu_cap cap);
>   extern struct iommu_domain *iommu_domain_alloc(struct bus_type *bus);
>   extern struct iommu_group *iommu_group_get_by_id(int id);
>   extern void iommu_domain_free(struct iommu_domain *domain);
> +extern struct iommu_domain *iommu_domain_alloc_type(struct bus_type *bus,
> +						    unsigned int t);
>   extern int iommu_attach_device(struct iommu_domain *domain,
>   			       struct device *dev);
>   extern void iommu_detach_device(struct iommu_domain *domain,
> @@ -708,6 +714,12 @@ static inline void iommu_domain_free(struct iommu_domain *domain)
>   {
>   }
>   
> +static inline struct iommu_domain *iommu_domain_alloc_type(struct bus_type *bus,
> +							   unsigned int t)
> +{
> +	return NULL;
> +}
> +
>   static inline int iommu_attach_device(struct iommu_domain *domain,
>   				      struct device *dev)
>   {
Tian, Kevin March 17, 2022, 5:47 a.m. UTC | #3
> From: Robin Murphy
> Sent: Tuesday, March 15, 2022 6:49 PM
> 
> On 2022-03-14 19:44, Matthew Rosato wrote:
> > s390x will introduce an additional domain type that is used for
> > managing IOMMU owned by KVM.  Define the type here and add an
> > interface for allocating a specified type vs the default type.
> 
> I'm also not a huge fan of adding a new domain_alloc interface like
> this, however if it is justifiable, then please make it take struct
> device rather than struct bus_type as an argument.
> 
> It also sounds like there may be a degree of conceptual overlap here
> with what Jean-Philippe is working on for sharing pagetables between KVM
> and SMMU for Android pKVM, so it's probably worth some thought over
> whether there's any scope for common interfaces in terms of actual
> implementation.
> 

Same here. Yan Zhao is working on page table sharing between KVM
and VT-d. This is one important usage to build atop iommufd and
a set of common interfaces are definitely necessary here. 
Jason Gunthorpe March 17, 2022, 1:52 p.m. UTC | #4
On Thu, Mar 17, 2022 at 05:47:36AM +0000, Tian, Kevin wrote:
> > From: Robin Murphy
> > Sent: Tuesday, March 15, 2022 6:49 PM
> > 
> > On 2022-03-14 19:44, Matthew Rosato wrote:
> > > s390x will introduce an additional domain type that is used for
> > > managing IOMMU owned by KVM.  Define the type here and add an
> > > interface for allocating a specified type vs the default type.
> > 
> > I'm also not a huge fan of adding a new domain_alloc interface like
> > this, however if it is justifiable, then please make it take struct
> > device rather than struct bus_type as an argument.
> > 
> > It also sounds like there may be a degree of conceptual overlap here
> > with what Jean-Philippe is working on for sharing pagetables between KVM
> > and SMMU for Android pKVM, so it's probably worth some thought over
> > whether there's any scope for common interfaces in terms of actual
> > implementation.
> 
> Same here. Yan Zhao is working on page table sharing between KVM
> and VT-d. This is one important usage to build atop iommufd and
> a set of common interfaces are definitely necessary here. 
Tian, Kevin March 18, 2022, 2:23 a.m. UTC | #5
> From: Jason Gunthorpe <jgg@nvidia.com>
> Sent: Thursday, March 17, 2022 9:53 PM
> 
> On Thu, Mar 17, 2022 at 05:47:36AM +0000, Tian, Kevin wrote:
> > > From: Robin Murphy
> > > Sent: Tuesday, March 15, 2022 6:49 PM
> > >
> > > On 2022-03-14 19:44, Matthew Rosato wrote:
> > > > s390x will introduce an additional domain type that is used for
> > > > managing IOMMU owned by KVM.  Define the type here and add an
> > > > interface for allocating a specified type vs the default type.
> > >
> > > I'm also not a huge fan of adding a new domain_alloc interface like
> > > this, however if it is justifiable, then please make it take struct
> > > device rather than struct bus_type as an argument.
> > >
> > > It also sounds like there may be a degree of conceptual overlap here
> > > with what Jean-Philippe is working on for sharing pagetables between
> KVM
> > > and SMMU for Android pKVM, so it's probably worth some thought over
> > > whether there's any scope for common interfaces in terms of actual
> > > implementation.
> >
> > Same here. Yan Zhao is working on page table sharing between KVM
> > and VT-d. This is one important usage to build atop iommufd and
> > a set of common interfaces are definitely necessary here. 
Jason Gunthorpe March 18, 2022, 2:13 p.m. UTC | #6
On Fri, Mar 18, 2022 at 02:23:57AM +0000, Tian, Kevin wrote:

> Yes, that is another major part work besides the iommufd work. And
> it is not compatible with KVM features which rely on the dynamic
> manner of EPT. Though It is a bit questionable whether it's worthy of
> doing so just for saving memory footprint while losing other capabilities,
> it is a requirement for some future security extension in Intel trusted
> computing architecture. And KVM has been pinning pages for SEV/TDX/etc.
> today thus some facilities can be reused. But I agree it is not a simple
> task thus we need start discussion early to explore various gaps in
> iommu and kvm.

Yikes. IMHO this might work better going the other way, have KVM
import the iommu_domain and use that as the KVM page table than vice
versa.

The semantics are a heck of a lot clearer, and it is really obvious
that a lot of KVM becomes disabled if you do this.

Jason
Tian, Kevin March 19, 2022, 7:51 a.m. UTC | #7
> From: Jason Gunthorpe <jgg@nvidia.com>
> Sent: Friday, March 18, 2022 10:13 PM
> 
> On Fri, Mar 18, 2022 at 02:23:57AM +0000, Tian, Kevin wrote:
> 
> > Yes, that is another major part work besides the iommufd work. And
> > it is not compatible with KVM features which rely on the dynamic
> > manner of EPT. Though It is a bit questionable whether it's worthy of
> > doing so just for saving memory footprint while losing other capabilities,
> > it is a requirement for some future security extension in Intel trusted
> > computing architecture. And KVM has been pinning pages for SEV/TDX/etc.
> > today thus some facilities can be reused. But I agree it is not a simple
> > task thus we need start discussion early to explore various gaps in
> > iommu and kvm.
> 
> Yikes. IMHO this might work better going the other way, have KVM
> import the iommu_domain and use that as the KVM page table than vice
> versa.
> 
> The semantics are a heck of a lot clearer, and it is really obvious
> that alot of KVM becomes disabled if you do this.
> 

This is an interesting angle to look at it. But given pinning is already
required in KVM to support SEV/TDX even w/o assigned device, those
restrictions have to be understood by KVM MMU code, which makes
a KVM-managed page table under such restrictions closer to being
sharable with IOMMU.

Thanks
Kevin
Jason Gunthorpe March 21, 2022, 2:07 p.m. UTC | #8
On Sat, Mar 19, 2022 at 07:51:31AM +0000, Tian, Kevin wrote:
> > From: Jason Gunthorpe <jgg@nvidia.com>
> > Sent: Friday, March 18, 2022 10:13 PM
> > 
> > On Fri, Mar 18, 2022 at 02:23:57AM +0000, Tian, Kevin wrote:
> > 
> > > Yes, that is another major part work besides the iommufd work. And
> > > it is not compatible with KVM features which rely on the dynamic
> > > manner of EPT. Though It is a bit questionable whether it's worthy of
> > > doing so just for saving memory footprint while losing other capabilities,
> > > it is a requirement for some future security extension in Intel trusted
> > > computing architecture. And KVM has been pinning pages for SEV/TDX/etc.
> > > today thus some facilities can be reused. But I agree it is not a simple
> > > task thus we need start discussion early to explore various gaps in
> > > iommu and kvm.
> > 
> > Yikes. IMHO this might work better going the other way, have KVM
> > import the iommu_domain and use that as the KVM page table than vice
> > versa.
> > 
> > The semantics are a heck of a lot clearer, and it is really obvious
> > that alot of KVM becomes disabled if you do this.
> > 
> 
> This is an interesting angle to look at it. But given pinning is already
> required in KVM to support SEV/TDX even w/o assigned device, those
> restrictions have to be understood by KVM MMU code which makes
> a KVM-managed page table under such restrictions closer to be 
> sharable with IOMMU.

I thought the SEV/TDX stuff wasn't being done with pinning but via a
memfd in a special mode that does sort of pin under the covers, but it
is not necessarily a DMA pin. (it isn't even struct page memory, so
I'm not even sure what pin means)

Certainly, there is no inherent problem with SEV/TDX having movable
memory and KVM could conceivably handle this - but iommu cannot.

I would not make an equivalence with SEV/TDX and iommu at least..

Jason
Tian, Kevin March 22, 2022, 7:30 a.m. UTC | #9
> From: Jason Gunthorpe
> Sent: Monday, March 21, 2022 10:07 PM
> 
> On Sat, Mar 19, 2022 at 07:51:31AM +0000, Tian, Kevin wrote:
> > > From: Jason Gunthorpe <jgg@nvidia.com>
> > > Sent: Friday, March 18, 2022 10:13 PM
> > >
> > > On Fri, Mar 18, 2022 at 02:23:57AM +0000, Tian, Kevin wrote:
> > >
> > > > Yes, that is another major part work besides the iommufd work. And
> > > > it is not compatible with KVM features which rely on the dynamic
> > > > manner of EPT. Though It is a bit questionable whether it's worthy of
> > > > doing so just for saving memory footprint while losing other capabilities,
> > > > it is a requirement for some future security extension in Intel trusted
> > > > computing architecture. And KVM has been pinning pages for
> SEV/TDX/etc.
> > > > today thus some facilities can be reused. But I agree it is not a simple
> > > > task thus we need start discussion early to explore various gaps in
> > > > iommu and kvm.
> > >
> > > Yikes. IMHO this might work better going the other way, have KVM
> > > import the iommu_domain and use that as the KVM page table than vice
> > > versa.
> > >
> > > The semantics are a heck of a lot clearer, and it is really obvious
> > > that alot of KVM becomes disabled if you do this.
> > >
> >
> > This is an interesting angle to look at it. But given pinning is already
> > required in KVM to support SEV/TDX even w/o assigned device, those
> > restrictions have to be understood by KVM MMU code which makes
> > a KVM-managed page table under such restrictions closer to be
> > sharable with IOMMU.
> 
> I thought the SEV/TDX stuff wasn't being done with pinning but via a
> memfd in a special mode that does sort of pin under the covers, but it
> is not necessarily a DMA pin. (it isn't even struct page memory, so
> I'm not even sure what pin means)
> 
> Certainly, there is no inherent problem with SEV/TDX having movable
> memory and KVM could concievably handle this - but iommu cannot.
> 
> I would not make an equivilance with SEV/TDX and iommu at least..
> 

Currently SEV does use DMA pin i.e. pin_user_pages in sev_pin_memory().

I'm not sure whether it's a hardware limitation or just a software tradeoff
for simplicity. But having that code does imply that KVM has absorbed
certain restrictions with that pinning fact.

But I agree they are not equivalent. e.g. suppose pinning is only applied to
private/encrypted memory in SEV/TDX while iommu requires pinning the
entire guest memory (if no IOPF support on device).

btw regardless of whether it's KVM importing the iommu domain or
iommufd importing the KVM page table, in the end the KVM MMU needs
to explicitly mark its page table as shared with the IOMMU and enable
all kinds of restrictions to support that sharing fact.

Thanks
Kevin
diff mbox series

Patch

diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index f2c45b85b9fc..8bb57e0e3945 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -1976,6 +1976,13 @@  void iommu_domain_free(struct iommu_domain *domain)
 }
 EXPORT_SYMBOL_GPL(iommu_domain_free);
 
+struct iommu_domain *iommu_domain_alloc_type(struct bus_type *bus,
+					     unsigned int t)
+{
+	return __iommu_domain_alloc(bus, t);
+}
+EXPORT_SYMBOL_GPL(iommu_domain_alloc_type);
+
 static int __iommu_attach_device(struct iommu_domain *domain,
 				 struct device *dev)
 {
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 9208eca4b0d1..b427bbb9f387 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -63,6 +63,7 @@  struct iommu_domain_geometry {
 					      implementation              */
 #define __IOMMU_DOMAIN_PT	(1U << 2)  /* Domain is identity mapped   */
 #define __IOMMU_DOMAIN_DMA_FQ	(1U << 3)  /* DMA-API uses flush queue    */
+#define __IOMMU_DOMAIN_KVM	(1U << 4)  /* Domain is controlled by KVM */
 
 /*
  * This are the possible domain-types
@@ -77,6 +78,7 @@  struct iommu_domain_geometry {
  *				  certain optimizations for these domains
  *	IOMMU_DOMAIN_DMA_FQ	- As above, but definitely using batched TLB
  *				  invalidation.
+ *	IOMMU_DOMAIN_KVM	- DMA mappings managed by KVM, used for VMs
  */
 #define IOMMU_DOMAIN_BLOCKED	(0U)
 #define IOMMU_DOMAIN_IDENTITY	(__IOMMU_DOMAIN_PT)
@@ -86,6 +88,8 @@  struct iommu_domain_geometry {
 #define IOMMU_DOMAIN_DMA_FQ	(__IOMMU_DOMAIN_PAGING |	\
 				 __IOMMU_DOMAIN_DMA_API |	\
 				 __IOMMU_DOMAIN_DMA_FQ)
+#define IOMMU_DOMAIN_KVM	(__IOMMU_DOMAIN_PAGING |	\
+				 __IOMMU_DOMAIN_KVM)
 
 struct iommu_domain {
 	unsigned type;
@@ -421,6 +425,8 @@  extern bool iommu_capable(struct bus_type *bus, enum iommu_cap cap);
 extern struct iommu_domain *iommu_domain_alloc(struct bus_type *bus);
 extern struct iommu_group *iommu_group_get_by_id(int id);
 extern void iommu_domain_free(struct iommu_domain *domain);
+extern struct iommu_domain *iommu_domain_alloc_type(struct bus_type *bus,
+						    unsigned int t);
 extern int iommu_attach_device(struct iommu_domain *domain,
 			       struct device *dev);
 extern void iommu_detach_device(struct iommu_domain *domain,
@@ -708,6 +714,12 @@  static inline void iommu_domain_free(struct iommu_domain *domain)
 {
 }
 
+static inline struct iommu_domain *iommu_domain_alloc_type(struct bus_type *bus,
+							   unsigned int t)
+{
+	return NULL;
+}
+
 static inline int iommu_attach_device(struct iommu_domain *domain,
 				      struct device *dev)
 {