
[v2,6/8] iommu/arm-smmu-v3: Support IOMMU_GET_HW_INFO via struct arm_smmu_hw_info

Message ID 6-v2-621370057090+91fec-smmuv3_nesting_jgg@nvidia.com (mailing list archive)
State Handled Elsewhere, archived
Series Initial support for SMMUv3 nested translation

Commit Message

Jason Gunthorpe Aug. 27, 2024, 3:51 p.m. UTC
From: Nicolin Chen <nicolinc@nvidia.com>

For virtualization cases the IDR/IIDR/AIDR values of the actual SMMU
instance need to be available to the VMM so it can construct an
appropriate vSMMUv3 that reflects the correct HW capabilities.

For userspace page tables these values are required to constrain the valid
values within the CD table and the IOPTEs.

The kernel does not sanitize these values. If building a VMM then
userspace is required to only forward bits into a VM that it knows it can
implement. Some bits will also require a VMM to detect if appropriate
kernel support is available such as for ATS and BTM.

Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 24 ++++++++++++++
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h |  2 ++
 include/uapi/linux/iommufd.h                | 35 +++++++++++++++++++++
 3 files changed, 61 insertions(+)
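
For context, a minimal userspace sketch (not part of the patch) of how a VMM
might retrieve this structure through the existing IOMMU_GET_HW_INFO ioctl,
assuming the iommufd UAPI as of this series. It takes an open iommufd file
descriptor and a device ID from a prior VFIO/iommufd binding; the helper name
and the bare-bones error handling are illustrative only:

#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/iommufd.h>

/* Illustrative helper: fetch the raw SMMUv3 IDR/IIDR/AIDR block for a
 * device bound to iommufd. @iommufd is an open /dev/iommu fd, @dev_id
 * the IOMMUFD device ID obtained when the device was bound.
 */
static int get_smmuv3_hw_info(int iommufd, uint32_t dev_id,
			      struct iommu_hw_info_arm_smmuv3 *out)
{
	struct iommu_hw_info cmd = {
		.size = sizeof(cmd),
		.dev_id = dev_id,
		.data_len = sizeof(*out),
		.data_uptr = (uintptr_t)out,
	};

	if (ioctl(iommufd, IOMMU_GET_HW_INFO, &cmd))
		return -1;
	/* Make sure the kernel actually reported SMMUv3 data */
	if (cmd.out_data_type != IOMMU_HW_INFO_TYPE_ARM_SMMUV3)
		return -1;
	return 0;
}

The struct layout the kernel fills in is exactly the iommu_hw_info_arm_smmuv3
this patch adds to include/uapi/linux/iommufd.h.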

Comments

Tian, Kevin Aug. 30, 2024, 7:55 a.m. UTC | #1
> From: Jason Gunthorpe <jgg@nvidia.com>
> Sent: Tuesday, August 27, 2024 11:52 PM
> 
> From: Nicolin Chen <nicolinc@nvidia.com>
> 
> For virtualization cases the IDR/IIDR/AIDR values of the actual SMMU
> instance need to be available to the VMM so it can construct an
> appropriate vSMMUv3 that reflects the correct HW capabilities.
> 
> For userspace page tables these values are required to constrain the valid
> values within the CD table and the IOPTEs.
> 
> The kernel does not sanitize these values. If building a VMM then
> userspace is required to only forward bits into a VM that it knows it can
> implement. Some bits will also require a VMM to detect if appropriate
> kernel support is available such as for ATS and BTM.
> 
> Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>

Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Mostafa Saleh Aug. 30, 2024, 3:23 p.m. UTC | #2
Hi Jason,

On Tue, Aug 27, 2024 at 12:51:36PM -0300, Jason Gunthorpe wrote:
> From: Nicolin Chen <nicolinc@nvidia.com>
> 
> For virtualization cases the IDR/IIDR/AIDR values of the actual SMMU
> instance need to be available to the VMM so it can construct an
> appropriate vSMMUv3 that reflects the correct HW capabilities.
> 
> For userspace page tables these values are required to constrain the valid
> values within the CD table and the IOPTEs.
> 
> The kernel does not sanitize these values. If building a VMM then
> userspace is required to only forward bits into a VM that it knows it can
> implement. Some bits will also require a VMM to detect if appropriate
> kernel support is available such as for ATS and BTM.
> 
> Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
> ---
>  drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 24 ++++++++++++++
>  drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h |  2 ++
>  include/uapi/linux/iommufd.h                | 35 +++++++++++++++++++++
>  3 files changed, 61 insertions(+)
> 
> diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> index c2021e821e5cb6..ec2fcdd4523a26 100644
> --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> @@ -2288,6 +2288,29 @@ static bool arm_smmu_enforce_cache_coherency(struct iommu_domain *domain)
>  	return ret;
>  }
>  
> +static void *arm_smmu_hw_info(struct device *dev, u32 *length, u32 *type)
> +{
> +	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
> +	struct iommu_hw_info_arm_smmuv3 *info;
> +	u32 __iomem *base_idr;
> +	unsigned int i;
> +
> +	info = kzalloc(sizeof(*info), GFP_KERNEL);
> +	if (!info)
> +		return ERR_PTR(-ENOMEM);
> +
> +	base_idr = master->smmu->base + ARM_SMMU_IDR0;
> +	for (i = 0; i <= 5; i++)
> +		info->idr[i] = readl_relaxed(base_idr + i);
> +	info->iidr = readl_relaxed(master->smmu->base + ARM_SMMU_IIDR);
> +	info->aidr = readl_relaxed(master->smmu->base + ARM_SMMU_AIDR);
> +
> +	*length = sizeof(*info);
> +	*type = IOMMU_HW_INFO_TYPE_ARM_SMMUV3;
> +
> +	return info;
> +}
> +
>  struct arm_smmu_domain *arm_smmu_domain_alloc(void)
>  {
>  	struct arm_smmu_domain *smmu_domain;
> @@ -3467,6 +3490,7 @@ static struct iommu_ops arm_smmu_ops = {
>  	.identity_domain	= &arm_smmu_identity_domain,
>  	.blocked_domain		= &arm_smmu_blocked_domain,
>  	.capable		= arm_smmu_capable,
> +	.hw_info		= arm_smmu_hw_info,
>  	.domain_alloc_paging    = arm_smmu_domain_alloc_paging,
>  	.domain_alloc_sva       = arm_smmu_sva_domain_alloc,
>  	.domain_alloc_user	= arm_smmu_domain_alloc_user,
> diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
> index 45882f65bfcad0..4b05c81b181a82 100644
> --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
> +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
> @@ -80,6 +80,8 @@
>  #define IIDR_REVISION			GENMASK(15, 12)
>  #define IIDR_IMPLEMENTER		GENMASK(11, 0)
>  
> +#define ARM_SMMU_AIDR			0x1C
> +
>  #define ARM_SMMU_CR0			0x20
>  #define CR0_ATSCHK			(1 << 4)
>  #define CR0_CMDQEN			(1 << 3)
> diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h
> index 4dde745cfb7e29..83b6e1cd338d8f 100644
> --- a/include/uapi/linux/iommufd.h
> +++ b/include/uapi/linux/iommufd.h
> @@ -484,15 +484,50 @@ struct iommu_hw_info_vtd {
>  	__aligned_u64 ecap_reg;
>  };
>  
> +/**
> + * struct iommu_hw_info_arm_smmuv3 - ARM SMMUv3 hardware information
> + *                                   (IOMMU_HW_INFO_TYPE_ARM_SMMUV3)
> + *
> + * @flags: Must be set to 0
> + * @__reserved: Must be 0
> + * @idr: Implemented features for ARM SMMU Non-secure programming interface
> + * @iidr: Information about the implementation and implementer of ARM SMMU,
> + *        and architecture version supported
> + * @aidr: ARM SMMU architecture version
> + *
> + * For the details of @idr, @iidr and @aidr, please refer to the chapters
> + * from 6.3.1 to 6.3.6 in the SMMUv3 Spec.
> + *
> + * User space should read the underlying ARM SMMUv3 hardware information for
> + * the list of supported features.
> + *
> + * Note that these values reflect the raw HW capability, without any insight if
> + * any required kernel driver support is present. Bits may be set indicating the
> + * HW has functionality that is lacking kernel software support, such as BTM. If
> + * a VMM is using this information to construct emulated copies of these
> + * registers it should only forward bits that it knows it can support.
> + *
> + * In future, presence of required kernel support will be indicated in flags.
> + */
> +struct iommu_hw_info_arm_smmuv3 {
> +	__u32 flags;
> +	__u32 __reserved;
> +	__u32 idr[6];
> +	__u32 iidr;
> +	__u32 aidr;
> +};
There is a ton of information here; I think we might need to sanitize the
values down to what user space needs to know (that's why I was asking about
qemu). Also, SMMU_IDR4 is implementation defined, so I am not sure we can
unconditionally expose it to userspace.

Thanks,
Mostafa
> +
>  /**
>   * enum iommu_hw_info_type - IOMMU Hardware Info Types
>   * @IOMMU_HW_INFO_TYPE_NONE: Used by the drivers that do not report hardware
>   *                           info
>   * @IOMMU_HW_INFO_TYPE_INTEL_VTD: Intel VT-d iommu info type
> + * @IOMMU_HW_INFO_TYPE_ARM_SMMUV3: ARM SMMUv3 iommu info type
>   */
>  enum iommu_hw_info_type {
>  	IOMMU_HW_INFO_TYPE_NONE = 0,
>  	IOMMU_HW_INFO_TYPE_INTEL_VTD = 1,
> +	IOMMU_HW_INFO_TYPE_ARM_SMMUV3 = 2,
>  };
>  
>  /**
> -- 
> 2.46.0
>
Jason Gunthorpe Aug. 30, 2024, 5:16 p.m. UTC | #3
On Fri, Aug 30, 2024 at 03:23:41PM +0000, Mostafa Saleh wrote:
> > +/**
> > + * struct iommu_hw_info_arm_smmuv3 - ARM SMMUv3 hardware information
> > + *                                   (IOMMU_HW_INFO_TYPE_ARM_SMMUV3)
> > + *
> > + * @flags: Must be set to 0
> > + * @__reserved: Must be 0
> > + * @idr: Implemented features for ARM SMMU Non-secure programming interface
> > + * @iidr: Information about the implementation and implementer of ARM SMMU,
> > + *        and architecture version supported
> > + * @aidr: ARM SMMU architecture version
> > + *
> > + * For the details of @idr, @iidr and @aidr, please refer to the chapters
> > + * from 6.3.1 to 6.3.6 in the SMMUv3 Spec.
> > + *
> > + * User space should read the underlying ARM SMMUv3 hardware information for
> > + * the list of supported features.
> > + *
> > + * Note that these values reflect the raw HW capability, without any insight if
> > + * any required kernel driver support is present. Bits may be set indicating the
> > + * HW has functionality that is lacking kernel software support, such as BTM. If
> > + * a VMM is using this information to construct emulated copies of these
> > + * registers it should only forward bits that it knows it can support.
> > + *
> > + * In future, presence of required kernel support will be indicated in flags.
> > + */
> > +struct iommu_hw_info_arm_smmuv3 {
> > +	__u32 flags;
> > +	__u32 __reserved;
> > +	__u32 idr[6];
> > +	__u32 iidr;
> > +	__u32 aidr;
> > +};
> There is a ton of information here; I think we might need to sanitize the
> values down to what user space needs to know (that's why I was asking about
> qemu). Also, SMMU_IDR4 is implementation defined, so I am not sure we can
> unconditionally expose it to userspace.

What is the harm? Does exposing IDR data to userspace in any way
compromise the security or integrity of the system?

I think no - how could it?

As the comment says, the VMM should not just blindly forward this to
a guest!

The VMM needs to make its own IDR to reflect its own vSMMU
capabilities. It can refer to the kernel IDR if it needs to.

So, if the kernel is going to limit it, what criteria would you
propose the kernel use?

Jason
Mostafa Saleh Sept. 2, 2024, 10:11 a.m. UTC | #4
On Fri, Aug 30, 2024 at 02:16:02PM -0300, Jason Gunthorpe wrote:
> On Fri, Aug 30, 2024 at 03:23:41PM +0000, Mostafa Saleh wrote:
> > > +/**
> > > + * struct iommu_hw_info_arm_smmuv3 - ARM SMMUv3 hardware information
> > > + *                                   (IOMMU_HW_INFO_TYPE_ARM_SMMUV3)
> > > + *
> > > + * @flags: Must be set to 0
> > > + * @__reserved: Must be 0
> > > + * @idr: Implemented features for ARM SMMU Non-secure programming interface
> > > + * @iidr: Information about the implementation and implementer of ARM SMMU,
> > > + *        and architecture version supported
> > > + * @aidr: ARM SMMU architecture version
> > > + *
> > > + * For the details of @idr, @iidr and @aidr, please refer to the chapters
> > > + * from 6.3.1 to 6.3.6 in the SMMUv3 Spec.
> > > + *
> > > + * User space should read the underlying ARM SMMUv3 hardware information for
> > > + * the list of supported features.
> > > + *
> > > + * Note that these values reflect the raw HW capability, without any insight if
> > > + * any required kernel driver support is present. Bits may be set indicating the
> > > + * HW has functionality that is lacking kernel software support, such as BTM. If
> > > + * a VMM is using this information to construct emulated copies of these
> > > + * registers it should only forward bits that it knows it can support.
> > > + *
> > > + * In future, presence of required kernel support will be indicated in flags.
> > > + */
> > > +struct iommu_hw_info_arm_smmuv3 {
> > > +	__u32 flags;
> > > +	__u32 __reserved;
> > > +	__u32 idr[6];
> > > +	__u32 iidr;
> > > +	__u32 aidr;
> > > +};
> > There is a ton of information here; I think we might need to sanitize the
> > values down to what user space needs to know (that's why I was asking about
> > qemu). Also, SMMU_IDR4 is implementation defined, so I am not sure we can
> > unconditionally expose it to userspace.
> 
> What is the harm? Does exposing IDR data to userspace in any way
> compromise the security or integrity of the system?
> 
> I think no - how could it?

I don’t see a clear harm or exploit in exposing IDRs, but IMHO we
should treat userspace under the least-privilege principle and
only expose what user space cares about (with sanitised IDRs or
through another mechanism).

For example, KVM doesn’t allow reading the CPU system registers to
know if SVE (or other features) is supported, but hides that behind a
CAP in KVM_CHECK_EXTENSION.
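
(A minimal sketch of that KVM pattern, assuming an open /dev/kvm fd; the
helper name is made up, KVM_CAP_ARM_SVE is the real capability:)

#include <stdbool.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Feature discovery goes through a capability check rather than
 * letting userspace read the raw host ID registers.
 */
static bool kvm_supports_sve(int kvm_fd)
{
	return ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_ARM_SVE) > 0;
}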

> 
> As the comment says, the VMM should not just blindly forward this to
> a guest!

I don't think the kernel should trust userspace.

> 
> The VMM needs to make its own IDR to reflect its own vSMMU
> capabilities. It can refer to the kernel IDR if it needs to.
> 
> So, if the kernel is going to limit it, what criteria would you
> propose the kernel use?

I agree that the VMM would create a virtual IDR for the guest, but that
doesn't have to be directly based on the physical one (same as with the CPU).

Thanks,
Mostafa

> 
> Jason
Jason Gunthorpe Sept. 3, 2024, 12:16 a.m. UTC | #5
On Mon, Sep 02, 2024 at 10:11:16AM +0000, Mostafa Saleh wrote:

> > What is the harm? Does exposing IDR data to userspace in any way
> > compromise the security or integrity of the system?
> > 
> > I think no - how could it?
> 
> I don’t see a clear harm or exploit in exposing IDRs, but IMHO we
> should treat userspace under the least-privilege principle and
> only expose what user space cares about (with sanitised IDRs or
> through another mechanism).

If the information is harmless then why hide it? We expose all kinds
of stuff to userspace, like most of the PCI config space for
instance. I think we need a reason. 

Any sanitization in the kernel will complicate everything because we
will get it wrong.

Let's not make things complicated without reasons. Intel and AMD are
exposing their IDR equivalents in this manner as well.

> For example, KVM doesn’t allow reading the CPU system registers to
> know if SVE (or other features) is supported, but hides that behind a
> CAP in KVM_CHECK_EXTENSION.

Do you know why?

> > As the comment says, the VMM should not just blindly forward this to
> > a guest!
> 
> I don't think the kernel should trust userspace.

There is no trust. If the VMM blindly forwards the IDRs then the VMM
will find its VMs have issues. It is a functional bug, just as if the
VMM puts random garbage in its vIDRs.

The only purpose of this interface is to provide information about the
physical hardware to the VMM.

> > The VMM needs to make its own IDR to reflect its own vSMMU
> > capabilities. It can refer to the kernel IDR if it needs to.
> > 
> > So, if the kernel is going to limit it, what criteria would you
> > propose the kernel use?
> 
> I agree that the VMM would create a virtual IDR for the guest, but that
> doesn't have to be directly based on the physical one (same as with the CPU).

No one said it should be. In fact the comment explicitly says not to
do that.

The VMM is expected to read out of the physical IDR any information
that affects data structures that are under direct guest control.

For instance, anything that affects the CD and downwards: page sizes,
IAS limits, etc. Anything that affects assigned invalidation
queues. Anything that impacts errata the VM needs to be aware of.

If you sanitize it then you will hide information that someone will
need at some point, then we have to go and unsanitize it, then add
feature flags... It is a pain.
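
(A minimal sketch of that split for SMMU_IDR5, using the field layout from
spec section 6.3.6; which fields get forwarded is exactly the VMM policy
choice described above, so treat the mask as illustrative, not normative:)

#include <stdint.h>

#define IDR5_OAS	(0x7u << 0)	/* output address size */
#define IDR5_GRAN4K	(1u << 4)	/* 4KB translation granule */
#define IDR5_GRAN16K	(1u << 5)
#define IDR5_GRAN64K	(1u << 6)

/* Illustrative VMM policy: take HW truth only for fields that guest-owned
 * page tables must honour; everything else comes from the emulated model.
 */
static uint32_t build_vidr5(uint32_t phys_idr5, uint32_t model_idr5)
{
	uint32_t hw_owned = IDR5_OAS | IDR5_GRAN4K | IDR5_GRAN16K |
			    IDR5_GRAN64K;

	return (phys_idr5 & hw_owned) | (model_idr5 & ~hw_owned);
}

Everything outside hw_owned stays under VMM control, which is what allows a
standardized vSMMU regardless of the pSMMU.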

Jason
Mostafa Saleh Sept. 3, 2024, 8:34 a.m. UTC | #6
On Mon, Sep 02, 2024 at 09:16:54PM -0300, Jason Gunthorpe wrote:
> On Mon, Sep 02, 2024 at 10:11:16AM +0000, Mostafa Saleh wrote:
> 
> > > What is the harm? Does exposing IDR data to userspace in any way
> > > compromise the security or integrity of the system?
> > > 
> > > I think no - how could it?
> > 
> > I don’t see a clear harm or exploit in exposing IDRs, but IMHO we
> > should treat userspace under the least-privilege principle and
> > only expose what user space cares about (with sanitised IDRs or
> > through another mechanism).
> 
> If the information is harmless then why hide it? We expose all kinds
> of stuff to userspace, like most of the PCI config space for
> instance. I think we need a reason. 
> 
> Any sanitization in the kernel will complicate everything because we
> will get it wrong.
> 
> Let's not make things complicated without reasons. Intel and AMD are
> exposing their IDR equivalents in this manner as well.
> 
> > For example, KVM doesn’t allow reading the CPU system registers to
> > know if SVE (or other features) is supported, but hides that behind a
> > CAP in KVM_CHECK_EXTENSION.
> 
> Do you know why?
> 

I am not really sure, but I believe it’s a useful abstraction

> > > As the comment says, the VMM should not just blindly forward this to
> > > a guest!
> > 
> > I don't think the kernel should trust userspace.
> 
> There is no trust. If the VMM blindly forwards the IDRs then the VMM
> will find its VMs have issues. It is a functional bug, just as if the
> VMM puts random garbage in its vIDRs.
> 
> The only purpose of this interface is to provide information about the
> physical hardware to the VMM.
> 
> > > The VMM needs to make its own IDR to reflect its own vSMMU
> > > capabilities. It can refer to the kernel IDR if it needs to.
> > > 
> > > So, if the kernel is going to limit it, what criteria would you
> > > propose the kernel use?
> > 
> > I agree that the VMM would create a virtual IDR for the guest, but that
> > doesn't have to be directly based on the physical one (same as with the CPU).
> 
> No one said it should be. In fact the comment explicitly says not to
> do that.
> 
> The VMM is expected to read out of the physical IDR any information
> that affects data structures that are under direct guest control.
> 
> For instance, anything that affects the CD and downwards: page sizes,
> IAS limits, etc. Anything that affects assigned invalidation
> queues. Anything that impacts errata the VM needs to be aware of.
> 
> If you sanitize it then you will hide information that someone will
> need at some point, then we have to go and unsanitize it, then add
> feature flags... It is a pain.

I don’t have a very strong opinion about sanitising the IDRs (specifically,
many of those are documented per IP anyway), but at least we should have
some clear requirement for what userspace needs; I am just concerned
that userspace can misuse some of the features, leading to a strange UAPI.

Thanks,
Mostafa

> 
> Jason
Jason Gunthorpe Sept. 3, 2024, 11:40 p.m. UTC | #7
On Tue, Sep 03, 2024 at 08:34:17AM +0000, Mostafa Saleh wrote:

> > > For example, KVM doesn’t allow reading the CPU system registers to
> > > know if SVE (or other features) is supported, but hides that behind a
> > > CAP in KVM_CHECK_EXTENSION.
> > 
> > Do you know why?
> 
> I am not really sure, but I believe it’s a useful abstraction

It seems odd to me: unprivileged userspace can look in /proc/cpuinfo and
see SVE, so why would KVM hide the same information behind a
CAP_SYS_ADMIN/whatever check?

> I don’t have a very strong opinion about sanitising the IDRs (specifically,
> many of those are documented per IP anyway), but at least we should have
> some clear requirement for what userspace needs; I am just concerned
> that userspace can misuse some of the features, leading to a strange UAPI.

We should probably have a file in Documentation/ that explains this in
more detail.

The design has the kernel be very general; the kernel's scope is
bigger than just providing a vSMMU. It is the VMM's job to take the
kernel tools and build the vSMMU paravirtualization.

It is like this because the configuration of the vSMMU is ultimately a
policy choice that should be configured by the operator. When we
consider live migration, the vSMMU needs to be fully standardized and
consistent regardless of what the pSMMU is. We don't want policy in
the kernel.

Jason
Shameerali Kolothum Thodi Sept. 4, 2024, 7:11 a.m. UTC | #8
> -----Original Message-----
> From: Jason Gunthorpe <jgg@nvidia.com>
> Sent: Wednesday, September 4, 2024 12:40 AM
> To: Mostafa Saleh <smostafa@google.com>
> Cc: acpica-devel@lists.linux.dev; Guohanjun (Hanjun Guo)
> <guohanjun@huawei.com>; iommu@lists.linux.dev; Joerg Roedel
> <joro@8bytes.org>; Kevin Tian <kevin.tian@intel.com>;
> kvm@vger.kernel.org; Len Brown <lenb@kernel.org>; linux-
> acpi@vger.kernel.org; linux-arm-kernel@lists.infradead.org; Lorenzo Pieralisi
> <lpieralisi@kernel.org>; Rafael J. Wysocki <rafael@kernel.org>; Robert
> Moore <robert.moore@intel.com>; Robin Murphy
> <robin.murphy@arm.com>; Sudeep Holla <sudeep.holla@arm.com>; Will
> Deacon <will@kernel.org>; Alex Williamson <alex.williamson@redhat.com>;
> Eric Auger <eric.auger@redhat.com>; Jean-Philippe Brucker <jean-
> philippe@linaro.org>; Moritz Fischer <mdf@kernel.org>; Michael Shavit
> <mshavit@google.com>; Nicolin Chen <nicolinc@nvidia.com>;
> patches@lists.linux.dev; Shameerali Kolothum Thodi
> <shameerali.kolothum.thodi@huawei.com>
> Subject: Re: [PATCH v2 6/8] iommu/arm-smmu-v3: Support
> IOMMU_GET_HW_INFO via struct arm_smmu_hw_info
> 
> On Tue, Sep 03, 2024 at 08:34:17AM +0000, Mostafa Saleh wrote:
> 
> > > > For example, KVM doesn’t allow reading the CPU system registers to
> > > > know if SVE (or other features) is supported, but hides that behind a
> > > > CAP in KVM_CHECK_EXTENSION.
> > >
> > > Do you know why?
> >
> > I am not really sure, but I believe it’s a useful abstraction
> 
> It seems odd to me: unprivileged userspace can look in /proc/cpuinfo and
> see SVE, so why would KVM hide the same information behind a
> CAP_SYS_ADMIN/whatever check?

I don’t think KVM always hides SVE. It also depends on whether the VMM has
requested SVE for a specific guest or not (QEMU has an option to turn SVE
on/off, and similarly for PMU). Based on that, KVM populates the guest-specific
ID registers, and the guest's /proc/cpuinfo reflects that.

And for some features, if KVM is not handling the feature properly or it does
not make sense to expose it to the guest, those features are masked in the ID
registers.

Recently, the arm64 ID registers have been made writable from userspace to
allow the VMM to turn features on/off, so that VMs can be migrated between
hosts that differ in feature support.

https://lore.kernel.org/all/ZR2YfAixZgbCFnb8@linux.dev/T/#m7c2493fd2d43c13a3336d19f2dc06a89803c6fdb
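
(A minimal sketch of that mechanism, assuming an arm64 vCPU fd; the helper
name is made up, and ARM64_SYS_REG(3, 0, 0, 4, 0) is the standard sysreg
encoding of ID_AA64PFR0_EL1:)

#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* With writable ID registers, the VMM can shrink the guest-visible
 * value of an ID register via KVM_SET_ONE_REG before the vCPU runs.
 */
static int set_guest_id_aa64pfr0(int vcpu_fd, uint64_t val)
{
	struct kvm_one_reg reg = {
		.id = ARM64_SYS_REG(3, 0, 0, 4, 0),
		.addr = (uintptr_t)&val,
	};

	return ioctl(vcpu_fd, KVM_SET_ONE_REG, &reg);
}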

Thanks,
Shameer
Jason Gunthorpe Sept. 4, 2024, 12:01 p.m. UTC | #9
On Wed, Sep 04, 2024 at 07:11:19AM +0000, Shameerali Kolothum Thodi wrote:

> > On Tue, Sep 03, 2024 at 08:34:17AM +0000, Mostafa Saleh wrote:
> > 
> > > > > For example, KVM doesn’t allow reading the CPU system registers to
> > > > > know if SVE (or other features) is supported, but hides that behind a
> > > > > CAP in KVM_CHECK_EXTENSION.
> > > >
> > > > Do you know why?
> > >
> > > I am not really sure, but I believe it’s a useful abstraction
> > 
> > It seems odd to me: unprivileged userspace can look in /proc/cpuinfo and
> > see SVE, so why would KVM hide the same information behind a
> > CAP_SYS_ADMIN/whatever check?
> 
> I don’t think KVM always hides SVE. It also depends on whether the
> VMM has requested SVE for a specific guest or not (QEMU has an option
> to turn SVE on/off, and similarly for PMU). Based on that, KVM
> populates the guest-specific ID registers, and the guest's
> /proc/cpuinfo reflects that.
> 
> And for some features, if KVM is not handling the feature properly or
> it does not make sense to expose it to the guest, those features are
> masked in the ID registers.
> 
> Recently, the arm64 ID registers have been made writable from userspace
> to allow the VMM to turn features on/off, so that VMs can be migrated
> between hosts that differ in feature support.
> 
> https://lore.kernel.org/all/ZR2YfAixZgbCFnb8@linux.dev/T/#m7c2493fd2d43c13a3336d19f2dc06a89803c6fdb

I see, so there is a significant difference - in KVM the kernel
controls what ID values the VM observes and in vSMMU the VMM controls
it.

Jason
Mostafa Saleh Sept. 6, 2024, 11:19 a.m. UTC | #10
On Wed, Sep 04, 2024 at 09:01:03AM -0300, Jason Gunthorpe wrote:
> On Wed, Sep 04, 2024 at 07:11:19AM +0000, Shameerali Kolothum Thodi wrote:
> 
> > > On Tue, Sep 03, 2024 at 08:34:17AM +0000, Mostafa Saleh wrote:
> > > 
> > > > > > For example, KVM doesn’t allow reading the CPU system registers to
> > > > > > know if SVE (or other features) is supported, but hides that behind a
> > > > > > CAP in KVM_CHECK_EXTENSION.
> > > > >
> > > > > Do you know why?
> > > >
> > > > I am not really sure, but I believe it’s a useful abstraction
> > > 
> > > It seems odd to me: unprivileged userspace can look in /proc/cpuinfo and
> > > see SVE, so why would KVM hide the same information behind a
> > > CAP_SYS_ADMIN/whatever check?
> > 
> > I don’t think KVM always hides SVE. It also depends on whether the
> > VMM has requested SVE for a specific guest or not (QEMU has an option
> > to turn SVE on/off, and similarly for PMU). Based on that, KVM
> > populates the guest-specific ID registers, and the guest's
> > /proc/cpuinfo reflects that.
> > 
> > And for some features, if KVM is not handling the feature properly or
> > it does not make sense to expose it to the guest, those features are
> > masked in the ID registers.
> > 
> > Recently, the arm64 ID registers have been made writable from userspace
> > to allow the VMM to turn features on/off, so that VMs can be migrated
> > between hosts that differ in feature support.
> > 
> > https://lore.kernel.org/all/ZR2YfAixZgbCFnb8@linux.dev/T/#m7c2493fd2d43c13a3336d19f2dc06a89803c6fdb
> 
> I see, so there is a significant difference - in KVM the kernel
> controls what ID values the VM observes and in vSMMU the VMM controls
> it.

Yes, that’s for guests.

What I meant is that the host sysregs are not read from userspace (which
would be the analogue of reading the SMMUv3 IDRs from userspace); instead,
the kernel controls what features are visible to userspace (the VMM), which
it can then enable for guests if it wants, as with SVE, MTE...

Thanks,
Mostafa

> 
> Jason

Patch

diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index c2021e821e5cb6..ec2fcdd4523a26 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -2288,6 +2288,29 @@  static bool arm_smmu_enforce_cache_coherency(struct iommu_domain *domain)
 	return ret;
 }
 
+static void *arm_smmu_hw_info(struct device *dev, u32 *length, u32 *type)
+{
+	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
+	struct iommu_hw_info_arm_smmuv3 *info;
+	u32 __iomem *base_idr;
+	unsigned int i;
+
+	info = kzalloc(sizeof(*info), GFP_KERNEL);
+	if (!info)
+		return ERR_PTR(-ENOMEM);
+
+	base_idr = master->smmu->base + ARM_SMMU_IDR0;
+	for (i = 0; i <= 5; i++)
+		info->idr[i] = readl_relaxed(base_idr + i);
+	info->iidr = readl_relaxed(master->smmu->base + ARM_SMMU_IIDR);
+	info->aidr = readl_relaxed(master->smmu->base + ARM_SMMU_AIDR);
+
+	*length = sizeof(*info);
+	*type = IOMMU_HW_INFO_TYPE_ARM_SMMUV3;
+
+	return info;
+}
+
 struct arm_smmu_domain *arm_smmu_domain_alloc(void)
 {
 	struct arm_smmu_domain *smmu_domain;
@@ -3467,6 +3490,7 @@  static struct iommu_ops arm_smmu_ops = {
 	.identity_domain	= &arm_smmu_identity_domain,
 	.blocked_domain		= &arm_smmu_blocked_domain,
 	.capable		= arm_smmu_capable,
+	.hw_info		= arm_smmu_hw_info,
 	.domain_alloc_paging    = arm_smmu_domain_alloc_paging,
 	.domain_alloc_sva       = arm_smmu_sva_domain_alloc,
 	.domain_alloc_user	= arm_smmu_domain_alloc_user,
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
index 45882f65bfcad0..4b05c81b181a82 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
@@ -80,6 +80,8 @@ 
 #define IIDR_REVISION			GENMASK(15, 12)
 #define IIDR_IMPLEMENTER		GENMASK(11, 0)
 
+#define ARM_SMMU_AIDR			0x1C
+
 #define ARM_SMMU_CR0			0x20
 #define CR0_ATSCHK			(1 << 4)
 #define CR0_CMDQEN			(1 << 3)
diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h
index 4dde745cfb7e29..83b6e1cd338d8f 100644
--- a/include/uapi/linux/iommufd.h
+++ b/include/uapi/linux/iommufd.h
@@ -484,15 +484,50 @@  struct iommu_hw_info_vtd {
 	__aligned_u64 ecap_reg;
 };
 
+/**
+ * struct iommu_hw_info_arm_smmuv3 - ARM SMMUv3 hardware information
+ *                                   (IOMMU_HW_INFO_TYPE_ARM_SMMUV3)
+ *
+ * @flags: Must be set to 0
+ * @__reserved: Must be 0
+ * @idr: Implemented features for ARM SMMU Non-secure programming interface
+ * @iidr: Information about the implementation and implementer of ARM SMMU,
+ *        and architecture version supported
+ * @aidr: ARM SMMU architecture version
+ *
+ * For the details of @idr, @iidr and @aidr, please refer to the chapters
+ * from 6.3.1 to 6.3.6 in the SMMUv3 Spec.
+ *
+ * User space should read the underlying ARM SMMUv3 hardware information for
+ * the list of supported features.
+ *
+ * Note that these values reflect the raw HW capability, without any insight if
+ * any required kernel driver support is present. Bits may be set indicating the
+ * HW has functionality that is lacking kernel software support, such as BTM. If
+ * a VMM is using this information to construct emulated copies of these
+ * registers it should only forward bits that it knows it can support.
+ *
+ * In future, presence of required kernel support will be indicated in flags.
+ */
+struct iommu_hw_info_arm_smmuv3 {
+	__u32 flags;
+	__u32 __reserved;
+	__u32 idr[6];
+	__u32 iidr;
+	__u32 aidr;
+};
+
 /**
  * enum iommu_hw_info_type - IOMMU Hardware Info Types
  * @IOMMU_HW_INFO_TYPE_NONE: Used by the drivers that do not report hardware
  *                           info
  * @IOMMU_HW_INFO_TYPE_INTEL_VTD: Intel VT-d iommu info type
+ * @IOMMU_HW_INFO_TYPE_ARM_SMMUV3: ARM SMMUv3 iommu info type
  */
 enum iommu_hw_info_type {
 	IOMMU_HW_INFO_TYPE_NONE = 0,
 	IOMMU_HW_INFO_TYPE_INTEL_VTD = 1,
+	IOMMU_HW_INFO_TYPE_ARM_SMMUV3 = 2,
 };
 
 /**