[RFC,40/45] iommu/arm-smmu-v3-kvm: Add IOMMU ops

Message ID 20230201125328.2186498-41-jean-philippe@linaro.org (mailing list archive)
State New, archived
Series KVM: Arm SMMUv3 driver for pKVM

Commit Message

Jean-Philippe Brucker Feb. 1, 2023, 12:53 p.m. UTC
Forward alloc_domain(), attach_dev(), map_pages(), etc to the
hypervisor.

Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
---
 .../iommu/arm/arm-smmu-v3/arm-smmu-v3-kvm.c   | 330 +++++++++++++++++-
 1 file changed, 328 insertions(+), 2 deletions(-)

Comments

Mostafa Saleh Feb. 7, 2023, 1:22 p.m. UTC | #1
Hi Jean,

On Wed, Feb 01, 2023 at 12:53:24PM +0000, Jean-Philippe Brucker wrote:
> Forward alloc_domain(), attach_dev(), map_pages(), etc to the
> hypervisor.
> 
> Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
> ---
>  .../iommu/arm/arm-smmu-v3/arm-smmu-v3-kvm.c   | 330 +++++++++++++++++-
>  1 file changed, 328 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-kvm.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-kvm.c
> index 55489d56fb5b..930d78f6e29f 100644
> --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-kvm.c
> +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-kvm.c
> @@ -22,10 +22,28 @@ struct host_arm_smmu_device {
>  #define smmu_to_host(_smmu) \
>  	container_of(_smmu, struct host_arm_smmu_device, smmu);
>  
> +struct kvm_arm_smmu_master {
> +	struct arm_smmu_device		*smmu;
> +	struct device			*dev;
> +	struct kvm_arm_smmu_domain	*domain;
> +};
> +
> +struct kvm_arm_smmu_domain {
> +	struct iommu_domain		domain;
> +	struct arm_smmu_device		*smmu;
> +	struct mutex			init_mutex;
> +	unsigned long			pgd;
> +	pkvm_handle_t			id;
> +};
> +
> +#define to_kvm_smmu_domain(_domain) \
> +	container_of(_domain, struct kvm_arm_smmu_domain, domain)
> +
>  static size_t				kvm_arm_smmu_cur;
>  static size_t				kvm_arm_smmu_count;
>  static struct hyp_arm_smmu_v3_device	*kvm_arm_smmu_array;
>  static struct kvm_hyp_iommu_memcache	*kvm_arm_smmu_memcache;
> +static DEFINE_IDA(kvm_arm_smmu_domain_ida);
>  
>  static DEFINE_PER_CPU(local_lock_t, memcache_lock) =
>  				INIT_LOCAL_LOCK(memcache_lock);
> @@ -57,7 +75,6 @@ static void *kvm_arm_smmu_host_va(phys_addr_t pa)
>  	return __va(pa);
>  }
>  
> -__maybe_unused
>  static int kvm_arm_smmu_topup_memcache(struct arm_smmu_device *smmu)
>  {
>  	struct kvm_hyp_memcache *mc;
> @@ -74,7 +91,6 @@ static int kvm_arm_smmu_topup_memcache(struct arm_smmu_device *smmu)
>  				     kvm_arm_smmu_host_pa, smmu);
>  }
>  
> -__maybe_unused
>  static void kvm_arm_smmu_reclaim_memcache(void)
>  {
>  	struct kvm_hyp_memcache *mc;
> @@ -101,6 +117,299 @@ static void kvm_arm_smmu_reclaim_memcache(void)
>  	__ret;							\
>  })
>  
> +static struct platform_driver kvm_arm_smmu_driver;
> +
> +static struct arm_smmu_device *
> +kvm_arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
> +{
> +	struct device *dev;
> +
> +	dev = driver_find_device_by_fwnode(&kvm_arm_smmu_driver.driver, fwnode);
> +	put_device(dev);
> +	return dev ? dev_get_drvdata(dev) : NULL;
> +}
> +
> +static struct iommu_ops kvm_arm_smmu_ops;
> +
> +static struct iommu_device *kvm_arm_smmu_probe_device(struct device *dev)
> +{
> +	struct arm_smmu_device *smmu;
> +	struct kvm_arm_smmu_master *master;
> +	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
> +
> +	if (!fwspec || fwspec->ops != &kvm_arm_smmu_ops)
> +		return ERR_PTR(-ENODEV);
> +
> +	if (WARN_ON_ONCE(dev_iommu_priv_get(dev)))
> +		return ERR_PTR(-EBUSY);
> +
> +	smmu = kvm_arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
> +	if (!smmu)
> +		return ERR_PTR(-ENODEV);
> +
> +	master = kzalloc(sizeof(*master), GFP_KERNEL);
> +	if (!master)
> +		return ERR_PTR(-ENOMEM);
> +
> +	master->dev = dev;
> +	master->smmu = smmu;
> +	dev_iommu_priv_set(dev, master);
> +
> +	return &smmu->iommu;
> +}
> +
> +static void kvm_arm_smmu_release_device(struct device *dev)
> +{
> +	struct kvm_arm_smmu_master *master = dev_iommu_priv_get(dev);
> +
> +	kfree(master);
> +	iommu_fwspec_free(dev);
> +}
> +
> +static struct iommu_domain *kvm_arm_smmu_domain_alloc(unsigned type)
> +{
> +	struct kvm_arm_smmu_domain *kvm_smmu_domain;
> +
> +	/*
> +	 * We don't support
> +	 * - IOMMU_DOMAIN_IDENTITY because we rely on the host telling the
> +	 *   hypervisor which pages are used for DMA.
> +	 * - IOMMU_DOMAIN_DMA_FQ because lazy unmap would clash with memory
> +	 *   donation to guests.
> +	 */
> +	if (type != IOMMU_DOMAIN_DMA &&
> +	    type != IOMMU_DOMAIN_UNMANAGED)
> +		return NULL;
> +
> +	kvm_smmu_domain = kzalloc(sizeof(*kvm_smmu_domain), GFP_KERNEL);
> +	if (!kvm_smmu_domain)
> +		return NULL;
> +
> +	mutex_init(&kvm_smmu_domain->init_mutex);
> +
> +	return &kvm_smmu_domain->domain;
> +}
> +
> +static int kvm_arm_smmu_domain_finalize(struct kvm_arm_smmu_domain *kvm_smmu_domain,
> +					struct kvm_arm_smmu_master *master)
> +{
> +	int ret = 0;
> +	struct page *p;
> +	unsigned long pgd;
> +	struct arm_smmu_device *smmu = master->smmu;
> +	struct host_arm_smmu_device *host_smmu = smmu_to_host(smmu);
> +
> +	if (kvm_smmu_domain->smmu) {
> +		if (kvm_smmu_domain->smmu != smmu)
> +			return -EINVAL;
> +		return 0;
> +	}
> +
> +	ret = ida_alloc_range(&kvm_arm_smmu_domain_ida, 0, 1 << smmu->vmid_bits,
> +			      GFP_KERNEL);
> +	if (ret < 0)
> +		return ret;
> +	kvm_smmu_domain->id = ret;
> +
> +	/*
> +	 * PGD allocation does not use the memcache because it may be of higher
> +	 * order when concatenated.
> +	 */
> +	p = alloc_pages_node(dev_to_node(smmu->dev), GFP_KERNEL | __GFP_ZERO,
> +			     host_smmu->pgd_order);
> +	if (!p)
> +		return -ENOMEM;
> +
> +	pgd = (unsigned long)page_to_virt(p);
> +
> +	local_lock_irq(&memcache_lock);
> +	ret = kvm_call_hyp_nvhe_mc(smmu, __pkvm_host_iommu_alloc_domain,
> +				   host_smmu->id, kvm_smmu_domain->id, pgd);

What is the idea of postponing this HVC to attach instead of calling it
in the alloc_domain HVC?

Thanks,
Mostafa
Jean-Philippe Brucker Feb. 8, 2023, 6:13 p.m. UTC | #2
On Tue, Feb 07, 2023 at 01:22:11PM +0000, Mostafa Saleh wrote:
> > +static struct iommu_domain *kvm_arm_smmu_domain_alloc(unsigned type)
> > +{
> > +	struct kvm_arm_smmu_domain *kvm_smmu_domain;
> > +
> > +	/*
> > +	 * We don't support
> > +	 * - IOMMU_DOMAIN_IDENTITY because we rely on the host telling the
> > +	 *   hypervisor which pages are used for DMA.
> > +	 * - IOMMU_DOMAIN_DMA_FQ because lazy unmap would clash with memory
> > +	 *   donation to guests.
> > +	 */
> > +	if (type != IOMMU_DOMAIN_DMA &&
> > +	    type != IOMMU_DOMAIN_UNMANAGED)
> > +		return NULL;
> > +
> > +	kvm_smmu_domain = kzalloc(sizeof(*kvm_smmu_domain), GFP_KERNEL);
> > +	if (!kvm_smmu_domain)
> > +		return NULL;
> > +
> > +	mutex_init(&kvm_smmu_domain->init_mutex);
> > +
> > +	return &kvm_smmu_domain->domain;
> > +}
> > +
> > +static int kvm_arm_smmu_domain_finalize(struct kvm_arm_smmu_domain *kvm_smmu_domain,
> > +					struct kvm_arm_smmu_master *master)
> > +{
> > +	int ret = 0;
> > +	struct page *p;
> > +	unsigned long pgd;
> > +	struct arm_smmu_device *smmu = master->smmu;
> > +	struct host_arm_smmu_device *host_smmu = smmu_to_host(smmu);
> > +
> > +	if (kvm_smmu_domain->smmu) {
> > +		if (kvm_smmu_domain->smmu != smmu)
> > +			return -EINVAL;
> > +		return 0;
> > +	}
> > +
> > +	ret = ida_alloc_range(&kvm_arm_smmu_domain_ida, 0, 1 << smmu->vmid_bits,
> > +			      GFP_KERNEL);
> > +	if (ret < 0)
> > +		return ret;
> > +	kvm_smmu_domain->id = ret;
> > +
> > +	/*
> > +	 * PGD allocation does not use the memcache because it may be of higher
> > +	 * order when concatenated.
> > +	 */
> > +	p = alloc_pages_node(dev_to_node(smmu->dev), GFP_KERNEL | __GFP_ZERO,
> > +			     host_smmu->pgd_order);
> > +	if (!p)
> > +		return -ENOMEM;
> > +
> > +	pgd = (unsigned long)page_to_virt(p);
> > +
> > +	local_lock_irq(&memcache_lock);
> > +	ret = kvm_call_hyp_nvhe_mc(smmu, __pkvm_host_iommu_alloc_domain,
> > +				   host_smmu->id, kvm_smmu_domain->id, pgd);
> 
> What is the idea of postponing this HVC to attach instead of calling it
> in the alloc_domain HVC?

Yes, ideally this HVC would be in kvm_arm_smmu_domain_alloc() above, but
due to the way the IOMMU API works at the moment, that function doesn't
take a context parameter. So we don't know which SMMU this will be for,
which is problematic: we don't know which page table formats are
supported, how many VMIDs are available, or how to allocate memory for
the page tables (which NUMA node is the SMMU on, can it access high
physical addresses or does it need special allocations?).

I think there are plans to add a context to domain_alloc(), but at the
moment IOMMU drivers complete the domain allocation in attach().

Thanks,
Jean
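
A condensed sketch of the deferral described above, distilled from the
patch rather than quoted verbatim (the sketch_* names are illustrative
only): domain_alloc() has no device context, so it only allocates the
bare domain; the SMMU-specific setup, including the
__pkvm_host_iommu_alloc_domain HVC, waits until attach_dev() knows the
target SMMU and finalizes the domain under init_mutex.

static struct iommu_domain *sketch_domain_alloc(unsigned type)
{
	/*
	 * No struct device yet: the SMMU, its VMID range, the page-table
	 * format and the PGD's NUMA node are all unknown, so only the
	 * shell of the domain is allocated here.
	 */
	struct kvm_arm_smmu_domain *d = kzalloc(sizeof(*d), GFP_KERNEL);

	if (!d)
		return NULL;
	mutex_init(&d->init_mutex);
	return &d->domain;
}

static int sketch_attach_dev(struct iommu_domain *domain, struct device *dev)
{
	struct kvm_arm_smmu_master *master = dev_iommu_priv_get(dev);
	struct kvm_arm_smmu_domain *d = to_kvm_smmu_domain(domain);
	int ret;

	/*
	 * The target SMMU is known here, so the domain is finalized exactly
	 * once (domain ID, PGD allocation and the alloc_domain HVC); the
	 * per-SID __pkvm_host_iommu_attach_dev calls, omitted from this
	 * sketch, follow in the full implementation.
	 */
	mutex_lock(&d->init_mutex);
	ret = kvm_arm_smmu_domain_finalize(d, master);
	mutex_unlock(&d->init_mutex);
	return ret;
}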
Mostafa Saleh Sept. 20, 2023, 4:27 p.m. UTC | #3
On Wed, Feb 01, 2023 at 12:53:24PM +0000, Jean-Philippe Brucker wrote:
> Forward alloc_domain(), attach_dev(), map_pages(), etc to the
> hypervisor.
> 
> Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
> ---
>  .../iommu/arm/arm-smmu-v3/arm-smmu-v3-kvm.c   | 330 +++++++++++++++++-
>  1 file changed, 328 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-kvm.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-kvm.c
> index 55489d56fb5b..930d78f6e29f 100644
> --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-kvm.c
> +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-kvm.c
> @@ -22,10 +22,28 @@ struct host_arm_smmu_device {
>  #define smmu_to_host(_smmu) \
>  	container_of(_smmu, struct host_arm_smmu_device, smmu);
>  
> +struct kvm_arm_smmu_master {
> +	struct arm_smmu_device		*smmu;
> +	struct device			*dev;
> +	struct kvm_arm_smmu_domain	*domain;
> +};
> +
> +struct kvm_arm_smmu_domain {
> +	struct iommu_domain		domain;
> +	struct arm_smmu_device		*smmu;
> +	struct mutex			init_mutex;
> +	unsigned long			pgd;
> +	pkvm_handle_t			id;
> +};
> +
> +#define to_kvm_smmu_domain(_domain) \
> +	container_of(_domain, struct kvm_arm_smmu_domain, domain)
> +
>  static size_t				kvm_arm_smmu_cur;
>  static size_t				kvm_arm_smmu_count;
>  static struct hyp_arm_smmu_v3_device	*kvm_arm_smmu_array;
>  static struct kvm_hyp_iommu_memcache	*kvm_arm_smmu_memcache;
> +static DEFINE_IDA(kvm_arm_smmu_domain_ida);
>  
>  static DEFINE_PER_CPU(local_lock_t, memcache_lock) =
>  				INIT_LOCAL_LOCK(memcache_lock);
> @@ -57,7 +75,6 @@ static void *kvm_arm_smmu_host_va(phys_addr_t pa)
>  	return __va(pa);
>  }
>  
> -__maybe_unused
>  static int kvm_arm_smmu_topup_memcache(struct arm_smmu_device *smmu)
>  {
>  	struct kvm_hyp_memcache *mc;
> @@ -74,7 +91,6 @@ static int kvm_arm_smmu_topup_memcache(struct arm_smmu_device *smmu)
>  				     kvm_arm_smmu_host_pa, smmu);
>  }
>  
> -__maybe_unused
>  static void kvm_arm_smmu_reclaim_memcache(void)
>  {
>  	struct kvm_hyp_memcache *mc;
> @@ -101,6 +117,299 @@ static void kvm_arm_smmu_reclaim_memcache(void)
>  	__ret;							\
>  })
>  
> +static struct platform_driver kvm_arm_smmu_driver;
> +
> +static struct arm_smmu_device *
> +kvm_arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
> +{
> +	struct device *dev;
> +
> +	dev = driver_find_device_by_fwnode(&kvm_arm_smmu_driver.driver, fwnode);
> +	put_device(dev);
> +	return dev ? dev_get_drvdata(dev) : NULL;
> +}
> +
> +static struct iommu_ops kvm_arm_smmu_ops;
> +
> +static struct iommu_device *kvm_arm_smmu_probe_device(struct device *dev)
> +{
> +	struct arm_smmu_device *smmu;
> +	struct kvm_arm_smmu_master *master;
> +	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
> +
> +	if (!fwspec || fwspec->ops != &kvm_arm_smmu_ops)
> +		return ERR_PTR(-ENODEV);
> +
> +	if (WARN_ON_ONCE(dev_iommu_priv_get(dev)))
> +		return ERR_PTR(-EBUSY);
> +
> +	smmu = kvm_arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
> +	if (!smmu)
> +		return ERR_PTR(-ENODEV);
> +
> +	master = kzalloc(sizeof(*master), GFP_KERNEL);
> +	if (!master)
> +		return ERR_PTR(-ENOMEM);
> +
> +	master->dev = dev;
> +	master->smmu = smmu;
> +	dev_iommu_priv_set(dev, master);
> +
> +	return &smmu->iommu;
> +}
> +
> +static void kvm_arm_smmu_release_device(struct device *dev)
> +{
> +	struct kvm_arm_smmu_master *master = dev_iommu_priv_get(dev);
> +
> +	kfree(master);
> +	iommu_fwspec_free(dev);
> +}
> +
> +static struct iommu_domain *kvm_arm_smmu_domain_alloc(unsigned type)
> +{
> +	struct kvm_arm_smmu_domain *kvm_smmu_domain;
> +
> +	/*
> +	 * We don't support
> +	 * - IOMMU_DOMAIN_IDENTITY because we rely on the host telling the
> +	 *   hypervisor which pages are used for DMA.
> +	 * - IOMMU_DOMAIN_DMA_FQ because lazy unmap would clash with memory
> +	 *   donation to guests.
> +	 */
> +	if (type != IOMMU_DOMAIN_DMA &&
> +	    type != IOMMU_DOMAIN_UNMANAGED)
> +		return NULL;
> +
> +	kvm_smmu_domain = kzalloc(sizeof(*kvm_smmu_domain), GFP_KERNEL);
> +	if (!kvm_smmu_domain)
> +		return NULL;
> +
> +	mutex_init(&kvm_smmu_domain->init_mutex);
> +
> +	return &kvm_smmu_domain->domain;
> +}
> +
> +static int kvm_arm_smmu_domain_finalize(struct kvm_arm_smmu_domain *kvm_smmu_domain,
> +					struct kvm_arm_smmu_master *master)
> +{
> +	int ret = 0;
> +	struct page *p;
> +	unsigned long pgd;
> +	struct arm_smmu_device *smmu = master->smmu;
> +	struct host_arm_smmu_device *host_smmu = smmu_to_host(smmu);
> +
> +	if (kvm_smmu_domain->smmu) {
> +		if (kvm_smmu_domain->smmu != smmu)
> +			return -EINVAL;
> +		return 0;
> +	}
> +
> +	ret = ida_alloc_range(&kvm_arm_smmu_domain_ida, 0, 1 << smmu->vmid_bits,
> +			      GFP_KERNEL);
> +	if (ret < 0)
> +		return ret;
> +	kvm_smmu_domain->id = ret;
> +
> +	/*
> +	 * PGD allocation does not use the memcache because it may be of higher
> +	 * order when concatenated.
> +	 */
> +	p = alloc_pages_node(dev_to_node(smmu->dev), GFP_KERNEL | __GFP_ZERO,
> +			     host_smmu->pgd_order);
> +	if (!p)
> +		return -ENOMEM;
> +
> +	pgd = (unsigned long)page_to_virt(p);
> +
> +	local_lock_irq(&memcache_lock);
> +	ret = kvm_call_hyp_nvhe_mc(smmu, __pkvm_host_iommu_alloc_domain,
> +				   host_smmu->id, kvm_smmu_domain->id, pgd);
> +	local_unlock_irq(&memcache_lock);
> +	if (ret)
> +		goto err_free;
> +
> +	kvm_smmu_domain->domain.pgsize_bitmap = smmu->pgsize_bitmap;
> +	kvm_smmu_domain->domain.geometry.aperture_end = (1UL << smmu->ias) - 1;
> +	kvm_smmu_domain->domain.geometry.force_aperture = true;
> +	kvm_smmu_domain->smmu = smmu;
> +	kvm_smmu_domain->pgd = pgd;
> +
> +	return 0;
> +
> +err_free:
> +	free_pages(pgd, host_smmu->pgd_order);
> +	ida_free(&kvm_arm_smmu_domain_ida, kvm_smmu_domain->id);
> +	return ret;
> +}
> +
> +static void kvm_arm_smmu_domain_free(struct iommu_domain *domain)
> +{
> +	int ret;
> +	struct kvm_arm_smmu_domain *kvm_smmu_domain = to_kvm_smmu_domain(domain);
> +	struct arm_smmu_device *smmu = kvm_smmu_domain->smmu;
> +
> +	if (smmu) {
> +		struct host_arm_smmu_device *host_smmu = smmu_to_host(smmu);
> +
> +		ret = kvm_call_hyp_nvhe(__pkvm_host_iommu_free_domain,
> +					host_smmu->id, kvm_smmu_domain->id);
> +		/*
> +		 * On failure, leak the pgd because it probably hasn't been
> +		 * reclaimed by the host.
> +		 */
> +		if (!WARN_ON(ret))
> +			free_pages(kvm_smmu_domain->pgd, host_smmu->pgd_order);
I believe this double-frees the pgd in case attach_dev() fails, as it
would also try to free it there (in kvm_arm_smmu_domain_finalize()).

I think this is the right place to free the pgd.

> +		ida_free(&kvm_arm_smmu_domain_ida, kvm_smmu_domain->id);
> +	}
> +	kfree(kvm_smmu_domain);
> +}
> +
> +static int kvm_arm_smmu_detach_dev(struct host_arm_smmu_device *host_smmu,
> +				   struct kvm_arm_smmu_master *master)
> +{
> +	int i, ret;
> +	struct arm_smmu_device *smmu = &host_smmu->smmu;
> +	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(master->dev);
> +
> +	if (!master->domain)
> +		return 0;
> +
> +	for (i = 0; i < fwspec->num_ids; i++) {
> +		int sid = fwspec->ids[i];
> +
> +		ret = kvm_call_hyp_nvhe(__pkvm_host_iommu_detach_dev,
> +					host_smmu->id, master->domain->id, sid);
> +		if (ret) {
> +			dev_err(smmu->dev, "cannot detach device %s (0x%x): %d\n",
> +				dev_name(master->dev), sid, ret);
> +			break;
> +		}
> +	}
> +
> +	master->domain = NULL;
> +
> +	return ret;
> +}
> +
> +static int kvm_arm_smmu_attach_dev(struct iommu_domain *domain,
> +				   struct device *dev)
> +{
> +	int i, ret;
> +	struct arm_smmu_device *smmu;
> +	struct host_arm_smmu_device *host_smmu;
> +	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
> +	struct kvm_arm_smmu_master *master = dev_iommu_priv_get(dev);
> +	struct kvm_arm_smmu_domain *kvm_smmu_domain = to_kvm_smmu_domain(domain);
> +
> +	if (!master)
> +		return -ENODEV;
> +
> +	smmu = master->smmu;
> +	host_smmu = smmu_to_host(smmu);
> +
> +	ret = kvm_arm_smmu_detach_dev(host_smmu, master);
> +	if (ret)
> +		return ret;
> +
> +	mutex_lock(&kvm_smmu_domain->init_mutex);
> +	ret = kvm_arm_smmu_domain_finalize(kvm_smmu_domain, master);
> +	mutex_unlock(&kvm_smmu_domain->init_mutex);
> +	if (ret)
> +		return ret;
> +
> +	local_lock_irq(&memcache_lock);
> +	for (i = 0; i < fwspec->num_ids; i++) {
> +		int sid = fwspec->ids[i];
> +
> +		ret = kvm_call_hyp_nvhe_mc(smmu, __pkvm_host_iommu_attach_dev,
> +					   host_smmu->id, kvm_smmu_domain->id,
> +					   sid);
> +		if (ret) {
> +			dev_err(smmu->dev, "cannot attach device %s (0x%x): %d\n",
> +				dev_name(dev), sid, ret);
> +			goto out_unlock;
> +		}
> +	}
> +	master->domain = kvm_smmu_domain;
> +
> +out_unlock:
> +	if (ret)
> +		kvm_arm_smmu_detach_dev(host_smmu, master);
> +	local_unlock_irq(&memcache_lock);
> +	return ret;
> +}
> +
> +static int kvm_arm_smmu_map_pages(struct iommu_domain *domain,
> +				  unsigned long iova, phys_addr_t paddr,
> +				  size_t pgsize, size_t pgcount, int prot,
> +				  gfp_t gfp, size_t *mapped)
> +{
> +	int ret;
> +	unsigned long irqflags;
> +	struct kvm_arm_smmu_domain *kvm_smmu_domain = to_kvm_smmu_domain(domain);
> +	struct arm_smmu_device *smmu = kvm_smmu_domain->smmu;
> +	struct host_arm_smmu_device *host_smmu = smmu_to_host(smmu);
> +
> +	local_lock_irqsave(&memcache_lock, irqflags);
> +	ret = kvm_call_hyp_nvhe_mc(smmu, __pkvm_host_iommu_map_pages,
> +				   host_smmu->id, kvm_smmu_domain->id, iova,
> +				   paddr, pgsize, pgcount, prot);
> +	local_unlock_irqrestore(&memcache_lock, irqflags);
> +	if (ret)
> +		return ret;
> +
> +	*mapped = pgsize * pgcount;
> +	return 0;
> +}
> +
> +static size_t kvm_arm_smmu_unmap_pages(struct iommu_domain *domain,
> +				       unsigned long iova, size_t pgsize,
> +				       size_t pgcount,
> +				       struct iommu_iotlb_gather *iotlb_gather)
> +{
> +	int ret;
> +	unsigned long irqflags;
> +	struct kvm_arm_smmu_domain *kvm_smmu_domain = to_kvm_smmu_domain(domain);
> +	struct arm_smmu_device *smmu = kvm_smmu_domain->smmu;
> +	struct host_arm_smmu_device *host_smmu = smmu_to_host(smmu);
> +
> +	local_lock_irqsave(&memcache_lock, irqflags);
> +	ret = kvm_call_hyp_nvhe_mc(smmu, __pkvm_host_iommu_unmap_pages,
> +				   host_smmu->id, kvm_smmu_domain->id, iova,
> +				   pgsize, pgcount);
> +	local_unlock_irqrestore(&memcache_lock, irqflags);
> +
> +	return ret ? 0 : pgsize * pgcount;
> +}
> +
> +static phys_addr_t kvm_arm_smmu_iova_to_phys(struct iommu_domain *domain,
> +					     dma_addr_t iova)
> +{
> +	struct kvm_arm_smmu_domain *kvm_smmu_domain = to_kvm_smmu_domain(domain);
> +	struct host_arm_smmu_device *host_smmu = smmu_to_host(kvm_smmu_domain->smmu);
> +
> +	return kvm_call_hyp_nvhe(__pkvm_host_iommu_iova_to_phys, host_smmu->id,
> +				 kvm_smmu_domain->id, iova);
> +}
> +
> +static struct iommu_ops kvm_arm_smmu_ops = {
> +	.capable		= arm_smmu_capable,
> +	.device_group		= arm_smmu_device_group,
> +	.of_xlate		= arm_smmu_of_xlate,
> +	.probe_device		= kvm_arm_smmu_probe_device,
> +	.release_device		= kvm_arm_smmu_release_device,
> +	.domain_alloc		= kvm_arm_smmu_domain_alloc,
> +	.owner			= THIS_MODULE,
> +	.default_domain_ops = &(const struct iommu_domain_ops) {
> +		.attach_dev	= kvm_arm_smmu_attach_dev,
> +		.free		= kvm_arm_smmu_domain_free,
> +		.map_pages	= kvm_arm_smmu_map_pages,
> +		.unmap_pages	= kvm_arm_smmu_unmap_pages,
> +		.iova_to_phys	= kvm_arm_smmu_iova_to_phys,
> +	}
> +};
> +
>  static bool kvm_arm_smmu_validate_features(struct arm_smmu_device *smmu)
>  {
>  	unsigned long oas;
> @@ -186,6 +495,12 @@ static int kvm_arm_smmu_device_reset(struct host_arm_smmu_device *host_smmu)
>  	return 0;
>  }
>  
> +static void *kvm_arm_smmu_alloc_domains(struct arm_smmu_device *smmu)
> +{
> +	return (void *)devm_get_free_pages(smmu->dev, GFP_KERNEL | __GFP_ZERO,
> +					   get_order(KVM_IOMMU_DOMAINS_ROOT_SIZE));
> +}
> +
>  static int kvm_arm_smmu_probe(struct platform_device *pdev)
>  {
>  	int ret;
> @@ -274,6 +589,16 @@ static int kvm_arm_smmu_probe(struct platform_device *pdev)
>  	if (ret)
>  		return ret;
>  
> +	hyp_smmu->iommu.domains = kvm_arm_smmu_alloc_domains(smmu);
> +	if (!hyp_smmu->iommu.domains)
> +		return -ENOMEM;
> +
> +	hyp_smmu->iommu.nr_domains = 1 << smmu->vmid_bits;
> +
> +	ret = arm_smmu_register_iommu(smmu, &kvm_arm_smmu_ops, ioaddr);
> +	if (ret)
> +		return ret;
> +
>  	platform_set_drvdata(pdev, host_smmu);
>  
>  	/* Hypervisor parameters */
> @@ -296,6 +621,7 @@ static int kvm_arm_smmu_remove(struct platform_device *pdev)
>  	 * There was an error during hypervisor setup. The hyp driver may
>  	 * have already enabled the device, so disable it.
>  	 */
> +	arm_smmu_unregister_iommu(smmu);
>  	arm_smmu_device_disable(smmu);
>  	arm_smmu_update_gbpa(smmu, host_smmu->boot_gbpa, GBPA_ABORT);
>  	return 0;
> -- 
> 2.39.0
> 
>
Thanks,
Mostafa
Jean-Philippe Brucker Sept. 25, 2023, 5:18 p.m. UTC | #4
On Wed, Sep 20, 2023 at 04:27:41PM +0000, Mostafa Saleh wrote:
> > +static void kvm_arm_smmu_domain_free(struct iommu_domain *domain)
> > +{
> > +	int ret;
> > +	struct kvm_arm_smmu_domain *kvm_smmu_domain = to_kvm_smmu_domain(domain);
> > +	struct arm_smmu_device *smmu = kvm_smmu_domain->smmu;
> > +
> > +	if (smmu) {
> > +		struct host_arm_smmu_device *host_smmu = smmu_to_host(smmu);
> > +
> > +		ret = kvm_call_hyp_nvhe(__pkvm_host_iommu_free_domain,
> > +					host_smmu->id, kvm_smmu_domain->id);
> > +		/*
> > +		 * On failure, leak the pgd because it probably hasn't been
> > +		 * reclaimed by the host.
> > +		 */
> > +		if (!WARN_ON(ret))
> > +			free_pages(kvm_smmu_domain->pgd, host_smmu->pgd_order);
> I believe this double-frees the pgd in case attach_dev() fails, as it
> would also try to free it there (in kvm_arm_smmu_domain_finalize()).
> 
> I think this is the right place to free the pgd.

Since this depends on kvm_smmu_domain->smmu being non-NULL, which is only
true if finalize() succeeded, we shouldn't get a double-free.

But finalize() does leak kvm_smmu_domain->id if the pgd allocation fails;
I fixed that.

Thanks,
Jean
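
For reference, one way the id leak mentioned above could be plugged in
kvm_arm_smmu_domain_finalize() is to release the IDA entry on the PGD
allocation failure path as well. This is only an illustrative sketch of
the error paths, written against the code quoted above, and not
necessarily the fix that went into the next revision:

	ret = ida_alloc_range(&kvm_arm_smmu_domain_ida, 0, 1 << smmu->vmid_bits,
			      GFP_KERNEL);
	if (ret < 0)
		return ret;
	kvm_smmu_domain->id = ret;

	p = alloc_pages_node(dev_to_node(smmu->dev), GFP_KERNEL | __GFP_ZERO,
			     host_smmu->pgd_order);
	if (!p) {
		ret = -ENOMEM;
		goto err_free_ida;	/* was: return -ENOMEM, leaking the id */
	}
	[...]
err_free:
	free_pages(pgd, host_smmu->pgd_order);
err_free_ida:
	ida_free(&kvm_arm_smmu_domain_ida, kvm_smmu_domain->id);
	return ret;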
Mostafa Saleh Sept. 26, 2023, 9:54 a.m. UTC | #5
Hi Jean,

On Mon, Sep 25, 2023 at 06:18:53PM +0100, Jean-Philippe Brucker wrote:
> On Wed, Sep 20, 2023 at 04:27:41PM +0000, Mostafa Saleh wrote:
> > > +static void kvm_arm_smmu_domain_free(struct iommu_domain *domain)
> > > +{
> > > +	int ret;
> > > +	struct kvm_arm_smmu_domain *kvm_smmu_domain = to_kvm_smmu_domain(domain);
> > > +	struct arm_smmu_device *smmu = kvm_smmu_domain->smmu;
> > > +
> > > +	if (smmu) {
> > > +		struct host_arm_smmu_device *host_smmu = smmu_to_host(smmu);
> > > +
> > > +		ret = kvm_call_hyp_nvhe(__pkvm_host_iommu_free_domain,
> > > +					host_smmu->id, kvm_smmu_domain->id);
> > > +		/*
> > > +		 * On failure, leak the pgd because it probably hasn't been
> > > +		 * reclaimed by the host.
> > > +		 */
> > > +		if (!WARN_ON(ret))
> > > +			free_pages(kvm_smmu_domain->pgd, host_smmu->pgd_order);
> > I believe this double-frees the pgd in case attach_dev() fails, as it
> > would also try to free it there (in kvm_arm_smmu_domain_finalize()).
> > 
> > I think this is the right place to free the pgd.
> 
> Since this depends on kvm_smmu_domain->smmu being non-NULL, which is only
> true if finalize() succeeded, we shouldn't get a double-free.

Yes, the other free was coming from an experiment I was doing to use
the IOMMU layer with guest VMs, so this is correct. Sorry about that.

> But finalize() does leak kvm_smmu_domain->id if the pgd allocation fails;
> I fixed that.

Thanks!

> Thanks,
> Jean

Patch

diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-kvm.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-kvm.c
index 55489d56fb5b..930d78f6e29f 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-kvm.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-kvm.c
@@ -22,10 +22,28 @@  struct host_arm_smmu_device {
 #define smmu_to_host(_smmu) \
 	container_of(_smmu, struct host_arm_smmu_device, smmu);
 
+struct kvm_arm_smmu_master {
+	struct arm_smmu_device		*smmu;
+	struct device			*dev;
+	struct kvm_arm_smmu_domain	*domain;
+};
+
+struct kvm_arm_smmu_domain {
+	struct iommu_domain		domain;
+	struct arm_smmu_device		*smmu;
+	struct mutex			init_mutex;
+	unsigned long			pgd;
+	pkvm_handle_t			id;
+};
+
+#define to_kvm_smmu_domain(_domain) \
+	container_of(_domain, struct kvm_arm_smmu_domain, domain)
+
 static size_t				kvm_arm_smmu_cur;
 static size_t				kvm_arm_smmu_count;
 static struct hyp_arm_smmu_v3_device	*kvm_arm_smmu_array;
 static struct kvm_hyp_iommu_memcache	*kvm_arm_smmu_memcache;
+static DEFINE_IDA(kvm_arm_smmu_domain_ida);
 
 static DEFINE_PER_CPU(local_lock_t, memcache_lock) =
 				INIT_LOCAL_LOCK(memcache_lock);
@@ -57,7 +75,6 @@  static void *kvm_arm_smmu_host_va(phys_addr_t pa)
 	return __va(pa);
 }
 
-__maybe_unused
 static int kvm_arm_smmu_topup_memcache(struct arm_smmu_device *smmu)
 {
 	struct kvm_hyp_memcache *mc;
@@ -74,7 +91,6 @@  static int kvm_arm_smmu_topup_memcache(struct arm_smmu_device *smmu)
 				     kvm_arm_smmu_host_pa, smmu);
 }
 
-__maybe_unused
 static void kvm_arm_smmu_reclaim_memcache(void)
 {
 	struct kvm_hyp_memcache *mc;
@@ -101,6 +117,299 @@  static void kvm_arm_smmu_reclaim_memcache(void)
 	__ret;							\
 })
 
+static struct platform_driver kvm_arm_smmu_driver;
+
+static struct arm_smmu_device *
+kvm_arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
+{
+	struct device *dev;
+
+	dev = driver_find_device_by_fwnode(&kvm_arm_smmu_driver.driver, fwnode);
+	put_device(dev);
+	return dev ? dev_get_drvdata(dev) : NULL;
+}
+
+static struct iommu_ops kvm_arm_smmu_ops;
+
+static struct iommu_device *kvm_arm_smmu_probe_device(struct device *dev)
+{
+	struct arm_smmu_device *smmu;
+	struct kvm_arm_smmu_master *master;
+	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
+
+	if (!fwspec || fwspec->ops != &kvm_arm_smmu_ops)
+		return ERR_PTR(-ENODEV);
+
+	if (WARN_ON_ONCE(dev_iommu_priv_get(dev)))
+		return ERR_PTR(-EBUSY);
+
+	smmu = kvm_arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
+	if (!smmu)
+		return ERR_PTR(-ENODEV);
+
+	master = kzalloc(sizeof(*master), GFP_KERNEL);
+	if (!master)
+		return ERR_PTR(-ENOMEM);
+
+	master->dev = dev;
+	master->smmu = smmu;
+	dev_iommu_priv_set(dev, master);
+
+	return &smmu->iommu;
+}
+
+static void kvm_arm_smmu_release_device(struct device *dev)
+{
+	struct kvm_arm_smmu_master *master = dev_iommu_priv_get(dev);
+
+	kfree(master);
+	iommu_fwspec_free(dev);
+}
+
+static struct iommu_domain *kvm_arm_smmu_domain_alloc(unsigned type)
+{
+	struct kvm_arm_smmu_domain *kvm_smmu_domain;
+
+	/*
+	 * We don't support
+	 * - IOMMU_DOMAIN_IDENTITY because we rely on the host telling the
+	 *   hypervisor which pages are used for DMA.
+	 * - IOMMU_DOMAIN_DMA_FQ because lazy unmap would clash with memory
+	 *   donation to guests.
+	 */
+	if (type != IOMMU_DOMAIN_DMA &&
+	    type != IOMMU_DOMAIN_UNMANAGED)
+		return NULL;
+
+	kvm_smmu_domain = kzalloc(sizeof(*kvm_smmu_domain), GFP_KERNEL);
+	if (!kvm_smmu_domain)
+		return NULL;
+
+	mutex_init(&kvm_smmu_domain->init_mutex);
+
+	return &kvm_smmu_domain->domain;
+}
+
+static int kvm_arm_smmu_domain_finalize(struct kvm_arm_smmu_domain *kvm_smmu_domain,
+					struct kvm_arm_smmu_master *master)
+{
+	int ret = 0;
+	struct page *p;
+	unsigned long pgd;
+	struct arm_smmu_device *smmu = master->smmu;
+	struct host_arm_smmu_device *host_smmu = smmu_to_host(smmu);
+
+	if (kvm_smmu_domain->smmu) {
+		if (kvm_smmu_domain->smmu != smmu)
+			return -EINVAL;
+		return 0;
+	}
+
+	ret = ida_alloc_range(&kvm_arm_smmu_domain_ida, 0, 1 << smmu->vmid_bits,
+			      GFP_KERNEL);
+	if (ret < 0)
+		return ret;
+	kvm_smmu_domain->id = ret;
+
+	/*
+	 * PGD allocation does not use the memcache because it may be of higher
+	 * order when concatenated.
+	 */
+	p = alloc_pages_node(dev_to_node(smmu->dev), GFP_KERNEL | __GFP_ZERO,
+			     host_smmu->pgd_order);
+	if (!p)
+		return -ENOMEM;
+
+	pgd = (unsigned long)page_to_virt(p);
+
+	local_lock_irq(&memcache_lock);
+	ret = kvm_call_hyp_nvhe_mc(smmu, __pkvm_host_iommu_alloc_domain,
+				   host_smmu->id, kvm_smmu_domain->id, pgd);
+	local_unlock_irq(&memcache_lock);
+	if (ret)
+		goto err_free;
+
+	kvm_smmu_domain->domain.pgsize_bitmap = smmu->pgsize_bitmap;
+	kvm_smmu_domain->domain.geometry.aperture_end = (1UL << smmu->ias) - 1;
+	kvm_smmu_domain->domain.geometry.force_aperture = true;
+	kvm_smmu_domain->smmu = smmu;
+	kvm_smmu_domain->pgd = pgd;
+
+	return 0;
+
+err_free:
+	free_pages(pgd, host_smmu->pgd_order);
+	ida_free(&kvm_arm_smmu_domain_ida, kvm_smmu_domain->id);
+	return ret;
+}
+
+static void kvm_arm_smmu_domain_free(struct iommu_domain *domain)
+{
+	int ret;
+	struct kvm_arm_smmu_domain *kvm_smmu_domain = to_kvm_smmu_domain(domain);
+	struct arm_smmu_device *smmu = kvm_smmu_domain->smmu;
+
+	if (smmu) {
+		struct host_arm_smmu_device *host_smmu = smmu_to_host(smmu);
+
+		ret = kvm_call_hyp_nvhe(__pkvm_host_iommu_free_domain,
+					host_smmu->id, kvm_smmu_domain->id);
+		/*
+		 * On failure, leak the pgd because it probably hasn't been
+		 * reclaimed by the host.
+		 */
+		if (!WARN_ON(ret))
+			free_pages(kvm_smmu_domain->pgd, host_smmu->pgd_order);
+		ida_free(&kvm_arm_smmu_domain_ida, kvm_smmu_domain->id);
+	}
+	kfree(kvm_smmu_domain);
+}
+
+static int kvm_arm_smmu_detach_dev(struct host_arm_smmu_device *host_smmu,
+				   struct kvm_arm_smmu_master *master)
+{
+	int i, ret;
+	struct arm_smmu_device *smmu = &host_smmu->smmu;
+	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(master->dev);
+
+	if (!master->domain)
+		return 0;
+
+	for (i = 0; i < fwspec->num_ids; i++) {
+		int sid = fwspec->ids[i];
+
+		ret = kvm_call_hyp_nvhe(__pkvm_host_iommu_detach_dev,
+					host_smmu->id, master->domain->id, sid);
+		if (ret) {
+			dev_err(smmu->dev, "cannot detach device %s (0x%x): %d\n",
+				dev_name(master->dev), sid, ret);
+			break;
+		}
+	}
+
+	master->domain = NULL;
+
+	return ret;
+}
+
+static int kvm_arm_smmu_attach_dev(struct iommu_domain *domain,
+				   struct device *dev)
+{
+	int i, ret;
+	struct arm_smmu_device *smmu;
+	struct host_arm_smmu_device *host_smmu;
+	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
+	struct kvm_arm_smmu_master *master = dev_iommu_priv_get(dev);
+	struct kvm_arm_smmu_domain *kvm_smmu_domain = to_kvm_smmu_domain(domain);
+
+	if (!master)
+		return -ENODEV;
+
+	smmu = master->smmu;
+	host_smmu = smmu_to_host(smmu);
+
+	ret = kvm_arm_smmu_detach_dev(host_smmu, master);
+	if (ret)
+		return ret;
+
+	mutex_lock(&kvm_smmu_domain->init_mutex);
+	ret = kvm_arm_smmu_domain_finalize(kvm_smmu_domain, master);
+	mutex_unlock(&kvm_smmu_domain->init_mutex);
+	if (ret)
+		return ret;
+
+	local_lock_irq(&memcache_lock);
+	for (i = 0; i < fwspec->num_ids; i++) {
+		int sid = fwspec->ids[i];
+
+		ret = kvm_call_hyp_nvhe_mc(smmu, __pkvm_host_iommu_attach_dev,
+					   host_smmu->id, kvm_smmu_domain->id,
+					   sid);
+		if (ret) {
+			dev_err(smmu->dev, "cannot attach device %s (0x%x): %d\n",
+				dev_name(dev), sid, ret);
+			goto out_unlock;
+		}
+	}
+	master->domain = kvm_smmu_domain;
+
+out_unlock:
+	if (ret)
+		kvm_arm_smmu_detach_dev(host_smmu, master);
+	local_unlock_irq(&memcache_lock);
+	return ret;
+}
+
+static int kvm_arm_smmu_map_pages(struct iommu_domain *domain,
+				  unsigned long iova, phys_addr_t paddr,
+				  size_t pgsize, size_t pgcount, int prot,
+				  gfp_t gfp, size_t *mapped)
+{
+	int ret;
+	unsigned long irqflags;
+	struct kvm_arm_smmu_domain *kvm_smmu_domain = to_kvm_smmu_domain(domain);
+	struct arm_smmu_device *smmu = kvm_smmu_domain->smmu;
+	struct host_arm_smmu_device *host_smmu = smmu_to_host(smmu);
+
+	local_lock_irqsave(&memcache_lock, irqflags);
+	ret = kvm_call_hyp_nvhe_mc(smmu, __pkvm_host_iommu_map_pages,
+				   host_smmu->id, kvm_smmu_domain->id, iova,
+				   paddr, pgsize, pgcount, prot);
+	local_unlock_irqrestore(&memcache_lock, irqflags);
+	if (ret)
+		return ret;
+
+	*mapped = pgsize * pgcount;
+	return 0;
+}
+
+static size_t kvm_arm_smmu_unmap_pages(struct iommu_domain *domain,
+				       unsigned long iova, size_t pgsize,
+				       size_t pgcount,
+				       struct iommu_iotlb_gather *iotlb_gather)
+{
+	int ret;
+	unsigned long irqflags;
+	struct kvm_arm_smmu_domain *kvm_smmu_domain = to_kvm_smmu_domain(domain);
+	struct arm_smmu_device *smmu = kvm_smmu_domain->smmu;
+	struct host_arm_smmu_device *host_smmu = smmu_to_host(smmu);
+
+	local_lock_irqsave(&memcache_lock, irqflags);
+	ret = kvm_call_hyp_nvhe_mc(smmu, __pkvm_host_iommu_unmap_pages,
+				   host_smmu->id, kvm_smmu_domain->id, iova,
+				   pgsize, pgcount);
+	local_unlock_irqrestore(&memcache_lock, irqflags);
+
+	return ret ? 0 : pgsize * pgcount;
+}
+
+static phys_addr_t kvm_arm_smmu_iova_to_phys(struct iommu_domain *domain,
+					     dma_addr_t iova)
+{
+	struct kvm_arm_smmu_domain *kvm_smmu_domain = to_kvm_smmu_domain(domain);
+	struct host_arm_smmu_device *host_smmu = smmu_to_host(kvm_smmu_domain->smmu);
+
+	return kvm_call_hyp_nvhe(__pkvm_host_iommu_iova_to_phys, host_smmu->id,
+				 kvm_smmu_domain->id, iova);
+}
+
+static struct iommu_ops kvm_arm_smmu_ops = {
+	.capable		= arm_smmu_capable,
+	.device_group		= arm_smmu_device_group,
+	.of_xlate		= arm_smmu_of_xlate,
+	.probe_device		= kvm_arm_smmu_probe_device,
+	.release_device		= kvm_arm_smmu_release_device,
+	.domain_alloc		= kvm_arm_smmu_domain_alloc,
+	.owner			= THIS_MODULE,
+	.default_domain_ops = &(const struct iommu_domain_ops) {
+		.attach_dev	= kvm_arm_smmu_attach_dev,
+		.free		= kvm_arm_smmu_domain_free,
+		.map_pages	= kvm_arm_smmu_map_pages,
+		.unmap_pages	= kvm_arm_smmu_unmap_pages,
+		.iova_to_phys	= kvm_arm_smmu_iova_to_phys,
+	}
+};
+
 static bool kvm_arm_smmu_validate_features(struct arm_smmu_device *smmu)
 {
 	unsigned long oas;
@@ -186,6 +495,12 @@  static int kvm_arm_smmu_device_reset(struct host_arm_smmu_device *host_smmu)
 	return 0;
 }
 
+static void *kvm_arm_smmu_alloc_domains(struct arm_smmu_device *smmu)
+{
+	return (void *)devm_get_free_pages(smmu->dev, GFP_KERNEL | __GFP_ZERO,
+					   get_order(KVM_IOMMU_DOMAINS_ROOT_SIZE));
+}
+
 static int kvm_arm_smmu_probe(struct platform_device *pdev)
 {
 	int ret;
@@ -274,6 +589,16 @@  static int kvm_arm_smmu_probe(struct platform_device *pdev)
 	if (ret)
 		return ret;
 
+	hyp_smmu->iommu.domains = kvm_arm_smmu_alloc_domains(smmu);
+	if (!hyp_smmu->iommu.domains)
+		return -ENOMEM;
+
+	hyp_smmu->iommu.nr_domains = 1 << smmu->vmid_bits;
+
+	ret = arm_smmu_register_iommu(smmu, &kvm_arm_smmu_ops, ioaddr);
+	if (ret)
+		return ret;
+
 	platform_set_drvdata(pdev, host_smmu);
 
 	/* Hypervisor parameters */
@@ -296,6 +621,7 @@  static int kvm_arm_smmu_remove(struct platform_device *pdev)
 	 * There was an error during hypervisor setup. The hyp driver may
 	 * have already enabled the device, so disable it.
 	 */
+	arm_smmu_unregister_iommu(smmu);
 	arm_smmu_device_disable(smmu);
 	arm_smmu_update_gbpa(smmu, host_smmu->boot_gbpa, GBPA_ABORT);
 	return 0;