[v2,5/5] vfio/iommu_type1: Simplify group attachment

Message ID 20220616000304.23890-6-nicolinc@nvidia.com (mailing list archive)
State Superseded
Headers show
Series Simplify vfio_iommu_type1 attach/detach routine | expand

Commit Message

Nicolin Chen June 16, 2022, 12:03 a.m. UTC
Un-inline the domain specific logic from the attach/detach_group ops into
two paired functions vfio_iommu_alloc_attach_domain() and
vfio_iommu_detach_destroy_domain() that strictly deal with creating and
destroying struct vfio_domains.

Add the logic to check for EMEDIUMTYPE return code of iommu_attach_group()
and avoid the extra domain allocations and attach/detach sequences of the
old code. This allows properly detecting an actual attach error, like
-ENOMEM, vs treating all attach errors as an incompatible domain.

Co-developed-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
---
 drivers/vfio/vfio_iommu_type1.c | 298 +++++++++++++++++---------------
 1 file changed, 163 insertions(+), 135 deletions(-)

Comments

Tian, Kevin June 16, 2022, 7:08 a.m. UTC | #1
> From: Nicolin Chen
> Sent: Thursday, June 16, 2022 8:03 AM
> 
> Un-inline the domain specific logic from the attach/detach_group ops into
> two paired functions vfio_iommu_alloc_attach_domain() and
> vfio_iommu_detach_destroy_domain() that strictly deal with creating and
> destroying struct vfio_domains.
> 
> Add the logic to check for EMEDIUMTYPE return code of iommu_attach_group()
> and avoid the extra domain allocations and attach/detach sequences of the
> old code. This allows properly detecting an actual attach error, like
> -ENOMEM, vs treating all attach errors as an incompatible domain.
> 
> Co-developed-by: Jason Gunthorpe <jgg@nvidia.com>
> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
> Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
> ---
>  drivers/vfio/vfio_iommu_type1.c | 298 +++++++++++++++++---------------
>  1 file changed, 163 insertions(+), 135 deletions(-)
> 

...
> +static struct vfio_domain *
> +vfio_iommu_alloc_attach_domain(struct bus_type *bus, struct vfio_iommu *iommu,
> +			       struct vfio_iommu_group *group)
> +{
> +	struct iommu_domain *new_domain;
> +	struct vfio_domain *domain;
> +	int ret = 0;
> +
> +	/* Try to match an existing compatible domain */
> +	list_for_each_entry (domain, &iommu->domain_list, next) {
> 	ret = iommu_attach_group(domain->domain, group->iommu_group);
> +		if (ret == -EMEDIUMTYPE)
> +			continue;

Probably good to add a one-line comment here for what EMEDIUMTYPE
represents. It's not a widely-used retry code like EAGAIN. A comment
can save the reader the time of digging out the fact by jumping to the
iommu file.

...
> -	if (resv_msi) {
> +	if (resv_msi && !domain->msi_cookie) {
>  		ret = iommu_get_msi_cookie(domain->domain, resv_msi_base);
>  		if (ret && ret != -ENODEV)
>  			goto out_detach;
> +		domain->msi_cookie = true;
>  	}

why not move this into alloc_attach_domain()? Then there is no need for
the new domain field; it's required only when a new domain is allocated.

...
> -		if (list_empty(&domain->group_list)) {
> -			if (list_is_singular(&iommu->domain_list)) {
> -				if (list_empty(&iommu->emulated_iommu_groups)) {
> -					WARN_ON(iommu->notifier.head);
> -					vfio_iommu_unmap_unpin_all(iommu);
> -				} else {
> -					vfio_iommu_unmap_unpin_reaccount(iommu);
> -				}
> -				}
> -			}
> -			iommu_domain_free(domain->domain);
> -			list_del(&domain->next);
> -			kfree(domain);
> -			vfio_iommu_aper_expand(iommu, &iova_copy);

Previously the aperture is adjusted when a domain is freed...

> -			vfio_update_pgsize_bitmap(iommu);
> -		}
> -		/*
> -		 * Removal of a group without dirty tracking may allow
> -		 * the iommu scope to be promoted.
> -		 */
> -		if (!group->pinned_page_dirty_scope) {
> -			iommu->num_non_pinned_groups--;
> -			if (iommu->dirty_page_tracking)
> -				vfio_iommu_populate_bitmap_full(iommu);
> -		}
> +		vfio_iommu_detach_destroy_domain(domain, iommu, group);
>  		kfree(group);
>  		break;
>  	}
> 
> +	vfio_iommu_aper_expand(iommu, &iova_copy);

but now it's done for every group detach. The aperture is decided
by domain geometry which is not affected by attached groups.
Nicolin Chen June 16, 2022, 10:40 p.m. UTC | #2
On Thu, Jun 16, 2022 at 07:08:10AM +0000, Tian, Kevin wrote:
> ...
> > +static struct vfio_domain *
> > +vfio_iommu_alloc_attach_domain(struct bus_type *bus, struct vfio_iommu *iommu,
> > +                            struct vfio_iommu_group *group)
> > +{
> > +     struct iommu_domain *new_domain;
> > +     struct vfio_domain *domain;
> > +     int ret = 0;
> > +
> > +     /* Try to match an existing compatible domain */
> > +     list_for_each_entry (domain, &iommu->domain_list, next) {
> > +             ret = iommu_attach_group(domain->domain, group->iommu_group);
> > +             if (ret == -EMEDIUMTYPE)
> > +                     continue;
> 
> Probably good to add a one-line comment here for what EMEDIUMTYPE
> represents. It's not a widely-used retry code like EAGAIN. A comment
> can save the reader the time of digging out the fact by jumping to the
> iommu file.

Sure. I can add that.
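
Perhaps something along these lines (just a sketch of the comment, not
the final wording):

	/* Try to match an existing compatible domain */
	list_for_each_entry (domain, &iommu->domain_list, next) {
		ret = iommu_attach_group(domain->domain, group->iommu_group);
		/*
		 * -EMEDIUMTYPE means the group is incompatible with this
		 * domain, so keep trying the remaining domains instead of
		 * treating it as a fatal attach error.
		 */
		if (ret == -EMEDIUMTYPE)
			continue;
		if (ret)
			return ERR_PTR(ret);
		goto done;
	}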

> ...
> > -     if (resv_msi) {
> > +     if (resv_msi && !domain->msi_cookie) {
> >               ret = iommu_get_msi_cookie(domain->domain, resv_msi_base);
> >               if (ret && ret != -ENODEV)
> >                       goto out_detach;
> > +             domain->msi_cookie = true;
> >       }
> 
> why not move this into alloc_attach_domain()? Then there is no need for
> the new domain field; it's required only when a new domain is allocated.

When reusing an existing domain that doesn't have an msi_cookie,
we can do iommu_get_msi_cookie() if resv_msi is found. So it is
not limited to a new domain.

> ...
> > -             if (list_empty(&domain->group_list)) {
> > -                     if (list_is_singular(&iommu->domain_list)) {
> > -                             if (list_empty(&iommu->emulated_iommu_groups)) {
> > -                                     WARN_ON(iommu->notifier.head);
> > -                                     vfio_iommu_unmap_unpin_all(iommu);
> > -                             } else {
> > -                                     vfio_iommu_unmap_unpin_reaccount(iommu);
> > -                             }
> > -                             }
> > -                     }
> > -                     iommu_domain_free(domain->domain);
> > -                     list_del(&domain->next);
> > -                     kfree(domain);
> > -                     vfio_iommu_aper_expand(iommu, &iova_copy);
> 
> Previously the aperture is adjusted when a domain is freed...
> 
> > -                     vfio_update_pgsize_bitmap(iommu);
> > -             }
> > -             /*
> > -              * Removal of a group without dirty tracking may allow
> > -              * the iommu scope to be promoted.
> > -              */
> > -             if (!group->pinned_page_dirty_scope) {
> > -                     iommu->num_non_pinned_groups--;
> > -                     if (iommu->dirty_page_tracking)
> > -                             vfio_iommu_populate_bitmap_full(iommu);
> > -             }
> > +             vfio_iommu_detach_destroy_domain(domain, iommu, group);
> >               kfree(group);
> >               break;
> >       }
> >
> > +     vfio_iommu_aper_expand(iommu, &iova_copy);
> 
> but now it's done for every group detach. The aperture is decided
> by domain geometry which is not affected by attached groups.

Yea, I've noticed this part. Actually Jason did this change for
simplicity, and I think it'd be safe to do so?
Tian, Kevin June 17, 2022, 2:53 a.m. UTC | #3
> From: Nicolin Chen <nicolinc@nvidia.com>
> Sent: Friday, June 17, 2022 6:41 AM
> 
> > ...
> > > -     if (resv_msi) {
> > > +     if (resv_msi && !domain->msi_cookie) {
> > >               ret = iommu_get_msi_cookie(domain->domain, resv_msi_base);
> > >               if (ret && ret != -ENODEV)
> > >                       goto out_detach;
> > > +             domain->msi_cookie = true;
> > >       }
> >
> > why not move this into alloc_attach_domain()? Then there is no need for
> > the new domain field; it's required only when a new domain is allocated.
> 
> When reusing an existing domain that doesn't have an msi_cookie,
> we can do iommu_get_msi_cookie() if resv_msi is found. So it is
> not limited to a new domain.

Looks like the msi_cookie requirement is per platform (currently only
for smmu; see arm_smmu_get_resv_regions()). If there is no mixed case
then the above check is not required.

But let's hear whether Robin has a different thought here.

> 
> > ...
> > > -             if (list_empty(&domain->group_list)) {
> > > -                     if (list_is_singular(&iommu->domain_list)) {
> > > -                             if (list_empty(&iommu->emulated_iommu_groups)) {
> > > -                                     WARN_ON(iommu->notifier.head);
> > > -                                     vfio_iommu_unmap_unpin_all(iommu);
> > > -                             } else {
> > > -                                     vfio_iommu_unmap_unpin_reaccount(iommu);
> > > -                             }
> > > -                             }
> > > -                     }
> > > -                     iommu_domain_free(domain->domain);
> > > -                     list_del(&domain->next);
> > > -                     kfree(domain);
> > > -                     vfio_iommu_aper_expand(iommu, &iova_copy);
> >
> > Previously the aperture is adjusted when a domain is freed...
> >
> > > -                     vfio_update_pgsize_bitmap(iommu);
> > > -             }
> > > -             /*
> > > -              * Removal of a group without dirty tracking may allow
> > > -              * the iommu scope to be promoted.
> > > -              */
> > > -             if (!group->pinned_page_dirty_scope) {
> > > -                     iommu->num_non_pinned_groups--;
> > > -                     if (iommu->dirty_page_tracking)
> > > -                             vfio_iommu_populate_bitmap_full(iommu);
> > > -             }
> > > +             vfio_iommu_detach_destroy_domain(domain, iommu, group);
> > >               kfree(group);
> > >               break;
> > >       }
> > >
> > > +     vfio_iommu_aper_expand(iommu, &iova_copy);
> >
> > but now it's done for every group detach. The aperture is decided
> > by domain geometry which is not affected by attached groups.
> 
> Yea, I've noticed this part. Actually Jason did this change for
> simplicity, and I think it'd be safe to do so?

Perhaps detach_destroy() can return a Boolean to indicate whether
a domain is destroyed.
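
i.e. roughly (just a sketch of the idea, not a full patch):

	if (vfio_iommu_detach_destroy_domain(domain, iommu, group))
		vfio_iommu_aper_expand(iommu, &iova_copy);

with the helper returning true only when it actually frees the
vfio_domain, so the aperture is recomputed only in that case.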
Nicolin Chen June 17, 2022, 11:07 p.m. UTC | #4
On Fri, Jun 17, 2022 at 02:53:13AM +0000, Tian, Kevin wrote:
> > > ...
> > > > -     if (resv_msi) {
> > > > +     if (resv_msi && !domain->msi_cookie) {
> > > >               ret = iommu_get_msi_cookie(domain->domain, resv_msi_base);
> > > >               if (ret && ret != -ENODEV)
> > > >                       goto out_detach;
> > > > +             domain->msi_cookie = true;
> > > >       }
> > >
> > > why not move this into alloc_attach_domain()? Then there is no need for
> > > the new domain field; it's required only when a new domain is allocated.
> >
> > When reusing an existing domain that doesn't have an msi_cookie,
> > we can do iommu_get_msi_cookie() if resv_msi is found. So it is
> > not limited to a new domain.
> 
> Looks like the msi_cookie requirement is per platform (currently only
> for smmu; see arm_smmu_get_resv_regions()). If there is no mixed case
> then the above check is not required.

Do you mean "reusing existing domain" for the "mixed case"?

> But let's hear whether Robin has a different thought here.

Yea, sure.

> > > > -                     iommu_domain_free(domain->domain);
> > > > -                     list_del(&domain->next);
> > > > -                     kfree(domain);
> > > > -                     vfio_iommu_aper_expand(iommu, &iova_copy);
> > >
> > > Previously the aperture is adjusted when a domain is freed...
> > >
> > > > -                     vfio_update_pgsize_bitmap(iommu);
> > > > -             }
> > > > -             /*
> > > > -              * Removal of a group without dirty tracking may allow
> > > > -              * the iommu scope to be promoted.
> > > > -              */
> > > > -             if (!group->pinned_page_dirty_scope) {
> > > > -                     iommu->num_non_pinned_groups--;
> > > > -                     if (iommu->dirty_page_tracking)
> > > > -                             vfio_iommu_populate_bitmap_full(iommu);
> > > > -             }
> > > > +             vfio_iommu_detach_destroy_domain(domain, iommu, group);
> > > >               kfree(group);
> > > >               break;
> > > >       }
> > > >
> > > > +     vfio_iommu_aper_expand(iommu, &iova_copy);
> > >
> > > but now it's done for every group detach. The aperture is decided
> > > by domain geometry which is not affected by attached groups.
> >
> > Yea, I've noticed this part. Actually Jason did this change for
> > simplicity, and I think it'd be safe to do so?
> 
> Perhaps detach_destroy() can return a Boolean to indicate whether
> a domain is destroyed.

It could be a solution, but it doesn't feel that common for a clean
function to have a return value indicating a special case. Maybe we
could pass in "&domain" so that we can check whether it's NULL afterwards?
Jason Gunthorpe June 20, 2022, 4:03 a.m. UTC | #5
On Fri, Jun 17, 2022 at 04:07:20PM -0700, Nicolin Chen wrote:

> > > > > +     vfio_iommu_aper_expand(iommu, &iova_copy);
> > > >
> > > > but now it's done for every group detach. The aperture is decided
> > > > by domain geometry which is not affected by attached groups.
> > >
> > > Yea, I've noticed this part. Actually Jason did this change for
> > > simplicity, and I think it'd be safe to do so?
> > 
> > Perhaps detach_destroy() can return a Boolean to indicate whether
> > a domain is destroyed.
> 
> It could be a solution, but it doesn't feel that common for a clean
> function to have a return value indicating a special case. Maybe we
> could pass in "&domain" so that we can check whether it's NULL afterwards?

It is harmless to do it every time; it just burns a few CPU cycles on a
slow path. We don't need complexity to optimize it.

Jason
Robin Murphy June 20, 2022, 10:11 a.m. UTC | #6
On 2022-06-17 03:53, Tian, Kevin wrote:
>> From: Nicolin Chen <nicolinc@nvidia.com>
>> Sent: Friday, June 17, 2022 6:41 AM
>>
>>> ...
>>>> -     if (resv_msi) {
>>>> +     if (resv_msi && !domain->msi_cookie) {
>>>>                ret = iommu_get_msi_cookie(domain->domain, resv_msi_base);
>>>>                if (ret && ret != -ENODEV)
>>>>                        goto out_detach;
>>>> +             domain->msi_cookie = true;
>>>>        }
>>>
>>> why not move this into alloc_attach_domain()? Then there is no need for
>>> the new domain field; it's required only when a new domain is allocated.
>>
>> When reusing an existing domain that doesn't have an msi_cookie,
>> we can do iommu_get_msi_cookie() if resv_msi is found. So it is
>> not limited to a new domain.
> 
> Looks like the msi_cookie requirement is per platform (currently only
> for smmu; see arm_smmu_get_resv_regions()). If there is no mixed case
> then the above check is not required.
> 
> But let's hear whether Robin has a different thought here.

Yes, the cookie should logically be tied to the lifetime of the domain 
itself. In the relevant context, "an existing domain that doesn't have 
an msi_cookie" should never exist.

Thanks,
Robin.
Nicolin Chen June 21, 2022, 8:59 p.m. UTC | #7
On Mon, Jun 20, 2022 at 01:03:17AM -0300, Jason Gunthorpe wrote:
> On Fri, Jun 17, 2022 at 04:07:20PM -0700, Nicolin Chen wrote:
> 
> > > > > > +     vfio_iommu_aper_expand(iommu, &iova_copy);
> > > > >
> > > > > but now it's done for every group detach. The aperture is decided
> > > > > by domain geometry which is not affected by attached groups.
> > > >
> > > > Yea, I've noticed this part. Actually Jason did this change for
> > > > simplicity, and I think it'd be safe to do so?
> > > 
> > > Perhaps detach_destroy() can return a Boolean to indicate whether
> > > a domain is destroyed.
> > 
> > It could be a solution, but it doesn't feel that common for a clean
> > function to have a return value indicating a special case. Maybe we
> > could pass in "&domain" so that we can check whether it's NULL afterwards?
> 
> It is harmless to do it every time; it just burns a few CPU cycles on a
> slow path. We don't need complexity to optimize it.

OK. I will keep it simple then. If Kevin or anyone else has a further
objection, please let us know.

Thanks
Nic
Nicolin Chen June 21, 2022, 9:08 p.m. UTC | #8
On Mon, Jun 20, 2022 at 11:11:01AM +0100, Robin Murphy wrote:
> 
> On 2022-06-17 03:53, Tian, Kevin wrote:
> > > From: Nicolin Chen <nicolinc@nvidia.com>
> > > Sent: Friday, June 17, 2022 6:41 AM
> > > 
> > > > ...
> > > > > -     if (resv_msi) {
> > > > > +     if (resv_msi && !domain->msi_cookie) {
> > > > >                ret = iommu_get_msi_cookie(domain->domain, resv_msi_base);
> > > > >                if (ret && ret != -ENODEV)
> > > > >                        goto out_detach;
> > > > > +             domain->msi_cookie = true;
> > > > >        }
> > > > 
> > > > why not move this into alloc_attach_domain()? Then there is no need for
> > > > the new domain field; it's required only when a new domain is allocated.
> > > 
> > > When reusing an existing domain that doesn't have an msi_cookie,
> > > we can do iommu_get_msi_cookie() if resv_msi is found. So it is
> > > not limited to a new domain.
> > 
> > Looks like the msi_cookie requirement is per platform (currently only
> > for smmu; see arm_smmu_get_resv_regions()). If there is no mixed case
> > then the above check is not required.
> > 
> > But let's hear whether Robin has a different thought here.
> 
> Yes, the cookie should logically be tied to the lifetime of the domain
> itself. In the relevant context, "an existing domain that doesn't have
> an msi_cookie" should never exist.

Thanks for the explanation. I will move the iommu_get_msi_cookie()
into alloc_attach_domain(), as Kevin suggested.
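
Roughly like this in the new-domain path (just a sketch, assuming
resv_msi/resv_msi_base are computed earlier and passed into the helper):

	ret = iommu_attach_group(new_domain, group->iommu_group);
	if (ret)
		goto out_free_iommu_domain;

	/* Only a newly allocated domain needs an MSI cookie */
	if (resv_msi) {
		ret = iommu_get_msi_cookie(new_domain, resv_msi_base);
		if (ret && ret != -ENODEV)
			goto out_detach;
	}

and then the msi_cookie bit added to struct vfio_domain can be dropped
again.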

Patch

diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 573caf320788..5986c68e59ee 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -86,6 +86,7 @@  struct vfio_domain {
 	struct list_head	group_list;
 	bool			fgsp : 1;	/* Fine-grained super pages */
 	bool			enforce_cache_coherency : 1;
+	bool			msi_cookie : 1;
 };
 
 struct vfio_dma {
@@ -2153,12 +2154,163 @@  static void vfio_iommu_iova_insert_copy(struct vfio_iommu *iommu,
 	list_splice_tail(iova_copy, iova);
 }
 
+static struct vfio_domain *
+vfio_iommu_alloc_attach_domain(struct bus_type *bus, struct vfio_iommu *iommu,
+			       struct vfio_iommu_group *group)
+{
+	struct iommu_domain *new_domain;
+	struct vfio_domain *domain;
+	int ret = 0;
+
+	/* Try to match an existing compatible domain */
+	list_for_each_entry (domain, &iommu->domain_list, next) {
+		ret = iommu_attach_group(domain->domain, group->iommu_group);
+		if (ret == -EMEDIUMTYPE)
+			continue;
+		if (ret)
+			return ERR_PTR(ret);
+		goto done;
+	}
+
+	new_domain = iommu_domain_alloc(bus);
+	if (!new_domain)
+		return ERR_PTR(-EIO);
+
+	if (iommu->nesting) {
+		ret = iommu_enable_nesting(new_domain);
+		if (ret)
+			goto out_free_iommu_domain;
+	}
+
+	ret = iommu_attach_group(new_domain, group->iommu_group);
+	if (ret)
+		goto out_free_iommu_domain;
+
+	domain = kzalloc(sizeof(*domain), GFP_KERNEL);
+	if (!domain) {
+		ret = -ENOMEM;
+		goto out_detach;
+	}
+
+	domain->domain = new_domain;
+	vfio_test_domain_fgsp(domain);
+
+	/*
+	 * If the IOMMU can block non-coherent operations (ie PCIe TLPs with
+	 * no-snoop set) then VFIO always turns this feature on because on Intel
+	 * platforms it optimizes KVM to disable wbinvd emulation.
+	 */
+	if (new_domain->ops->enforce_cache_coherency)
+		domain->enforce_cache_coherency =
+			new_domain->ops->enforce_cache_coherency(new_domain);
+
+	/* replay mappings on new domains */
+	ret = vfio_iommu_replay(iommu, domain);
+	if (ret)
+		goto out_free_domain;
+
+	INIT_LIST_HEAD(&domain->group_list);
+	list_add(&domain->next, &iommu->domain_list);
+	vfio_update_pgsize_bitmap(iommu);
+
+done:
+	list_add(&group->next, &domain->group_list);
+
+	/*
+	 * An iommu backed group can dirty memory directly and therefore
+	 * demotes the iommu scope until it declares itself dirty tracking
+	 * capable via the page pinning interface.
+	 */
+	iommu->num_non_pinned_groups++;
+
+	return domain;
+
+out_free_domain:
+	kfree(domain);
+out_detach:
+	iommu_detach_group(new_domain, group->iommu_group);
+out_free_iommu_domain:
+	iommu_domain_free(new_domain);
+	return ERR_PTR(ret);
+}
+
+static void vfio_iommu_unmap_unpin_all(struct vfio_iommu *iommu)
+{
+	struct rb_node *node;
+
+	while ((node = rb_first(&iommu->dma_list)))
+		vfio_remove_dma(iommu, rb_entry(node, struct vfio_dma, node));
+}
+
+static void vfio_iommu_unmap_unpin_reaccount(struct vfio_iommu *iommu)
+{
+	struct rb_node *n, *p;
+
+	n = rb_first(&iommu->dma_list);
+	for (; n; n = rb_next(n)) {
+		struct vfio_dma *dma;
+		long locked = 0, unlocked = 0;
+
+		dma = rb_entry(n, struct vfio_dma, node);
+		unlocked += vfio_unmap_unpin(iommu, dma, false);
+		p = rb_first(&dma->pfn_list);
+		for (; p; p = rb_next(p)) {
+			struct vfio_pfn *vpfn = rb_entry(p, struct vfio_pfn,
+							 node);
+
+			if (!is_invalid_reserved_pfn(vpfn->pfn))
+				locked++;
+		}
+		vfio_lock_acct(dma, locked - unlocked, true);
+	}
+}
+
+static void vfio_iommu_detach_destroy_domain(struct vfio_domain *domain,
+					     struct vfio_iommu *iommu,
+					     struct vfio_iommu_group *group)
+{
+	iommu_detach_group(domain->domain, group->iommu_group);
+	list_del(&group->next);
+	if (!list_empty(&domain->group_list))
+		goto out_dirty;
+
+	/*
+	 * Group ownership provides privilege, if the group list is empty, the
+	 * domain goes away. If it's the last domain with iommu and external
+	 * domain doesn't exist, then all the mappings go away too. If it's the
+	 * last domain with iommu and external domain exist, update accounting
+	 */
+	if (list_is_singular(&iommu->domain_list)) {
+		if (list_empty(&iommu->emulated_iommu_groups)) {
+			WARN_ON(iommu->notifier.head);
+			vfio_iommu_unmap_unpin_all(iommu);
+		} else {
+			vfio_iommu_unmap_unpin_reaccount(iommu);
+		}
+	}
+	iommu_domain_free(domain->domain);
+	list_del(&domain->next);
+	kfree(domain);
+	vfio_update_pgsize_bitmap(iommu);
+
+out_dirty:
+	/*
+	 * Removal of a group without dirty tracking may allow the iommu scope
+	 * to be promoted.
+	 */
+	if (!group->pinned_page_dirty_scope) {
+		iommu->num_non_pinned_groups--;
+		if (iommu->dirty_page_tracking)
+			vfio_iommu_populate_bitmap_full(iommu);
+	}
+}
+
 static int vfio_iommu_type1_attach_group(void *iommu_data,
 		struct iommu_group *iommu_group, enum vfio_group_type type)
 {
 	struct vfio_iommu *iommu = iommu_data;
 	struct vfio_iommu_group *group;
-	struct vfio_domain *domain, *d;
+	struct vfio_domain *domain;
 	struct bus_type *bus = NULL;
 	bool resv_msi, msi_remap;
 	phys_addr_t resv_msi_base = 0;
@@ -2197,26 +2349,12 @@  static int vfio_iommu_type1_attach_group(void *iommu_data,
 	if (ret)
 		goto out_free_group;
 
-	ret = -ENOMEM;
-	domain = kzalloc(sizeof(*domain), GFP_KERNEL);
-	if (!domain)
+	domain = vfio_iommu_alloc_attach_domain(bus, iommu, group);
+	if (IS_ERR(domain)) {
+		ret = PTR_ERR(domain);
 		goto out_free_group;
-
-	ret = -EIO;
-	domain->domain = iommu_domain_alloc(bus);
-	if (!domain->domain)
-		goto out_free_domain;
-
-	if (iommu->nesting) {
-		ret = iommu_enable_nesting(domain->domain);
-		if (ret)
-			goto out_domain;
 	}
 
-	ret = iommu_attach_group(domain->domain, group->iommu_group);
-	if (ret)
-		goto out_domain;
-
 	/* Get aperture info */
 	geo = &domain->domain->geometry;
 	if (vfio_iommu_aper_conflict(iommu, geo->aperture_start,
@@ -2254,9 +2392,6 @@  static int vfio_iommu_type1_attach_group(void *iommu_data,
 
 	resv_msi = vfio_iommu_has_sw_msi(&group_resv_regions, &resv_msi_base);
 
-	INIT_LIST_HEAD(&domain->group_list);
-	list_add(&group->next, &domain->group_list);
-
 	msi_remap = irq_domain_check_msi_remap() ||
 		    iommu_capable(bus, IOMMU_CAP_INTR_REMAP);
 
@@ -2267,107 +2402,32 @@  static int vfio_iommu_type1_attach_group(void *iommu_data,
 		goto out_detach;
 	}
 
-	/*
-	 * If the IOMMU can block non-coherent operations (ie PCIe TLPs with
-	 * no-snoop set) then VFIO always turns this feature on because on Intel
-	 * platforms it optimizes KVM to disable wbinvd emulation.
-	 */
-	if (domain->domain->ops->enforce_cache_coherency)
-		domain->enforce_cache_coherency =
-			domain->domain->ops->enforce_cache_coherency(
-				domain->domain);
-
-	/* Try to match an existing compatible domain */
-	list_for_each_entry(d, &iommu->domain_list, next) {
-		iommu_detach_group(domain->domain, group->iommu_group);
-		if (!iommu_attach_group(d->domain, group->iommu_group)) {
-			list_add(&group->next, &d->group_list);
-			iommu_domain_free(domain->domain);
-			kfree(domain);
-			goto done;
-		}
-
-		ret = iommu_attach_group(domain->domain,  group->iommu_group);
-		if (ret)
-			goto out_domain;
-	}
-
-	vfio_test_domain_fgsp(domain);
-
-	/* replay mappings on new domains */
-	ret = vfio_iommu_replay(iommu, domain);
-	if (ret)
-		goto out_detach;
-
-	if (resv_msi) {
+	if (resv_msi && !domain->msi_cookie) {
 		ret = iommu_get_msi_cookie(domain->domain, resv_msi_base);
 		if (ret && ret != -ENODEV)
 			goto out_detach;
+		domain->msi_cookie = true;
 	}
 
-	list_add(&domain->next, &iommu->domain_list);
-	vfio_update_pgsize_bitmap(iommu);
-done:
 	/* Delete the old one and insert new iova list */
 	vfio_iommu_iova_insert_copy(iommu, &iova_copy);
 
-	/*
-	 * An iommu backed group can dirty memory directly and therefore
-	 * demotes the iommu scope until it declares itself dirty tracking
-	 * capable via the page pinning interface.
-	 */
-	iommu->num_non_pinned_groups++;
 	mutex_unlock(&iommu->lock);
 	vfio_iommu_resv_free(&group_resv_regions);
 
 	return 0;
 
 out_detach:
-	iommu_detach_group(domain->domain, group->iommu_group);
-out_domain:
-	iommu_domain_free(domain->domain);
-	vfio_iommu_iova_free(&iova_copy);
-	vfio_iommu_resv_free(&group_resv_regions);
-out_free_domain:
-	kfree(domain);
+	vfio_iommu_detach_destroy_domain(domain, iommu, group);
 out_free_group:
 	kfree(group);
 out_unlock:
 	mutex_unlock(&iommu->lock);
+	vfio_iommu_iova_free(&iova_copy);
+	vfio_iommu_resv_free(&group_resv_regions);
 	return ret;
 }
 
-static void vfio_iommu_unmap_unpin_all(struct vfio_iommu *iommu)
-{
-	struct rb_node *node;
-
-	while ((node = rb_first(&iommu->dma_list)))
-		vfio_remove_dma(iommu, rb_entry(node, struct vfio_dma, node));
-}
-
-static void vfio_iommu_unmap_unpin_reaccount(struct vfio_iommu *iommu)
-{
-	struct rb_node *n, *p;
-
-	n = rb_first(&iommu->dma_list);
-	for (; n; n = rb_next(n)) {
-		struct vfio_dma *dma;
-		long locked = 0, unlocked = 0;
-
-		dma = rb_entry(n, struct vfio_dma, node);
-		unlocked += vfio_unmap_unpin(iommu, dma, false);
-		p = rb_first(&dma->pfn_list);
-		for (; p; p = rb_next(p)) {
-			struct vfio_pfn *vpfn = rb_entry(p, struct vfio_pfn,
-							 node);
-
-			if (!is_invalid_reserved_pfn(vpfn->pfn))
-				locked++;
-		}
-		vfio_lock_acct(dma, locked - unlocked, true);
-	}
-}
-
 /*
  * Called when a domain is removed in detach. It is possible that
  * the removed domain decided the iova aperture window. Modify the
@@ -2481,44 +2541,12 @@  static void vfio_iommu_type1_detach_group(void *iommu_data,
 		group = find_iommu_group(domain, iommu_group);
 		if (!group)
 			continue;
-
-		iommu_detach_group(domain->domain, group->iommu_group);
-		list_del(&group->next);
-		/*
-		 * Group ownership provides privilege, if the group list is
-		 * empty, the domain goes away. If it's the last domain with
-		 * iommu and external domain doesn't exist, then all the
-		 * mappings go away too. If it's the last domain with iommu and
-		 * external domain exist, update accounting
-		 */
-		if (list_empty(&domain->group_list)) {
-			if (list_is_singular(&iommu->domain_list)) {
-				if (list_empty(&iommu->emulated_iommu_groups)) {
-					WARN_ON(iommu->notifier.head);
-					vfio_iommu_unmap_unpin_all(iommu);
-				} else {
-					vfio_iommu_unmap_unpin_reaccount(iommu);
-				}
-			}
-			iommu_domain_free(domain->domain);
-			list_del(&domain->next);
-			kfree(domain);
-			vfio_iommu_aper_expand(iommu, &iova_copy);
-			vfio_update_pgsize_bitmap(iommu);
-		}
-		/*
-		 * Removal of a group without dirty tracking may allow
-		 * the iommu scope to be promoted.
-		 */
-		if (!group->pinned_page_dirty_scope) {
-			iommu->num_non_pinned_groups--;
-			if (iommu->dirty_page_tracking)
-				vfio_iommu_populate_bitmap_full(iommu);
-		}
+		vfio_iommu_detach_destroy_domain(domain, iommu, group);
 		kfree(group);
 		break;
 	}
 
+	vfio_iommu_aper_expand(iommu, &iova_copy);
 	if (!vfio_iommu_resv_refresh(iommu, &iova_copy))
 		vfio_iommu_iova_insert_copy(iommu, &iova_copy);
 	else