
[RFC,10/11] vfio/iommu_type1: Optimize dirty bitmap population based on iommu HWDBM

Message ID 20210128151742.18840-11-zhukeqian1@huawei.com (mailing list archive)
State New, archived
Series vfio/iommu_type1: Implement dirty log tracking based on smmuv3 HTTU

Commit Message

zhukeqian Jan. 28, 2021, 3:17 p.m. UTC
From: jiangkunkun <jiangkunkun@huawei.com>

Previously, if the vfio_iommu is not of pinned_page_dirty_scope and the
vfio_dma is iommu_mapped, we populate the full dirty bitmap for this
vfio_dma. Now we can try to get the dirty log from the iommu first,
before falling back to that coarse decision.

Co-developed-by: Keqian Zhu <zhukeqian1@huawei.com>
Signed-off-by: Kunkun Jiang <jiangkunkun@huawei.com>
---
 drivers/vfio/vfio_iommu_type1.c | 97 ++++++++++++++++++++++++++++++++-
 1 file changed, 94 insertions(+), 3 deletions(-)

Comments

Yi Sun Feb. 7, 2021, 9:56 a.m. UTC | #1
Hi,

On 21-01-28 23:17:41, Keqian Zhu wrote:

[...]

> +static void vfio_dma_dirty_log_start(struct vfio_iommu *iommu,
> +				     struct vfio_dma *dma)
> +{
> +	struct vfio_domain *d;
> +
> +	list_for_each_entry(d, &iommu->domain_list, next) {
> +		/* Go through all domain anyway even if we fail */
> +		iommu_split_block(d->domain, dma->iova, dma->size);
> +	}
> +}

This should be a switch to prepare for dirty log start. Per Intel
Vtd spec, there is SLADE defined in Scalable-Mode PASID Table Entry.
It enables Accessed/Dirty Flags in second-level paging entries.
So, a generic iommu interface here is better. For Intel iommu, it
enables SLADE. For ARM, it splits block.
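
For illustration, a minimal sketch of what such a generic hook could look like; the names below (iommu_dirty_log_ops, switch_dirty_log, iommu_switch_dirty_log and the domain->dirty_log_ops field) are all hypothetical, not an existing kernel API:

/*
 * Hypothetical sketch only -- none of these symbols exist upstream.
 * The vendor driver decides what "start" means: VT-d could set SLADE,
 * SMMU could split block mappings, another IOMMU might do nothing.
 */
struct iommu_dirty_log_ops {
	int (*switch_dirty_log)(struct iommu_domain *domain, bool enable,
				unsigned long iova, size_t size, int prot);
};

static int iommu_switch_dirty_log(struct iommu_domain *domain, bool enable,
				  unsigned long iova, size_t size, int prot)
{
	/* dirty_log_ops would be a new, hypothetical field of iommu_domain */
	const struct iommu_dirty_log_ops *ops = domain->dirty_log_ops;

	if (!ops || !ops->switch_dirty_log)
		return -EOPNOTSUPP;

	return ops->switch_dirty_log(domain, enable, iova, size, prot);
}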

> +
> +static void vfio_dma_dirty_log_stop(struct vfio_iommu *iommu,
> +				    struct vfio_dma *dma)
> +{
> +	struct vfio_domain *d;
> +
> +	list_for_each_entry(d, &iommu->domain_list, next) {
> +		/* Go through all domain anyway even if we fail */
> +		iommu_merge_page(d->domain, dma->iova, dma->size,
> +				 d->prot | dma->prot);
> +	}
> +}

Same as above comment, a generic interface is required here.

> +
> +static void vfio_iommu_dirty_log_switch(struct vfio_iommu *iommu, bool start)
> +{
> +	struct rb_node *n;
> +
> +	/* Split and merge even if all iommu don't support HWDBM now */
> +	for (n = rb_first(&iommu->dma_list); n; n = rb_next(n)) {
> +		struct vfio_dma *dma = rb_entry(n, struct vfio_dma, node);
> +
> +		if (!dma->iommu_mapped)
> +			continue;
> +
> +		/* Go through all dma range anyway even if we fail */
> +		if (start)
> +			vfio_dma_dirty_log_start(iommu, dma);
> +		else
> +			vfio_dma_dirty_log_stop(iommu, dma);
> +	}
> +}
> +
>  static int vfio_iommu_type1_dirty_pages(struct vfio_iommu *iommu,
>  					unsigned long arg)
>  {
> @@ -2812,8 +2900,10 @@ static int vfio_iommu_type1_dirty_pages(struct vfio_iommu *iommu,
>  		pgsize = 1 << __ffs(iommu->pgsize_bitmap);
>  		if (!iommu->dirty_page_tracking) {
>  			ret = vfio_dma_bitmap_alloc_all(iommu, pgsize);
> -			if (!ret)
> +			if (!ret) {
>  				iommu->dirty_page_tracking = true;
> +				vfio_iommu_dirty_log_switch(iommu, true);
> +			}
>  		}
>  		mutex_unlock(&iommu->lock);
>  		return ret;
> @@ -2822,6 +2912,7 @@ static int vfio_iommu_type1_dirty_pages(struct vfio_iommu *iommu,
>  		if (iommu->dirty_page_tracking) {
>  			iommu->dirty_page_tracking = false;
>  			vfio_dma_bitmap_free_all(iommu);
> +			vfio_iommu_dirty_log_switch(iommu, false);
>  		}
>  		mutex_unlock(&iommu->lock);
>  		return 0;
> -- 
> 2.19.1
zhukeqian Feb. 7, 2021, 10:40 a.m. UTC | #2
Hi Yi,

On 2021/2/7 17:56, Yi Sun wrote:
> Hi,
> 
> On 21-01-28 23:17:41, Keqian Zhu wrote:
> 
> [...]
> 
>> +static void vfio_dma_dirty_log_start(struct vfio_iommu *iommu,
>> +				     struct vfio_dma *dma)
>> +{
>> +	struct vfio_domain *d;
>> +
>> +	list_for_each_entry(d, &iommu->domain_list, next) {
>> +		/* Go through all domain anyway even if we fail */
>> +		iommu_split_block(d->domain, dma->iova, dma->size);
>> +	}
>> +}
> 
> This should be a switch to prepare for dirty log start. Per Intel
> Vtd spec, there is SLADE defined in Scalable-Mode PASID Table Entry.
> It enables Accessed/Dirty Flags in second-level paging entries.
> So, a generic iommu interface here is better. For Intel iommu, it
> enables SLADE. For ARM, it splits block.
Indeed, a generic interface name is better.

The vendor iommu driver performs its own vendor-specific actions to start dirty logging, and the Intel iommu and ARM smmu may differ. Besides, we may add more actions to the ARM smmu driver in the future.

One question: though I am not familiar with the Intel iommu, I think it should also split block mappings besides enabling SLADE. Right?

Thanks,
Keqian
Robin Murphy Feb. 9, 2021, 11:16 a.m. UTC | #3
On 2021-02-07 09:56, Yi Sun wrote:
> Hi,
> 
> On 21-01-28 23:17:41, Keqian Zhu wrote:
> 
> [...]
> 
>> +static void vfio_dma_dirty_log_start(struct vfio_iommu *iommu,
>> +				     struct vfio_dma *dma)
>> +{
>> +	struct vfio_domain *d;
>> +
>> +	list_for_each_entry(d, &iommu->domain_list, next) {
>> +		/* Go through all domain anyway even if we fail */
>> +		iommu_split_block(d->domain, dma->iova, dma->size);
>> +	}
>> +}
> 
> This should be a switch to prepare for dirty log start. Per Intel
> Vtd spec, there is SLADE defined in Scalable-Mode PASID Table Entry.
> It enables Accessed/Dirty Flags in second-level paging entries.
> So, a generic iommu interface here is better. For Intel iommu, it
> enables SLADE. For ARM, it splits block.

From a quick look, VT-D's SLADE and SMMU's HTTU appear to be the exact 
same thing. This step isn't about enabling or disabling that feature 
itself (the proposal for SMMU is to simply leave HTTU enabled all the 
time), it's about controlling the granularity at which the dirty status 
can be detected/reported at all, since that's tied to the pagetable 
structure.

However, if an IOMMU were to come along with some other way of reporting 
dirty status that didn't depend on the granularity of individual 
mappings, then indeed it wouldn't need this operation.
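
To make that granularity point concrete (a standalone, illustrative C snippet, not code from this series): when the dirty flag lives in the leaf descriptor, a write anywhere under a block mapping can only be reported as the whole block being dirty, so the amount reported to userspace scales with the leaf mapping size rather than with what the device actually wrote:

#include <stdio.h>

int main(void)
{
	/* Reporting granule of the VFIO dirty bitmap: 4KB pages. */
	const unsigned long pgshift = 12;
	/* Leaf mapping sizes: 4KB page, 2MB block, 1GB block. */
	const unsigned long leaf_sizes[] = { 1UL << 12, 1UL << 21, 1UL << 30 };

	for (int i = 0; i < 3; i++) {
		/*
		 * One dirty leaf descriptor can only say "this whole mapping
		 * is dirty", so every 4KB granule under it gets reported.
		 */
		unsigned long dirty_bits = leaf_sizes[i] >> pgshift;

		printf("%8lu KB leaf -> %7lu dirty bits (%lu KB reported)\n",
		       leaf_sizes[i] >> 10, dirty_bits,
		       (dirty_bits << pgshift) >> 10);
	}
	return 0;
}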

Robin.

>> +
>> +static void vfio_dma_dirty_log_stop(struct vfio_iommu *iommu,
>> +				    struct vfio_dma *dma)
>> +{
>> +	struct vfio_domain *d;
>> +
>> +	list_for_each_entry(d, &iommu->domain_list, next) {
>> +		/* Go through all domain anyway even if we fail */
>> +		iommu_merge_page(d->domain, dma->iova, dma->size,
>> +				 d->prot | dma->prot);
>> +	}
>> +}
> 
> Same as above comment, a generic interface is required here.
> 
>> +
>> +static void vfio_iommu_dirty_log_switch(struct vfio_iommu *iommu, bool start)
>> +{
>> +	struct rb_node *n;
>> +
>> +	/* Split and merge even if all iommu don't support HWDBM now */
>> +	for (n = rb_first(&iommu->dma_list); n; n = rb_next(n)) {
>> +		struct vfio_dma *dma = rb_entry(n, struct vfio_dma, node);
>> +
>> +		if (!dma->iommu_mapped)
>> +			continue;
>> +
>> +		/* Go through all dma range anyway even if we fail */
>> +		if (start)
>> +			vfio_dma_dirty_log_start(iommu, dma);
>> +		else
>> +			vfio_dma_dirty_log_stop(iommu, dma);
>> +	}
>> +}
>> +
>>   static int vfio_iommu_type1_dirty_pages(struct vfio_iommu *iommu,
>>   					unsigned long arg)
>>   {
>> @@ -2812,8 +2900,10 @@ static int vfio_iommu_type1_dirty_pages(struct vfio_iommu *iommu,
>>   		pgsize = 1 << __ffs(iommu->pgsize_bitmap);
>>   		if (!iommu->dirty_page_tracking) {
>>   			ret = vfio_dma_bitmap_alloc_all(iommu, pgsize);
>> -			if (!ret)
>> +			if (!ret) {
>>   				iommu->dirty_page_tracking = true;
>> +				vfio_iommu_dirty_log_switch(iommu, true);
>> +			}
>>   		}
>>   		mutex_unlock(&iommu->lock);
>>   		return ret;
>> @@ -2822,6 +2912,7 @@ static int vfio_iommu_type1_dirty_pages(struct vfio_iommu *iommu,
>>   		if (iommu->dirty_page_tracking) {
>>   			iommu->dirty_page_tracking = false;
>>   			vfio_dma_bitmap_free_all(iommu);
>> +			vfio_iommu_dirty_log_switch(iommu, false);
>>   		}
>>   		mutex_unlock(&iommu->lock);
>>   		return 0;
>> -- 
>> 2.19.1
Yi Sun Feb. 9, 2021, 11:57 a.m. UTC | #4
On 21-02-07 18:40:36, Keqian Zhu wrote:
> Hi Yi,
> 
> On 2021/2/7 17:56, Yi Sun wrote:
> > Hi,
> > 
> > On 21-01-28 23:17:41, Keqian Zhu wrote:
> > 
> > [...]
> > 
> >> +static void vfio_dma_dirty_log_start(struct vfio_iommu *iommu,
> >> +				     struct vfio_dma *dma)
> >> +{
> >> +	struct vfio_domain *d;
> >> +
> >> +	list_for_each_entry(d, &iommu->domain_list, next) {
> >> +		/* Go through all domain anyway even if we fail */
> >> +		iommu_split_block(d->domain, dma->iova, dma->size);
> >> +	}
> >> +}
> > 
> > This should be a switch to prepare for dirty log start. Per Intel
> > Vtd spec, there is SLADE defined in Scalable-Mode PASID Table Entry.
> > It enables Accessed/Dirty Flags in second-level paging entries.
> > So, a generic iommu interface here is better. For Intel iommu, it
> > enables SLADE. For ARM, it splits block.
> Indeed, a generic interface name is better.
> 
> The vendor iommu driver performs its own vendor-specific actions to start dirty logging, and the Intel iommu and ARM smmu may differ. Besides, we may add more actions to the ARM smmu driver in the future.
> 
> One question: though I am not familiar with the Intel iommu, I think it should also split block mappings besides enabling SLADE. Right?
> 
I am not familiar with ARM smmu. :) So I want to clarify if the block
in smmu is big page, e.g. 2M page? Intel Vtd manages the memory per
page, 4KB/2MB/1GB. There are two ways to manage dirty pages.
1. Keep default granularity. Just set SLADE to enable the dirty track.
2. Split big page to 4KB to get finer granularity.

But the question about the second solution is whether it can benefit
user space, e.g. live migration. If my understanding of the smmu block
(i.e. the big page) is correct, have you collected some performance
data to prove that the split can improve performance? Thanks!

> Thanks,
> Keqian
Yi Sun Feb. 9, 2021, 12:02 p.m. UTC | #5
On 21-02-09 11:16:08, Robin Murphy wrote:
> On 2021-02-07 09:56, Yi Sun wrote:
> >Hi,
> >
> >On 21-01-28 23:17:41, Keqian Zhu wrote:
> >
> >[...]
> >
> >>+static void vfio_dma_dirty_log_start(struct vfio_iommu *iommu,
> >>+				     struct vfio_dma *dma)
> >>+{
> >>+	struct vfio_domain *d;
> >>+
> >>+	list_for_each_entry(d, &iommu->domain_list, next) {
> >>+		/* Go through all domain anyway even if we fail */
> >>+		iommu_split_block(d->domain, dma->iova, dma->size);
> >>+	}
> >>+}
> >
> >This should be a switch to prepare for dirty log start. Per Intel
> >Vtd spec, there is SLADE defined in Scalable-Mode PASID Table Entry.
> >It enables Accessed/Dirty Flags in second-level paging entries.
> >So, a generic iommu interface here is better. For Intel iommu, it
> >enables SLADE. For ARM, it splits block.
> 
> From a quick look, VT-D's SLADE and SMMU's HTTU appear to be the
> exact same thing. This step isn't about enabling or disabling that
> feature itself (the proposal for SMMU is to simply leave HTTU
> enabled all the time), it's about controlling the granularity at
> which the dirty status can be detected/reported at all, since that's
> tied to the pagetable structure.
> 
> However, if an IOMMU were to come along with some other way of
> reporting dirty status that didn't depend on the granularity of
> individual mappings, then indeed it wouldn't need this operation.
> 
In my view, we can use these two start/stop interfaces to let user
space decide when to start/stop dirty tracking. For Intel SLADE, I
think we can enable this bit when the start interface is called by
user space. I don't think leaving SLADE enabled all the time is
necessary for Intel VT-d. So I suggest a generic interface here.
Thanks!
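
A sketch of how the VFIO side could then be wired up on top of a single generic hook like the hypothetical iommu_switch_dirty_log() above, instead of calling split/merge directly (names are illustrative only):

static void vfio_dma_dirty_log_switch_domains(struct vfio_iommu *iommu,
					      struct vfio_dma *dma, bool start)
{
	struct vfio_domain *d;

	list_for_each_entry(d, &iommu->domain_list, next) {
		/*
		 * The vendor driver decides what this means: VT-d could set
		 * or clear SLADE, SMMU could split or merge block mappings.
		 * Keep going through all domains even if one of them fails.
		 */
		iommu_switch_dirty_log(d->domain, start, dma->iova, dma->size,
				       d->prot | dma->prot);
	}
}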

> Robin.
> 
> >>+
> >>+static void vfio_dma_dirty_log_stop(struct vfio_iommu *iommu,
> >>+				    struct vfio_dma *dma)
> >>+{
> >>+	struct vfio_domain *d;
> >>+
> >>+	list_for_each_entry(d, &iommu->domain_list, next) {
> >>+		/* Go through all domain anyway even if we fail */
> >>+		iommu_merge_page(d->domain, dma->iova, dma->size,
> >>+				 d->prot | dma->prot);
> >>+	}
> >>+}
> >
> >Same as above comment, a generic interface is required here.
> >
> >>+
> >>+static void vfio_iommu_dirty_log_switch(struct vfio_iommu *iommu, bool start)
> >>+{
> >>+	struct rb_node *n;
> >>+
> >>+	/* Split and merge even if all iommu don't support HWDBM now */
> >>+	for (n = rb_first(&iommu->dma_list); n; n = rb_next(n)) {
> >>+		struct vfio_dma *dma = rb_entry(n, struct vfio_dma, node);
> >>+
> >>+		if (!dma->iommu_mapped)
> >>+			continue;
> >>+
> >>+		/* Go through all dma range anyway even if we fail */
> >>+		if (start)
> >>+			vfio_dma_dirty_log_start(iommu, dma);
> >>+		else
> >>+			vfio_dma_dirty_log_stop(iommu, dma);
> >>+	}
> >>+}
> >>+
> >>  static int vfio_iommu_type1_dirty_pages(struct vfio_iommu *iommu,
> >>  					unsigned long arg)
> >>  {
> >>@@ -2812,8 +2900,10 @@ static int vfio_iommu_type1_dirty_pages(struct vfio_iommu *iommu,
> >>  		pgsize = 1 << __ffs(iommu->pgsize_bitmap);
> >>  		if (!iommu->dirty_page_tracking) {
> >>  			ret = vfio_dma_bitmap_alloc_all(iommu, pgsize);
> >>-			if (!ret)
> >>+			if (!ret) {
> >>  				iommu->dirty_page_tracking = true;
> >>+				vfio_iommu_dirty_log_switch(iommu, true);
> >>+			}
> >>  		}
> >>  		mutex_unlock(&iommu->lock);
> >>  		return ret;
> >>@@ -2822,6 +2912,7 @@ static int vfio_iommu_type1_dirty_pages(struct vfio_iommu *iommu,
> >>  		if (iommu->dirty_page_tracking) {
> >>  			iommu->dirty_page_tracking = false;
> >>  			vfio_dma_bitmap_free_all(iommu);
> >>+			vfio_iommu_dirty_log_switch(iommu, false);
> >>  		}
> >>  		mutex_unlock(&iommu->lock);
> >>  		return 0;
> >>-- 
> >>2.19.1
Robin Murphy Feb. 9, 2021, 12:08 p.m. UTC | #6
On 2021-02-09 11:57, Yi Sun wrote:
> On 21-02-07 18:40:36, Keqian Zhu wrote:
>> Hi Yi,
>>
>> On 2021/2/7 17:56, Yi Sun wrote:
>>> Hi,
>>>
>>> On 21-01-28 23:17:41, Keqian Zhu wrote:
>>>
>>> [...]
>>>
>>>> +static void vfio_dma_dirty_log_start(struct vfio_iommu *iommu,
>>>> +				     struct vfio_dma *dma)
>>>> +{
>>>> +	struct vfio_domain *d;
>>>> +
>>>> +	list_for_each_entry(d, &iommu->domain_list, next) {
>>>> +		/* Go through all domain anyway even if we fail */
>>>> +		iommu_split_block(d->domain, dma->iova, dma->size);
>>>> +	}
>>>> +}
>>>
>>> This should be a switch to prepare for dirty log start. Per Intel
>>> Vtd spec, there is SLADE defined in Scalable-Mode PASID Table Entry.
>>> It enables Accessed/Dirty Flags in second-level paging entries.
>>> So, a generic iommu interface here is better. For Intel iommu, it
>>> enables SLADE. For ARM, it splits block.
>> Indeed, a generic interface name is better.
>>
>> The vendor iommu driver performs its own vendor-specific actions to start dirty logging, and the Intel iommu and ARM smmu may differ. Besides, we may add more actions to the ARM smmu driver in the future.
>>
>> One question: though I am not familiar with the Intel iommu, I think it should also split block mappings besides enabling SLADE. Right?
>>
> I am not familiar with ARM smmu. :) So I want to clarify if the block
> in smmu is big page, e.g. 2M page? Intel Vtd manages the memory per
> page, 4KB/2MB/1GB.

Indeed, what you call large pages, we call blocks :)

Robin.

> There are two ways to manage dirty pages.
> 1. Keep default granularity. Just set SLADE to enable the dirty track.
> 2. Split big page to 4KB to get finer granularity.
> 
> But the question about the second solution is whether it can benefit
> user space, e.g. live migration. If my understanding of the smmu block
> (i.e. the big page) is correct, have you collected some performance
> data to prove that the split can improve performance? Thanks!
> 
>> Thanks,
>> Keqian
zhukeqian Feb. 18, 2021, 1:17 a.m. UTC | #7
Hi Yi,

On 2021/2/9 19:57, Yi Sun wrote:
> On 21-02-07 18:40:36, Keqian Zhu wrote:
>> Hi Yi,
>>
>> On 2021/2/7 17:56, Yi Sun wrote:
>>> Hi,
>>>
>>> On 21-01-28 23:17:41, Keqian Zhu wrote:
>>>
>>> [...]
>>>
>>>> +static void vfio_dma_dirty_log_start(struct vfio_iommu *iommu,
>>>> +				     struct vfio_dma *dma)
>>>> +{
>>>> +	struct vfio_domain *d;
>>>> +
>>>> +	list_for_each_entry(d, &iommu->domain_list, next) {
>>>> +		/* Go through all domain anyway even if we fail */
>>>> +		iommu_split_block(d->domain, dma->iova, dma->size);
>>>> +	}
>>>> +}
>>>
>>> This should be a switch to prepare for dirty log start. Per Intel
>>> Vtd spec, there is SLADE defined in Scalable-Mode PASID Table Entry.
>>> It enables Accessed/Dirty Flags in second-level paging entries.
>>> So, a generic iommu interface here is better. For Intel iommu, it
>>> enables SLADE. For ARM, it splits block.
>> Indeed, a generic interface name is better.
>>
>> The vendor iommu driver performs its own vendor-specific actions to start dirty logging, and the Intel iommu and ARM smmu may differ. Besides, we may add more actions to the ARM smmu driver in the future.
>>
>> One question: though I am not familiar with the Intel iommu, I think it should also split block mappings besides enabling SLADE. Right?
>>
> I am not familiar with ARM smmu. :) So I want to clarify if the block
> in smmu is big page, e.g. 2M page? Intel Vtd manages the memory per
Yes, for ARM, the "block" is a big page :).

> page, 4KB/2MB/1GB. There are two ways to manage dirty pages.
> 1. Keep default granularity. Just set SLADE to enable the dirty track.
> 2. Split big page to 4KB to get finer granularity.
From your description, I see that VT-d's SLADE behaves like SMMU HTTU. They are both based on the page table.

Right, we should give more freedom to the iommu vendor driver, so a generic interface is better.
1) As you said, set SLADE when enabling the dirty log.
2) IOMMUs of other architectures may have completely different dirty tracking mechanisms.

> 
> But the question about the second solution is whether it can benefit
> user space, e.g. live migration. If my understanding of the smmu block
> (i.e. the big page) is correct, have you collected some performance
> data to prove that the split can improve performance? Thanks!
The purpose of splitting block mappings is to reduce the amount of dirty bytes reported, which depends on the actual DMA transactions.
Take an extreme example: if DMA writes one byte, then under a 1G mapping the dirty amount reported to userspace is 1G, but under a 4K mapping the dirty amount is just 4K.

I will add these details to the commit message in v2.

Thanks,
Keqian

Patch

diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 3b8522ebf955..1cd10f3e7ed4 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -999,6 +999,25 @@  static bool vfio_group_supports_hwdbm(struct vfio_group *group)
 	return true;
 }
 
+static int vfio_iommu_dirty_log_clear(struct vfio_iommu *iommu,
+				      dma_addr_t start_iova, size_t size,
+				      unsigned long *bitmap_buffer,
+				      dma_addr_t base_iova, size_t pgsize)
+{
+	struct vfio_domain *d;
+	unsigned long pgshift = __ffs(pgsize);
+	int ret;
+
+	list_for_each_entry(d, &iommu->domain_list, next) {
+		ret = iommu_clear_dirty_log(d->domain, start_iova, size,
+					    bitmap_buffer, base_iova, pgshift);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
 static int update_user_bitmap(u64 __user *bitmap, struct vfio_iommu *iommu,
 			      struct vfio_dma *dma, dma_addr_t base_iova,
 			      size_t pgsize)
@@ -1010,13 +1029,28 @@  static int update_user_bitmap(u64 __user *bitmap, struct vfio_iommu *iommu,
 	unsigned long shift = bit_offset % BITS_PER_LONG;
 	unsigned long leftover;
 
+	if (iommu->pinned_page_dirty_scope || !dma->iommu_mapped)
+		goto bitmap_done;
+
+	/* try to get dirty log from IOMMU */
+	if (!iommu->num_non_hwdbm_groups) {
+		struct vfio_domain *d;
+
+		list_for_each_entry(d, &iommu->domain_list, next) {
+			if (iommu_sync_dirty_log(d->domain, dma->iova, dma->size,
+						dma->bitmap, dma->iova, pgshift))
+				return -EFAULT;
+		}
+		goto bitmap_done;
+	}
+
 	/*
 	 * mark all pages dirty if any IOMMU capable device is not able
 	 * to report dirty pages and all pages are pinned and mapped.
 	 */
-	if (!iommu->pinned_page_dirty_scope && dma->iommu_mapped)
-		bitmap_set(dma->bitmap, 0, nbits);
+	bitmap_set(dma->bitmap, 0, nbits);
 
+bitmap_done:
 	if (shift) {
 		bitmap_shift_left(dma->bitmap, dma->bitmap, shift,
 				  nbits + shift);
@@ -1078,6 +1112,18 @@  static int vfio_iova_dirty_bitmap(u64 __user *bitmap, struct vfio_iommu *iommu,
 		 */
 		bitmap_clear(dma->bitmap, 0, dma->size >> pgshift);
 		vfio_dma_populate_bitmap(dma, pgsize);
+
+		/* Clear iommu dirty log to re-enable dirty log tracking */
+		if (!iommu->pinned_page_dirty_scope &&
+		    dma->iommu_mapped && !iommu->num_non_hwdbm_groups) {
+			ret = vfio_iommu_dirty_log_clear(iommu,	dma->iova,
+					dma->size, dma->bitmap, dma->iova,
+					pgsize);
+			if (ret) {
+				pr_warn("dma dirty log clear failed!\n");
+				return ret;
+			}
+		}
 	}
 	return 0;
 }
@@ -2780,6 +2826,48 @@  static int vfio_iommu_type1_unmap_dma(struct vfio_iommu *iommu,
 			-EFAULT : 0;
 }
 
+static void vfio_dma_dirty_log_start(struct vfio_iommu *iommu,
+				     struct vfio_dma *dma)
+{
+	struct vfio_domain *d;
+
+	list_for_each_entry(d, &iommu->domain_list, next) {
+		/* Go through all domain anyway even if we fail */
+		iommu_split_block(d->domain, dma->iova, dma->size);
+	}
+}
+
+static void vfio_dma_dirty_log_stop(struct vfio_iommu *iommu,
+				    struct vfio_dma *dma)
+{
+	struct vfio_domain *d;
+
+	list_for_each_entry(d, &iommu->domain_list, next) {
+		/* Go through all domain anyway even if we fail */
+		iommu_merge_page(d->domain, dma->iova, dma->size,
+				 d->prot | dma->prot);
+	}
+}
+
+static void vfio_iommu_dirty_log_switch(struct vfio_iommu *iommu, bool start)
+{
+	struct rb_node *n;
+
+	/* Split and merge even if all iommu don't support HWDBM now */
+	for (n = rb_first(&iommu->dma_list); n; n = rb_next(n)) {
+		struct vfio_dma *dma = rb_entry(n, struct vfio_dma, node);
+
+		if (!dma->iommu_mapped)
+			continue;
+
+		/* Go through all dma range anyway even if we fail */
+		if (start)
+			vfio_dma_dirty_log_start(iommu, dma);
+		else
+			vfio_dma_dirty_log_stop(iommu, dma);
+	}
+}
+
 static int vfio_iommu_type1_dirty_pages(struct vfio_iommu *iommu,
 					unsigned long arg)
 {
@@ -2812,8 +2900,10 @@  static int vfio_iommu_type1_dirty_pages(struct vfio_iommu *iommu,
 		pgsize = 1 << __ffs(iommu->pgsize_bitmap);
 		if (!iommu->dirty_page_tracking) {
 			ret = vfio_dma_bitmap_alloc_all(iommu, pgsize);
-			if (!ret)
+			if (!ret) {
 				iommu->dirty_page_tracking = true;
+				vfio_iommu_dirty_log_switch(iommu, true);
+			}
 		}
 		mutex_unlock(&iommu->lock);
 		return ret;
@@ -2822,6 +2912,7 @@  static int vfio_iommu_type1_dirty_pages(struct vfio_iommu *iommu,
 		if (iommu->dirty_page_tracking) {
 			iommu->dirty_page_tracking = false;
 			vfio_dma_bitmap_free_all(iommu);
+			vfio_iommu_dirty_log_switch(iommu, false);
 		}
 		mutex_unlock(&iommu->lock);
 		return 0;