diff mbox series

[v3,01/12] iommu: Introduce dirty log tracking framework

Message ID 20210413085457.25400-2-zhukeqian1@huawei.com (mailing list archive)
State New
Headers show
Series iommu/smmuv3: Implement hardware dirty log tracking | expand

Commit Message

Keqian Zhu April 13, 2021, 8:54 a.m. UTC
Some types of IOMMU are capable of tracking DMA dirty log, such as
ARM SMMU with HTTU or Intel IOMMU with SLADE. This introduces the
dirty log tracking framework in the IOMMU base layer.

Three new essential interfaces are added, and we maintaince the status
of dirty log tracking in iommu_domain.
1. iommu_switch_dirty_log: Perform actions to start|stop dirty log tracking
2. iommu_sync_dirty_log: Sync dirty log from IOMMU into a dirty bitmap
3. iommu_clear_dirty_log: Clear dirty log of IOMMU by a mask bitmap

A new dev feature are added to indicate whether a specific type of
iommu hardware supports and its driver realizes them.

Signed-off-by: Keqian Zhu <zhukeqian1@huawei.com>
Signed-off-by: Kunkun Jiang <jiangkunkun@huawei.com>
---
 drivers/iommu/iommu.c | 150 ++++++++++++++++++++++++++++++++++++++++++
 include/linux/iommu.h |  53 +++++++++++++++
 2 files changed, 203 insertions(+)

Comments

Lu Baolu April 14, 2021, 7 a.m. UTC | #1
Hi Keqian,

On 4/13/21 4:54 PM, Keqian Zhu wrote:
> Some types of IOMMU are capable of tracking DMA dirty log, such as
> ARM SMMU with HTTU or Intel IOMMU with SLADE. This introduces the
> dirty log tracking framework in the IOMMU base layer.
> 
> Three new essential interfaces are added, and we maintaince the status
> of dirty log tracking in iommu_domain.
> 1. iommu_switch_dirty_log: Perform actions to start|stop dirty log tracking
> 2. iommu_sync_dirty_log: Sync dirty log from IOMMU into a dirty bitmap
> 3. iommu_clear_dirty_log: Clear dirty log of IOMMU by a mask bitmap
> 
> A new dev feature are added to indicate whether a specific type of
> iommu hardware supports and its driver realizes them.
> 
> Signed-off-by: Keqian Zhu <zhukeqian1@huawei.com>
> Signed-off-by: Kunkun Jiang <jiangkunkun@huawei.com>
> ---
>   drivers/iommu/iommu.c | 150 ++++++++++++++++++++++++++++++++++++++++++
>   include/linux/iommu.h |  53 +++++++++++++++
>   2 files changed, 203 insertions(+)
> 
> diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
> index d0b0a15dba84..667b2d6d2fc0 100644
> --- a/drivers/iommu/iommu.c
> +++ b/drivers/iommu/iommu.c
> @@ -1922,6 +1922,7 @@ static struct iommu_domain *__iommu_domain_alloc(struct bus_type *bus,
>   	domain->type = type;
>   	/* Assume all sizes by default; the driver may override this later */
>   	domain->pgsize_bitmap  = bus->iommu_ops->pgsize_bitmap;
> +	mutex_init(&domain->switch_log_lock);
>   
>   	return domain;
>   }
> @@ -2720,6 +2721,155 @@ int iommu_domain_set_attr(struct iommu_domain *domain,
>   }
>   EXPORT_SYMBOL_GPL(iommu_domain_set_attr);
>   
> +int iommu_switch_dirty_log(struct iommu_domain *domain, bool enable,
> +			   unsigned long iova, size_t size, int prot)
> +{
> +	const struct iommu_ops *ops = domain->ops;
> +	int ret;
> +
> +	if (unlikely(!ops || !ops->switch_dirty_log))
> +		return -ENODEV;
> +
> +	mutex_lock(&domain->switch_log_lock);
> +	if (enable && domain->dirty_log_tracking) {
> +		ret = -EBUSY;
> +		goto out;
> +	} else if (!enable && !domain->dirty_log_tracking) {
> +		ret = -EINVAL;
> +		goto out;
> +	}
> +
> +	ret = ops->switch_dirty_log(domain, enable, iova, size, prot);
> +	if (ret)
> +		goto out;
> +
> +	domain->dirty_log_tracking = enable;
> +out:
> +	mutex_unlock(&domain->switch_log_lock);
> +	return ret;
> +}
> +EXPORT_SYMBOL_GPL(iommu_switch_dirty_log);

Since you also added IOMMU_DEV_FEAT_HWDBM, I am wondering what's the
difference between

iommu_switch_dirty_log(on) vs. 
iommu_dev_enable_feature(IOMMU_DEV_FEAT_HWDBM)

iommu_switch_dirty_log(off) vs. 
iommu_dev_disable_feature(IOMMU_DEV_FEAT_HWDBM)

> +
> +int iommu_sync_dirty_log(struct iommu_domain *domain, unsigned long iova,
> +			 size_t size, unsigned long *bitmap,
> +			 unsigned long base_iova, unsigned long bitmap_pgshift)
> +{
> +	const struct iommu_ops *ops = domain->ops;
> +	unsigned int min_pagesz;
> +	size_t pgsize;
> +	int ret = 0;
> +
> +	if (unlikely(!ops || !ops->sync_dirty_log))
> +		return -ENODEV;
> +
> +	min_pagesz = 1 << __ffs(domain->pgsize_bitmap);
> +	if (!IS_ALIGNED(iova | size, min_pagesz)) {
> +		pr_err("unaligned: iova 0x%lx size 0x%zx min_pagesz 0x%x\n",
> +		       iova, size, min_pagesz);
> +		return -EINVAL;
> +	}
> +
> +	mutex_lock(&domain->switch_log_lock);
> +	if (!domain->dirty_log_tracking) {
> +		ret = -EINVAL;
> +		goto out;
> +	}
> +
> +	while (size) {
> +		pgsize = iommu_pgsize(domain, iova, size);
> +
> +		ret = ops->sync_dirty_log(domain, iova, pgsize,
> +					  bitmap, base_iova, bitmap_pgshift);

Any reason why do you want to do this in a per-4K page manner? This can
lead to a lot of indirect calls and bad performance.

How about a sync_dirty_pages()?

The same comment applies to other places in this patch series.

> +		if (ret)
> +			break;
> +
> +		pr_debug("dirty_log_sync handle: iova 0x%lx pagesz 0x%zx\n",
> +			 iova, pgsize);
> +
> +		iova += pgsize;
> +		size -= pgsize;
> +	}
> +out:
> +	mutex_unlock(&domain->switch_log_lock);
> +	return ret;
> +}
> +EXPORT_SYMBOL_GPL(iommu_sync_dirty_log);
> +
> +static int __iommu_clear_dirty_log(struct iommu_domain *domain,
> +				   unsigned long iova, size_t size,
> +				   unsigned long *bitmap,
> +				   unsigned long base_iova,
> +				   unsigned long bitmap_pgshift)
> +{
> +	const struct iommu_ops *ops = domain->ops;
> +	size_t pgsize;
> +	int ret = 0;
> +
> +	if (unlikely(!ops || !ops->clear_dirty_log))
> +		return -ENODEV;
> +
> +	while (size) {
> +		pgsize = iommu_pgsize(domain, iova, size);
> +
> +		ret = ops->clear_dirty_log(domain, iova, pgsize, bitmap,
> +					   base_iova, bitmap_pgshift);
> +		if (ret)
> +			break;
> +
> +		pr_debug("dirty_log_clear handled: iova 0x%lx pagesz 0x%zx\n",
> +			 iova, pgsize);
> +
> +		iova += pgsize;
> +		size -= pgsize;
> +	}
> +
> +	return ret;
> +}
> +
> +int iommu_clear_dirty_log(struct iommu_domain *domain,
> +			  unsigned long iova, size_t size,
> +			  unsigned long *bitmap, unsigned long base_iova,
> +			  unsigned long bitmap_pgshift)
> +{
> +	unsigned long riova, rsize;
> +	unsigned int min_pagesz;
> +	bool flush = false;
> +	int rs, re, start, end;
> +	int ret = 0;
> +
> +	min_pagesz = 1 << __ffs(domain->pgsize_bitmap);
> +	if (!IS_ALIGNED(iova | size, min_pagesz)) {
> +		pr_err("unaligned: iova 0x%lx min_pagesz 0x%x\n",
> +		       iova, min_pagesz);
> +		return -EINVAL;
> +	}
> +
> +	mutex_lock(&domain->switch_log_lock);
> +	if (!domain->dirty_log_tracking) {
> +		ret = -EINVAL;
> +		goto out;
> +	}
> +
> +	start = (iova - base_iova) >> bitmap_pgshift;
> +	end = start + (size >> bitmap_pgshift);
> +	bitmap_for_each_set_region(bitmap, rs, re, start, end) {
> +		flush = true;
> +		riova = iova + (rs << bitmap_pgshift);
> +		rsize = (re - rs) << bitmap_pgshift;
> +		ret = __iommu_clear_dirty_log(domain, riova, rsize, bitmap,
> +					      base_iova, bitmap_pgshift);
> +		if (ret)
> +			break;
> +	}
> +
> +	if (flush)
> +		iommu_flush_iotlb_all(domain);
> +out:
> +	mutex_unlock(&domain->switch_log_lock);
> +	return ret;
> +}
> +EXPORT_SYMBOL_GPL(iommu_clear_dirty_log);
> +
>   void iommu_get_resv_regions(struct device *dev, struct list_head *list)
>   {
>   	const struct iommu_ops *ops = dev->bus->iommu_ops;
> diff --git a/include/linux/iommu.h b/include/linux/iommu.h
> index 5e7fe519430a..7f9ed9f520e2 100644
> --- a/include/linux/iommu.h
> +++ b/include/linux/iommu.h
> @@ -87,6 +87,8 @@ struct iommu_domain {
>   	void *handler_token;
>   	struct iommu_domain_geometry geometry;
>   	void *iova_cookie;
> +	bool dirty_log_tracking;
> +	struct mutex switch_log_lock;
>   };
>   
>   enum iommu_cap {
> @@ -160,6 +162,7 @@ struct iommu_resv_region {
>   enum iommu_dev_features {
>   	IOMMU_DEV_FEAT_AUX,	/* Aux-domain feature */
>   	IOMMU_DEV_FEAT_SVA,	/* Shared Virtual Addresses */
> +	IOMMU_DEV_FEAT_HWDBM,	/* Hardware Dirty Bit Management */
>   };
>   
>   #define IOMMU_PASID_INVALID	(-1U)
> @@ -205,6 +208,9 @@ struct iommu_iotlb_gather {
>    * @device_group: find iommu group for a particular device
>    * @domain_get_attr: Query domain attributes
>    * @domain_set_attr: Change domain attributes
> + * @switch_dirty_log: Perform actions to start|stop dirty log tracking
> + * @sync_dirty_log: Sync dirty log from IOMMU into a dirty bitmap
> + * @clear_dirty_log: Clear dirty log of IOMMU by a mask bitmap
>    * @get_resv_regions: Request list of reserved regions for a device
>    * @put_resv_regions: Free list of reserved regions for a device
>    * @apply_resv_region: Temporary helper call-back for iova reserved ranges
> @@ -260,6 +266,18 @@ struct iommu_ops {
>   	int (*domain_set_attr)(struct iommu_domain *domain,
>   			       enum iommu_attr attr, void *data);
>   
> +	/* Track dirty log */
> +	int (*switch_dirty_log)(struct iommu_domain *domain, bool enable,
> +				unsigned long iova, size_t size, int prot);
> +	int (*sync_dirty_log)(struct iommu_domain *domain,
> +			      unsigned long iova, size_t size,
> +			      unsigned long *bitmap, unsigned long base_iova,
> +			      unsigned long bitmap_pgshift);
> +	int (*clear_dirty_log)(struct iommu_domain *domain,
> +			       unsigned long iova, size_t size,
> +			       unsigned long *bitmap, unsigned long base_iova,
> +			       unsigned long bitmap_pgshift);
> +
>   	/* Request/Free a list of reserved regions for a device */
>   	void (*get_resv_regions)(struct device *dev, struct list_head *list);
>   	void (*put_resv_regions)(struct device *dev, struct list_head *list);
> @@ -511,6 +529,16 @@ extern int iommu_domain_get_attr(struct iommu_domain *domain, enum iommu_attr,
>   				 void *data);
>   extern int iommu_domain_set_attr(struct iommu_domain *domain, enum iommu_attr,
>   				 void *data);
> +extern int iommu_switch_dirty_log(struct iommu_domain *domain, bool enable,
> +				  unsigned long iova, size_t size, int prot);
> +extern int iommu_sync_dirty_log(struct iommu_domain *domain, unsigned long iova,
> +				size_t size, unsigned long *bitmap,
> +				unsigned long base_iova,
> +				unsigned long bitmap_pgshift);
> +extern int iommu_clear_dirty_log(struct iommu_domain *domain, unsigned long iova,
> +				 size_t dma_size, unsigned long *bitmap,
> +				 unsigned long base_iova,
> +				 unsigned long bitmap_pgshift);
>   
>   /* Window handling function prototypes */
>   extern int iommu_domain_window_enable(struct iommu_domain *domain, u32 wnd_nr,
> @@ -901,6 +929,31 @@ static inline int iommu_domain_set_attr(struct iommu_domain *domain,
>   	return -EINVAL;
>   }
>   
> +static inline int iommu_switch_dirty_log(struct iommu_domain *domain,
> +					 bool enable, unsigned long iova,
> +					 size_t size, int prot)
> +{
> +	return -EINVAL;
> +}
> +
> +static inline int iommu_sync_dirty_log(struct iommu_domain *domain,
> +				       unsigned long iova, size_t size,
> +				       unsigned long *bitmap,
> +				       unsigned long base_iova,
> +				       unsigned long pgshift)
> +{
> +	return -EINVAL;
> +}
> +
> +static inline int iommu_clear_dirty_log(struct iommu_domain *domain,
> +					unsigned long iova, size_t size,
> +					unsigned long *bitmap,
> +					unsigned long base_iova,
> +					unsigned long pgshift)
> +{
> +	return -EINVAL;
> +}
> +
>   static inline int  iommu_device_register(struct iommu_device *iommu)
>   {
>   	return -ENODEV;
> 

Best regards,
baolu
Keqian Zhu April 15, 2021, 6:18 a.m. UTC | #2
Hi Baolu,

Thanks for the review!

On 2021/4/14 15:00, Lu Baolu wrote:
> Hi Keqian,
> 
> On 4/13/21 4:54 PM, Keqian Zhu wrote:
>> Some types of IOMMU are capable of tracking DMA dirty log, such as
>> ARM SMMU with HTTU or Intel IOMMU with SLADE. This introduces the
>> dirty log tracking framework in the IOMMU base layer.
>>
>> Three new essential interfaces are added, and we maintaince the status
>> of dirty log tracking in iommu_domain.
>> 1. iommu_switch_dirty_log: Perform actions to start|stop dirty log tracking
>> 2. iommu_sync_dirty_log: Sync dirty log from IOMMU into a dirty bitmap
>> 3. iommu_clear_dirty_log: Clear dirty log of IOMMU by a mask bitmap
>>
>> A new dev feature are added to indicate whether a specific type of
>> iommu hardware supports and its driver realizes them.
>>
>> Signed-off-by: Keqian Zhu <zhukeqian1@huawei.com>
>> Signed-off-by: Kunkun Jiang <jiangkunkun@huawei.com>
>> ---
>>   drivers/iommu/iommu.c | 150 ++++++++++++++++++++++++++++++++++++++++++
>>   include/linux/iommu.h |  53 +++++++++++++++
>>   2 files changed, 203 insertions(+)
>>
>> diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
>> index d0b0a15dba84..667b2d6d2fc0 100644
>> --- a/drivers/iommu/iommu.c
>> +++ b/drivers/iommu/iommu.c
>> @@ -1922,6 +1922,7 @@ static struct iommu_domain *__iommu_domain_alloc(struct bus_type *bus,
>>       domain->type = type;
>>       /* Assume all sizes by default; the driver may override this later */
>>       domain->pgsize_bitmap  = bus->iommu_ops->pgsize_bitmap;
>> +    mutex_init(&domain->switch_log_lock);
>>         return domain;
>>   }
>> @@ -2720,6 +2721,155 @@ int iommu_domain_set_attr(struct iommu_domain *domain,
>>   }
>>   EXPORT_SYMBOL_GPL(iommu_domain_set_attr);
>>   +int iommu_switch_dirty_log(struct iommu_domain *domain, bool enable,
>> +               unsigned long iova, size_t size, int prot)
>> +{
>> +    const struct iommu_ops *ops = domain->ops;
>> +    int ret;
>> +
>> +    if (unlikely(!ops || !ops->switch_dirty_log))
>> +        return -ENODEV;
>> +
>> +    mutex_lock(&domain->switch_log_lock);
>> +    if (enable && domain->dirty_log_tracking) {
>> +        ret = -EBUSY;
>> +        goto out;
>> +    } else if (!enable && !domain->dirty_log_tracking) {
>> +        ret = -EINVAL;
>> +        goto out;
>> +    }
>> +
>> +    ret = ops->switch_dirty_log(domain, enable, iova, size, prot);
>> +    if (ret)
>> +        goto out;
>> +
>> +    domain->dirty_log_tracking = enable;
>> +out:
>> +    mutex_unlock(&domain->switch_log_lock);
>> +    return ret;
>> +}
>> +EXPORT_SYMBOL_GPL(iommu_switch_dirty_log);
> 
> Since you also added IOMMU_DEV_FEAT_HWDBM, I am wondering what's the
> difference between
> 
> iommu_switch_dirty_log(on) vs. iommu_dev_enable_feature(IOMMU_DEV_FEAT_HWDBM)
> 
> iommu_switch_dirty_log(off) vs. iommu_dev_disable_feature(IOMMU_DEV_FEAT_HWDBM)
Indeed. As I can see, IOMMU_DEV_FEAT_AUX is not switchable, so enable/disable
are not applicable for it. IOMMU_DEV_FEAT_SVA is switchable, so we can use these
interfaces for it.

IOMMU_DEV_FEAT_HWDBM is used to indicate whether hardware supports HWDBM, so we should
design it as not switchable. I will modify the commit message of patch#12, thanks!

> 
>> +
>> +int iommu_sync_dirty_log(struct iommu_domain *domain, unsigned long iova,
>> +             size_t size, unsigned long *bitmap,
>> +             unsigned long base_iova, unsigned long bitmap_pgshift)
>> +{
>> +    const struct iommu_ops *ops = domain->ops;
>> +    unsigned int min_pagesz;
>> +    size_t pgsize;
>> +    int ret = 0;
>> +
>> +    if (unlikely(!ops || !ops->sync_dirty_log))
>> +        return -ENODEV;
>> +
>> +    min_pagesz = 1 << __ffs(domain->pgsize_bitmap);
>> +    if (!IS_ALIGNED(iova | size, min_pagesz)) {
>> +        pr_err("unaligned: iova 0x%lx size 0x%zx min_pagesz 0x%x\n",
>> +               iova, size, min_pagesz);
>> +        return -EINVAL;
>> +    }
>> +
>> +    mutex_lock(&domain->switch_log_lock);
>> +    if (!domain->dirty_log_tracking) {
>> +        ret = -EINVAL;
>> +        goto out;
>> +    }
>> +
>> +    while (size) {
>> +        pgsize = iommu_pgsize(domain, iova, size);
>> +
>> +        ret = ops->sync_dirty_log(domain, iova, pgsize,
>> +                      bitmap, base_iova, bitmap_pgshift);
> 
> Any reason why do you want to do this in a per-4K page manner? This can
> lead to a lot of indirect calls and bad performance.
> 
> How about a sync_dirty_pages()?
The function name of iommu_pgsize() is a bit puzzling. Actually it will try to
compute the max size that fit into size, so the pgsize can be a large page size
even if the underlying mapping is 4K. The __iommu_unmap() also has a similar logic.

BRs,
Keqian
Lu Baolu April 15, 2021, 7:03 a.m. UTC | #3
On 4/15/21 2:18 PM, Keqian Zhu wrote:
> Hi Baolu,
> 
> Thanks for the review!
> 
> On 2021/4/14 15:00, Lu Baolu wrote:
>> Hi Keqian,
>>
>> On 4/13/21 4:54 PM, Keqian Zhu wrote:
>>> Some types of IOMMU are capable of tracking DMA dirty log, such as
>>> ARM SMMU with HTTU or Intel IOMMU with SLADE. This introduces the
>>> dirty log tracking framework in the IOMMU base layer.
>>>
>>> Three new essential interfaces are added, and we maintaince the status
>>> of dirty log tracking in iommu_domain.
>>> 1. iommu_switch_dirty_log: Perform actions to start|stop dirty log tracking
>>> 2. iommu_sync_dirty_log: Sync dirty log from IOMMU into a dirty bitmap
>>> 3. iommu_clear_dirty_log: Clear dirty log of IOMMU by a mask bitmap
>>>
>>> A new dev feature are added to indicate whether a specific type of
>>> iommu hardware supports and its driver realizes them.
>>>
>>> Signed-off-by: Keqian Zhu <zhukeqian1@huawei.com>
>>> Signed-off-by: Kunkun Jiang <jiangkunkun@huawei.com>
>>> ---
>>>    drivers/iommu/iommu.c | 150 ++++++++++++++++++++++++++++++++++++++++++
>>>    include/linux/iommu.h |  53 +++++++++++++++
>>>    2 files changed, 203 insertions(+)
>>>
>>> diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
>>> index d0b0a15dba84..667b2d6d2fc0 100644
>>> --- a/drivers/iommu/iommu.c
>>> +++ b/drivers/iommu/iommu.c
>>> @@ -1922,6 +1922,7 @@ static struct iommu_domain *__iommu_domain_alloc(struct bus_type *bus,
>>>        domain->type = type;
>>>        /* Assume all sizes by default; the driver may override this later */
>>>        domain->pgsize_bitmap  = bus->iommu_ops->pgsize_bitmap;
>>> +    mutex_init(&domain->switch_log_lock);
>>>          return domain;
>>>    }
>>> @@ -2720,6 +2721,155 @@ int iommu_domain_set_attr(struct iommu_domain *domain,
>>>    }
>>>    EXPORT_SYMBOL_GPL(iommu_domain_set_attr);
>>>    +int iommu_switch_dirty_log(struct iommu_domain *domain, bool enable,
>>> +               unsigned long iova, size_t size, int prot)
>>> +{
>>> +    const struct iommu_ops *ops = domain->ops;
>>> +    int ret;
>>> +
>>> +    if (unlikely(!ops || !ops->switch_dirty_log))
>>> +        return -ENODEV;
>>> +
>>> +    mutex_lock(&domain->switch_log_lock);
>>> +    if (enable && domain->dirty_log_tracking) {
>>> +        ret = -EBUSY;
>>> +        goto out;
>>> +    } else if (!enable && !domain->dirty_log_tracking) {
>>> +        ret = -EINVAL;
>>> +        goto out;
>>> +    }
>>> +
>>> +    ret = ops->switch_dirty_log(domain, enable, iova, size, prot);
>>> +    if (ret)
>>> +        goto out;
>>> +
>>> +    domain->dirty_log_tracking = enable;
>>> +out:
>>> +    mutex_unlock(&domain->switch_log_lock);
>>> +    return ret;
>>> +}
>>> +EXPORT_SYMBOL_GPL(iommu_switch_dirty_log);
>>
>> Since you also added IOMMU_DEV_FEAT_HWDBM, I am wondering what's the
>> difference between
>>
>> iommu_switch_dirty_log(on) vs. iommu_dev_enable_feature(IOMMU_DEV_FEAT_HWDBM)
>>
>> iommu_switch_dirty_log(off) vs. iommu_dev_disable_feature(IOMMU_DEV_FEAT_HWDBM)
> Indeed. As I can see, IOMMU_DEV_FEAT_AUX is not switchable, so enable/disable
> are not applicable for it. IOMMU_DEV_FEAT_SVA is switchable, so we can use these
> interfaces for it.
> 
> IOMMU_DEV_FEAT_HWDBM is used to indicate whether hardware supports HWDBM, so we should

Previously we had iommu_dev_has_feature() and then was cleaned up due to
lack of real users. If you have a real case for it, why not bringing it
back?

> design it as not switchable. I will modify the commit message of patch#12, thanks!

I am not sure that I fully get your point. But I can't see any gaps of
using iommu_dev_enable/disable_feature() to switch dirty log on and off.
Probably I missed anything.

> 
>>
>>> +
>>> +int iommu_sync_dirty_log(struct iommu_domain *domain, unsigned long iova,
>>> +             size_t size, unsigned long *bitmap,
>>> +             unsigned long base_iova, unsigned long bitmap_pgshift)
>>> +{
>>> +    const struct iommu_ops *ops = domain->ops;
>>> +    unsigned int min_pagesz;
>>> +    size_t pgsize;
>>> +    int ret = 0;
>>> +
>>> +    if (unlikely(!ops || !ops->sync_dirty_log))
>>> +        return -ENODEV;
>>> +
>>> +    min_pagesz = 1 << __ffs(domain->pgsize_bitmap);
>>> +    if (!IS_ALIGNED(iova | size, min_pagesz)) {
>>> +        pr_err("unaligned: iova 0x%lx size 0x%zx min_pagesz 0x%x\n",
>>> +               iova, size, min_pagesz);
>>> +        return -EINVAL;
>>> +    }
>>> +
>>> +    mutex_lock(&domain->switch_log_lock);
>>> +    if (!domain->dirty_log_tracking) {
>>> +        ret = -EINVAL;
>>> +        goto out;
>>> +    }
>>> +
>>> +    while (size) {
>>> +        pgsize = iommu_pgsize(domain, iova, size);
>>> +
>>> +        ret = ops->sync_dirty_log(domain, iova, pgsize,
>>> +                      bitmap, base_iova, bitmap_pgshift);
>>
>> Any reason why do you want to do this in a per-4K page manner? This can
>> lead to a lot of indirect calls and bad performance.
>>
>> How about a sync_dirty_pages()?
> The function name of iommu_pgsize() is a bit puzzling. Actually it will try to
> compute the max size that fit into size, so the pgsize can be a large page size
> even if the underlying mapping is 4K. The __iommu_unmap() also has a similar logic.

This series has some improvement on the iommu_pgsize() helper.

https://lore.kernel.org/linux-iommu/20210405191112.28192-1-isaacm@codeaurora.org/

Best regards,
baolu
Keqian Zhu April 15, 2021, 7:43 a.m. UTC | #4
On 2021/4/15 15:03, Lu Baolu wrote:
> On 4/15/21 2:18 PM, Keqian Zhu wrote:
>> Hi Baolu,
>>
>> Thanks for the review!
>>
>> On 2021/4/14 15:00, Lu Baolu wrote:
>>> Hi Keqian,
>>>
>>> On 4/13/21 4:54 PM, Keqian Zhu wrote:
>>>> Some types of IOMMU are capable of tracking DMA dirty log, such as
>>>> ARM SMMU with HTTU or Intel IOMMU with SLADE. This introduces the
>>>> dirty log tracking framework in the IOMMU base layer.
>>>>
>>>> Three new essential interfaces are added, and we maintaince the status
>>>> of dirty log tracking in iommu_domain.
>>>> 1. iommu_switch_dirty_log: Perform actions to start|stop dirty log tracking
>>>> 2. iommu_sync_dirty_log: Sync dirty log from IOMMU into a dirty bitmap
>>>> 3. iommu_clear_dirty_log: Clear dirty log of IOMMU by a mask bitmap
>>>>
>>>> A new dev feature are added to indicate whether a specific type of
>>>> iommu hardware supports and its driver realizes them.
>>>>
>>>> Signed-off-by: Keqian Zhu <zhukeqian1@huawei.com>
>>>> Signed-off-by: Kunkun Jiang <jiangkunkun@huawei.com>
>>>> ---
>>>>    drivers/iommu/iommu.c | 150 ++++++++++++++++++++++++++++++++++++++++++
>>>>    include/linux/iommu.h |  53 +++++++++++++++
>>>>    2 files changed, 203 insertions(+)
>>>>
>>>> diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
>>>> index d0b0a15dba84..667b2d6d2fc0 100644
>>>> --- a/drivers/iommu/iommu.c
>>>> +++ b/drivers/iommu/iommu.c
>>>> @@ -1922,6 +1922,7 @@ static struct iommu_domain *__iommu_domain_alloc(struct bus_type *bus,
>>>>        domain->type = type;
>>>>        /* Assume all sizes by default; the driver may override this later */
>>>>        domain->pgsize_bitmap  = bus->iommu_ops->pgsize_bitmap;
>>>> +    mutex_init(&domain->switch_log_lock);
>>>>          return domain;
>>>>    }
>>>> @@ -2720,6 +2721,155 @@ int iommu_domain_set_attr(struct iommu_domain *domain,
>>>>    }
>>>>    EXPORT_SYMBOL_GPL(iommu_domain_set_attr);
>>>>    +int iommu_switch_dirty_log(struct iommu_domain *domain, bool enable,
>>>> +               unsigned long iova, size_t size, int prot)
>>>> +{
>>>> +    const struct iommu_ops *ops = domain->ops;
>>>> +    int ret;
>>>> +
>>>> +    if (unlikely(!ops || !ops->switch_dirty_log))
>>>> +        return -ENODEV;
>>>> +
>>>> +    mutex_lock(&domain->switch_log_lock);
>>>> +    if (enable && domain->dirty_log_tracking) {
>>>> +        ret = -EBUSY;
>>>> +        goto out;
>>>> +    } else if (!enable && !domain->dirty_log_tracking) {
>>>> +        ret = -EINVAL;
>>>> +        goto out;
>>>> +    }
>>>> +
>>>> +    ret = ops->switch_dirty_log(domain, enable, iova, size, prot);
>>>> +    if (ret)
>>>> +        goto out;
>>>> +
>>>> +    domain->dirty_log_tracking = enable;
>>>> +out:
>>>> +    mutex_unlock(&domain->switch_log_lock);
>>>> +    return ret;
>>>> +}
>>>> +EXPORT_SYMBOL_GPL(iommu_switch_dirty_log);
>>>
>>> Since you also added IOMMU_DEV_FEAT_HWDBM, I am wondering what's the
>>> difference between
>>>
>>> iommu_switch_dirty_log(on) vs. iommu_dev_enable_feature(IOMMU_DEV_FEAT_HWDBM)
>>>
>>> iommu_switch_dirty_log(off) vs. iommu_dev_disable_feature(IOMMU_DEV_FEAT_HWDBM)
>> Indeed. As I can see, IOMMU_DEV_FEAT_AUX is not switchable, so enable/disable
>> are not applicable for it. IOMMU_DEV_FEAT_SVA is switchable, so we can use these
>> interfaces for it.
>>
>> IOMMU_DEV_FEAT_HWDBM is used to indicate whether hardware supports HWDBM, so we should
> 
> Previously we had iommu_dev_has_feature() and then was cleaned up due to
> lack of real users. If you have a real case for it, why not bringing it
> back?
Yep, good suggestion.

> 
>> design it as not switchable. I will modify the commit message of patch#12, thanks!
> 
> I am not sure that I fully get your point. But I can't see any gaps of
> using iommu_dev_enable/disable_feature() to switch dirty log on and off.
> Probably I missed anything.
IOMMU_DEV_FEAT_HWDBM just tells user whether underlying IOMMU driver supports
dirty tracking, it is not used to management the status of dirty log tracking.

The feature reporting is per device, but the status management is per iommu_domain.
Only when all devices in a domain support HWDBM, we can start dirty log for the domain.

And I think we'd better not mix the feature reporting and status management. Thoughts?

> 
>>
>>>
>>>> +
>>>> +int iommu_sync_dirty_log(struct iommu_domain *domain, unsigned long iova,
>>>> +             size_t size, unsigned long *bitmap,
>>>> +             unsigned long base_iova, unsigned long bitmap_pgshift)
>>>> +{
>>>> +    const struct iommu_ops *ops = domain->ops;
>>>> +    unsigned int min_pagesz;
>>>> +    size_t pgsize;
>>>> +    int ret = 0;
>>>> +
>>>> +    if (unlikely(!ops || !ops->sync_dirty_log))
>>>> +        return -ENODEV;
>>>> +
>>>> +    min_pagesz = 1 << __ffs(domain->pgsize_bitmap);
>>>> +    if (!IS_ALIGNED(iova | size, min_pagesz)) {
>>>> +        pr_err("unaligned: iova 0x%lx size 0x%zx min_pagesz 0x%x\n",
>>>> +               iova, size, min_pagesz);
>>>> +        return -EINVAL;
>>>> +    }
>>>> +
>>>> +    mutex_lock(&domain->switch_log_lock);
>>>> +    if (!domain->dirty_log_tracking) {
>>>> +        ret = -EINVAL;
>>>> +        goto out;
>>>> +    }
>>>> +
>>>> +    while (size) {
>>>> +        pgsize = iommu_pgsize(domain, iova, size);
>>>> +
>>>> +        ret = ops->sync_dirty_log(domain, iova, pgsize,
>>>> +                      bitmap, base_iova, bitmap_pgshift);
>>>
>>> Any reason why do you want to do this in a per-4K page manner? This can
>>> lead to a lot of indirect calls and bad performance.
>>>
>>> How about a sync_dirty_pages()?
>> The function name of iommu_pgsize() is a bit puzzling. Actually it will try to
>> compute the max size that fit into size, so the pgsize can be a large page size
>> even if the underlying mapping is 4K. The __iommu_unmap() also has a similar logic.
> 
> This series has some improvement on the iommu_pgsize() helper.
> 
> https://lore.kernel.org/linux-iommu/20210405191112.28192-1-isaacm@codeaurora.org/
OK, I get your idea. I will look into that, thanks!

BRs,
Keqian
Lu Baolu April 15, 2021, 10:21 a.m. UTC | #5
Hi,

On 2021/4/15 15:43, Keqian Zhu wrote:
>>> design it as not switchable. I will modify the commit message of patch#12, thanks!
>> I am not sure that I fully get your point. But I can't see any gaps of
>> using iommu_dev_enable/disable_feature() to switch dirty log on and off.
>> Probably I missed anything.
> IOMMU_DEV_FEAT_HWDBM just tells user whether underlying IOMMU driver supports
> dirty tracking, it is not used to management the status of dirty log tracking.
> 
> The feature reporting is per device, but the status management is per iommu_domain.
> Only when all devices in a domain support HWDBM, we can start dirty log for the domain.

So why not

	for_each_device_attached_domain()
		iommu_dev_enable_feature(IOMMU_DEV_FEAT_HWDBM)

?
> 
> And I think we'd better not mix the feature reporting and status management. Thoughts?
> 

I am worrying about having two sets of APIs for single purpose. From
vendor iommu driver's point of view, this feature is per device. Hence,
it still needs to do the same thing.

Best regards,
baolu
Keqian Zhu April 16, 2021, 9:07 a.m. UTC | #6
Hi Baolu,

On 2021/4/15 18:21, Lu Baolu wrote:
> Hi,
> 
> On 2021/4/15 15:43, Keqian Zhu wrote:
>>>> design it as not switchable. I will modify the commit message of patch#12, thanks!
>>> I am not sure that I fully get your point. But I can't see any gaps of
>>> using iommu_dev_enable/disable_feature() to switch dirty log on and off.
>>> Probably I missed anything.
>> IOMMU_DEV_FEAT_HWDBM just tells user whether underlying IOMMU driver supports
>> dirty tracking, it is not used to management the status of dirty log tracking.
>>
>> The feature reporting is per device, but the status management is per iommu_domain.
>> Only when all devices in a domain support HWDBM, we can start dirty log for the domain.
> 
> So why not
> 
>     for_each_device_attached_domain()
>         iommu_dev_enable_feature(IOMMU_DEV_FEAT_HWDBM)
Looks reasonable, but the problem is that we just need to enable dirty log once per domain.

> 
> ?
>>
>> And I think we'd better not mix the feature reporting and status management. Thoughts?
>>
> 
> I am worrying about having two sets of APIs for single purpose. From
> vendor iommu driver's point of view, this feature is per device. Hence,
> it still needs to do the same thing.
Yes, we can unify the granule of feature reporting and status management.

The basic granule of dirty tracking is iommu_domain, I think it's very reasonable. We need an
interface to report the feature of iommu_domain, then the logic is much more clear.

Every time we add new device or remove device from the domain, we should update the feature (e.g.,
maintain a counter of unsupported devices).

What do you think about this idea?

Thanks,
Keqian
Lu Baolu April 19, 2021, 1:59 a.m. UTC | #7
Hi Keqian,

On 4/16/21 5:07 PM, Keqian Zhu wrote:
>> I am worrying about having two sets of APIs for single purpose. From
>> vendor iommu driver's point of view, this feature is per device. Hence,
>> it still needs to do the same thing.
> Yes, we can unify the granule of feature reporting and status management.
> 
> The basic granule of dirty tracking is iommu_domain, I think it's very reasonable. We need an
> interface to report the feature of iommu_domain, then the logic is much more clear.
> 
> Every time we add new device or remove device from the domain, we should update the feature (e.g.,
> maintain a counter of unsupported devices).

Yes. This looks cleaner.

> 
> What do you think about this idea?
> 
> Thanks,
> Keqian

Best regards,
baolu
diff mbox series

Patch

diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index d0b0a15dba84..667b2d6d2fc0 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -1922,6 +1922,7 @@  static struct iommu_domain *__iommu_domain_alloc(struct bus_type *bus,
 	domain->type = type;
 	/* Assume all sizes by default; the driver may override this later */
 	domain->pgsize_bitmap  = bus->iommu_ops->pgsize_bitmap;
+	mutex_init(&domain->switch_log_lock);
 
 	return domain;
 }
@@ -2720,6 +2721,155 @@  int iommu_domain_set_attr(struct iommu_domain *domain,
 }
 EXPORT_SYMBOL_GPL(iommu_domain_set_attr);
 
+int iommu_switch_dirty_log(struct iommu_domain *domain, bool enable,
+			   unsigned long iova, size_t size, int prot)
+{
+	const struct iommu_ops *ops = domain->ops;
+	int ret;
+
+	if (unlikely(!ops || !ops->switch_dirty_log))
+		return -ENODEV;
+
+	mutex_lock(&domain->switch_log_lock);
+	if (enable && domain->dirty_log_tracking) {
+		ret = -EBUSY;
+		goto out;
+	} else if (!enable && !domain->dirty_log_tracking) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	ret = ops->switch_dirty_log(domain, enable, iova, size, prot);
+	if (ret)
+		goto out;
+
+	domain->dirty_log_tracking = enable;
+out:
+	mutex_unlock(&domain->switch_log_lock);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(iommu_switch_dirty_log);
+
+int iommu_sync_dirty_log(struct iommu_domain *domain, unsigned long iova,
+			 size_t size, unsigned long *bitmap,
+			 unsigned long base_iova, unsigned long bitmap_pgshift)
+{
+	const struct iommu_ops *ops = domain->ops;
+	unsigned int min_pagesz;
+	size_t pgsize;
+	int ret = 0;
+
+	if (unlikely(!ops || !ops->sync_dirty_log))
+		return -ENODEV;
+
+	min_pagesz = 1 << __ffs(domain->pgsize_bitmap);
+	if (!IS_ALIGNED(iova | size, min_pagesz)) {
+		pr_err("unaligned: iova 0x%lx size 0x%zx min_pagesz 0x%x\n",
+		       iova, size, min_pagesz);
+		return -EINVAL;
+	}
+
+	mutex_lock(&domain->switch_log_lock);
+	if (!domain->dirty_log_tracking) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	while (size) {
+		pgsize = iommu_pgsize(domain, iova, size);
+
+		ret = ops->sync_dirty_log(domain, iova, pgsize,
+					  bitmap, base_iova, bitmap_pgshift);
+		if (ret)
+			break;
+
+		pr_debug("dirty_log_sync handle: iova 0x%lx pagesz 0x%zx\n",
+			 iova, pgsize);
+
+		iova += pgsize;
+		size -= pgsize;
+	}
+out:
+	mutex_unlock(&domain->switch_log_lock);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(iommu_sync_dirty_log);
+
+static int __iommu_clear_dirty_log(struct iommu_domain *domain,
+				   unsigned long iova, size_t size,
+				   unsigned long *bitmap,
+				   unsigned long base_iova,
+				   unsigned long bitmap_pgshift)
+{
+	const struct iommu_ops *ops = domain->ops;
+	size_t pgsize;
+	int ret = 0;
+
+	if (unlikely(!ops || !ops->clear_dirty_log))
+		return -ENODEV;
+
+	while (size) {
+		pgsize = iommu_pgsize(domain, iova, size);
+
+		ret = ops->clear_dirty_log(domain, iova, pgsize, bitmap,
+					   base_iova, bitmap_pgshift);
+		if (ret)
+			break;
+
+		pr_debug("dirty_log_clear handled: iova 0x%lx pagesz 0x%zx\n",
+			 iova, pgsize);
+
+		iova += pgsize;
+		size -= pgsize;
+	}
+
+	return ret;
+}
+
+int iommu_clear_dirty_log(struct iommu_domain *domain,
+			  unsigned long iova, size_t size,
+			  unsigned long *bitmap, unsigned long base_iova,
+			  unsigned long bitmap_pgshift)
+{
+	unsigned long riova, rsize;
+	unsigned int min_pagesz;
+	bool flush = false;
+	int rs, re, start, end;
+	int ret = 0;
+
+	min_pagesz = 1 << __ffs(domain->pgsize_bitmap);
+	if (!IS_ALIGNED(iova | size, min_pagesz)) {
+		pr_err("unaligned: iova 0x%lx min_pagesz 0x%x\n",
+		       iova, min_pagesz);
+		return -EINVAL;
+	}
+
+	mutex_lock(&domain->switch_log_lock);
+	if (!domain->dirty_log_tracking) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	start = (iova - base_iova) >> bitmap_pgshift;
+	end = start + (size >> bitmap_pgshift);
+	bitmap_for_each_set_region(bitmap, rs, re, start, end) {
+		flush = true;
+		riova = iova + (rs << bitmap_pgshift);
+		rsize = (re - rs) << bitmap_pgshift;
+		ret = __iommu_clear_dirty_log(domain, riova, rsize, bitmap,
+					      base_iova, bitmap_pgshift);
+		if (ret)
+			break;
+	}
+
+	if (flush)
+		iommu_flush_iotlb_all(domain);
+out:
+	mutex_unlock(&domain->switch_log_lock);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(iommu_clear_dirty_log);
+
 void iommu_get_resv_regions(struct device *dev, struct list_head *list)
 {
 	const struct iommu_ops *ops = dev->bus->iommu_ops;
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 5e7fe519430a..7f9ed9f520e2 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -87,6 +87,8 @@  struct iommu_domain {
 	void *handler_token;
 	struct iommu_domain_geometry geometry;
 	void *iova_cookie;
+	bool dirty_log_tracking;
+	struct mutex switch_log_lock;
 };
 
 enum iommu_cap {
@@ -160,6 +162,7 @@  struct iommu_resv_region {
 enum iommu_dev_features {
 	IOMMU_DEV_FEAT_AUX,	/* Aux-domain feature */
 	IOMMU_DEV_FEAT_SVA,	/* Shared Virtual Addresses */
+	IOMMU_DEV_FEAT_HWDBM,	/* Hardware Dirty Bit Management */
 };
 
 #define IOMMU_PASID_INVALID	(-1U)
@@ -205,6 +208,9 @@  struct iommu_iotlb_gather {
  * @device_group: find iommu group for a particular device
  * @domain_get_attr: Query domain attributes
  * @domain_set_attr: Change domain attributes
+ * @switch_dirty_log: Perform actions to start|stop dirty log tracking
+ * @sync_dirty_log: Sync dirty log from IOMMU into a dirty bitmap
+ * @clear_dirty_log: Clear dirty log of IOMMU by a mask bitmap
  * @get_resv_regions: Request list of reserved regions for a device
  * @put_resv_regions: Free list of reserved regions for a device
  * @apply_resv_region: Temporary helper call-back for iova reserved ranges
@@ -260,6 +266,18 @@  struct iommu_ops {
 	int (*domain_set_attr)(struct iommu_domain *domain,
 			       enum iommu_attr attr, void *data);
 
+	/* Track dirty log */
+	int (*switch_dirty_log)(struct iommu_domain *domain, bool enable,
+				unsigned long iova, size_t size, int prot);
+	int (*sync_dirty_log)(struct iommu_domain *domain,
+			      unsigned long iova, size_t size,
+			      unsigned long *bitmap, unsigned long base_iova,
+			      unsigned long bitmap_pgshift);
+	int (*clear_dirty_log)(struct iommu_domain *domain,
+			       unsigned long iova, size_t size,
+			       unsigned long *bitmap, unsigned long base_iova,
+			       unsigned long bitmap_pgshift);
+
 	/* Request/Free a list of reserved regions for a device */
 	void (*get_resv_regions)(struct device *dev, struct list_head *list);
 	void (*put_resv_regions)(struct device *dev, struct list_head *list);
@@ -511,6 +529,16 @@  extern int iommu_domain_get_attr(struct iommu_domain *domain, enum iommu_attr,
 				 void *data);
 extern int iommu_domain_set_attr(struct iommu_domain *domain, enum iommu_attr,
 				 void *data);
+extern int iommu_switch_dirty_log(struct iommu_domain *domain, bool enable,
+				  unsigned long iova, size_t size, int prot);
+extern int iommu_sync_dirty_log(struct iommu_domain *domain, unsigned long iova,
+				size_t size, unsigned long *bitmap,
+				unsigned long base_iova,
+				unsigned long bitmap_pgshift);
+extern int iommu_clear_dirty_log(struct iommu_domain *domain, unsigned long iova,
+				 size_t dma_size, unsigned long *bitmap,
+				 unsigned long base_iova,
+				 unsigned long bitmap_pgshift);
 
 /* Window handling function prototypes */
 extern int iommu_domain_window_enable(struct iommu_domain *domain, u32 wnd_nr,
@@ -901,6 +929,31 @@  static inline int iommu_domain_set_attr(struct iommu_domain *domain,
 	return -EINVAL;
 }
 
+static inline int iommu_switch_dirty_log(struct iommu_domain *domain,
+					 bool enable, unsigned long iova,
+					 size_t size, int prot)
+{
+	return -EINVAL;
+}
+
+static inline int iommu_sync_dirty_log(struct iommu_domain *domain,
+				       unsigned long iova, size_t size,
+				       unsigned long *bitmap,
+				       unsigned long base_iova,
+				       unsigned long pgshift)
+{
+	return -EINVAL;
+}
+
+static inline int iommu_clear_dirty_log(struct iommu_domain *domain,
+					unsigned long iova, size_t size,
+					unsigned long *bitmap,
+					unsigned long base_iova,
+					unsigned long pgshift)
+{
+	return -EINVAL;
+}
+
 static inline int  iommu_device_register(struct iommu_device *iommu)
 {
 	return -ENODEV;