
[v7,14/23] iommu/smmuv3: Implement cache_invalidate

Message ID 20190408121911.24103-15-eric.auger@redhat.com (mailing list archive)
State New, archived
Series SMMUv3 Nested Stage Setup

Commit Message

Eric Auger April 8, 2019, 12:19 p.m. UTC
Implement domain-selective and page-selective IOTLB invalidations.

Signed-off-by: Eric Auger <eric.auger@redhat.com>

---
v6 -> v7
- check the uapi version

v3 -> v4:
- adapt to changes in the uapi
- add support for leaf parameter
- do not use arm_smmu_tlb_inv_range_nosync or arm_smmu_tlb_inv_context
  anymore

v2 -> v3:
- replace __arm_smmu_tlb_sync by arm_smmu_cmdq_issue_sync

v1 -> v2:
- properly pass the asid
---
 drivers/iommu/arm-smmu-v3.c | 60 +++++++++++++++++++++++++++++++++++++
 1 file changed, 60 insertions(+)

Comments

Robin Murphy May 8, 2019, 3:01 p.m. UTC | #1
On 08/04/2019 13:19, Eric Auger wrote:
> Implement domain-selective and page-selective IOTLB invalidations.
> 
> Signed-off-by: Eric Auger <eric.auger@redhat.com>
> 
> ---
> v6 -> v7
> - check the uapi version
> 
> v3 -> v4:
> - adapt to changes in the uapi
> - add support for leaf parameter
> - do not use arm_smmu_tlb_inv_range_nosync or arm_smmu_tlb_inv_context
>    anymore
> 
> v2 -> v3:
> - replace __arm_smmu_tlb_sync by arm_smmu_cmdq_issue_sync
> 
> v1 -> v2:
> - properly pass the asid
> ---
>   drivers/iommu/arm-smmu-v3.c | 60 +++++++++++++++++++++++++++++++++++++
>   1 file changed, 60 insertions(+)
> 
> diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
> index 1486baf53425..4366921d8318 100644
> --- a/drivers/iommu/arm-smmu-v3.c
> +++ b/drivers/iommu/arm-smmu-v3.c
> @@ -2326,6 +2326,65 @@ static void arm_smmu_detach_pasid_table(struct iommu_domain *domain)
>   	mutex_unlock(&smmu_domain->init_mutex);
>   }
>   
> +static int
> +arm_smmu_cache_invalidate(struct iommu_domain *domain, struct device *dev,
> +			  struct iommu_cache_invalidate_info *inv_info)
> +{
> +	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
> +	struct arm_smmu_device *smmu = smmu_domain->smmu;
> +
> +	if (smmu_domain->stage != ARM_SMMU_DOMAIN_NESTED)
> +		return -EINVAL;
> +
> +	if (!smmu)
> +		return -EINVAL;
> +
> +	if (inv_info->version != IOMMU_CACHE_INVALIDATE_INFO_VERSION_1)
> +		return -EINVAL;
> +
> +	if (inv_info->cache & IOMMU_CACHE_INV_TYPE_IOTLB) {
> +		if (inv_info->granularity == IOMMU_INV_GRANU_PASID) {
> +			struct arm_smmu_cmdq_ent cmd = {
> +				.opcode = CMDQ_OP_TLBI_NH_ASID,
> +				.tlbi = {
> +					.vmid = smmu_domain->s2_cfg.vmid,
> +					.asid = inv_info->pasid,
> +				},
> +			};
> +
> +			arm_smmu_cmdq_issue_cmd(smmu, &cmd);
> +			arm_smmu_cmdq_issue_sync(smmu);

I'd much rather make arm_smmu_tlb_inv_context() understand nested 
domains than open-code commands all over the place.
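
For illustration, a minimal sketch of such a shared helper (the name and its
placement are hypothetical, not from the patch; it only reuses commands and
fields already used above) could look like:

static void __arm_smmu_tlb_inv_asid(struct arm_smmu_domain *smmu_domain, u16 asid)
{
	struct arm_smmu_cmdq_ent cmd = {
		.opcode	= CMDQ_OP_TLBI_NH_ASID,
		.tlbi	= {
			.vmid	= smmu_domain->s2_cfg.vmid,
			.asid	= asid,
		},
	};

	arm_smmu_cmdq_issue_cmd(smmu_domain->smmu, &cmd);
	arm_smmu_cmdq_issue_sync(smmu_domain->smmu);
}

Making arm_smmu_tlb_inv_context() call something like this for nested domains,
with the guest ASID plumbed in, would avoid duplicating the command
construction in the CACHE_INVALIDATE path.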

> +
> +		} else if (inv_info->granularity == IOMMU_INV_GRANU_ADDR) {
> +			struct iommu_inv_addr_info *info = &inv_info->addr_info;
> +			size_t size = info->nb_granules * info->granule_size;
> +			bool leaf = info->flags & IOMMU_INV_ADDR_FLAGS_LEAF;
> +			struct arm_smmu_cmdq_ent cmd = {
> +				.opcode = CMDQ_OP_TLBI_NH_VA,
> +				.tlbi = {
> +					.addr = info->addr,
> +					.vmid = smmu_domain->s2_cfg.vmid,
> +					.asid = info->pasid,
> +					.leaf = leaf,
> +				},
> +			};
> +
> +			do {
> +				arm_smmu_cmdq_issue_cmd(smmu, &cmd);
> +				cmd.tlbi.addr += info->granule_size;
> +			} while (size -= info->granule_size);
> +			arm_smmu_cmdq_issue_sync(smmu);

And this in particular I would really like to go all the way through
io_pgtable_tlb_add_flush()/io_pgtable_sync() if at all possible. Hooking
up range-based invalidations is going to be a massive headache if the
abstraction isn't solid.

Robin.

> +		} else {
> +			return -EINVAL;
> +		}
> +	}
> +	if (inv_info->cache & IOMMU_CACHE_INV_TYPE_PASID ||
> +	    inv_info->cache & IOMMU_CACHE_INV_TYPE_DEV_IOTLB) {
> +		return -ENOENT;
> +	}
> +	return 0;
> +}
> +
>   static struct iommu_ops arm_smmu_ops = {
>   	.capable		= arm_smmu_capable,
>   	.domain_alloc		= arm_smmu_domain_alloc,
> @@ -2346,6 +2405,7 @@ static struct iommu_ops arm_smmu_ops = {
>   	.put_resv_regions	= arm_smmu_put_resv_regions,
>   	.attach_pasid_table	= arm_smmu_attach_pasid_table,
>   	.detach_pasid_table	= arm_smmu_detach_pasid_table,
> +	.cache_invalidate	= arm_smmu_cache_invalidate,
>   	.pgsize_bitmap		= -1UL, /* Restricted during device attach */
>   };
>   
>
Eric Auger May 13, 2019, 12:16 p.m. UTC | #2
Hi Robin,
On 5/8/19 5:01 PM, Robin Murphy wrote:
> On 08/04/2019 13:19, Eric Auger wrote:
>> Implement domain-selective and page-selective IOTLB invalidations.
>>
>> Signed-off-by: Eric Auger <eric.auger@redhat.com>
>>
>> ---
>> v6 -> v7
>> - check the uapi version
>>
>> v3 -> v4:
>> - adapt to changes in the uapi
>> - add support for leaf parameter
>> - do not use arm_smmu_tlb_inv_range_nosync or arm_smmu_tlb_inv_context
>>    anymore
>>
>> v2 -> v3:
>> - replace __arm_smmu_tlb_sync by arm_smmu_cmdq_issue_sync
>>
>> v1 -> v2:
>> - properly pass the asid
>> ---
>>   drivers/iommu/arm-smmu-v3.c | 60 +++++++++++++++++++++++++++++++++++++
>>   1 file changed, 60 insertions(+)
>>
>> diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
>> index 1486baf53425..4366921d8318 100644
>> --- a/drivers/iommu/arm-smmu-v3.c
>> +++ b/drivers/iommu/arm-smmu-v3.c
>> @@ -2326,6 +2326,65 @@ static void arm_smmu_detach_pasid_table(struct
>> iommu_domain *domain)
>>       mutex_unlock(&smmu_domain->init_mutex);
>>   }
>>   +static int
>> +arm_smmu_cache_invalidate(struct iommu_domain *domain, struct device
>> *dev,
>> +              struct iommu_cache_invalidate_info *inv_info)
>> +{
>> +    struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
>> +    struct arm_smmu_device *smmu = smmu_domain->smmu;
>> +
>> +    if (smmu_domain->stage != ARM_SMMU_DOMAIN_NESTED)
>> +        return -EINVAL;
>> +
>> +    if (!smmu)
>> +        return -EINVAL;
>> +
>> +    if (inv_info->version != IOMMU_CACHE_INVALIDATE_INFO_VERSION_1)
>> +        return -EINVAL;
>> +
>> +    if (inv_info->cache & IOMMU_CACHE_INV_TYPE_IOTLB) {
>> +        if (inv_info->granularity == IOMMU_INV_GRANU_PASID) {
>> +            struct arm_smmu_cmdq_ent cmd = {
>> +                .opcode = CMDQ_OP_TLBI_NH_ASID,
>> +                .tlbi = {
>> +                    .vmid = smmu_domain->s2_cfg.vmid,
>> +                    .asid = inv_info->pasid,
>> +                },
>> +            };
>> +
>> +            arm_smmu_cmdq_issue_cmd(smmu, &cmd);
>> +            arm_smmu_cmdq_issue_sync(smmu);
> 
> I'd much rather make arm_smmu_tlb_inv_context() understand nested
> domains than open-code commands all over the place.


> 
>> +
>> +        } else if (inv_info->granularity == IOMMU_INV_GRANU_ADDR) {
>> +            struct iommu_inv_addr_info *info = &inv_info->addr_info;
>> +            size_t size = info->nb_granules * info->granule_size;
>> +            bool leaf = info->flags & IOMMU_INV_ADDR_FLAGS_LEAF;
>> +            struct arm_smmu_cmdq_ent cmd = {
>> +                .opcode = CMDQ_OP_TLBI_NH_VA,
>> +                .tlbi = {
>> +                    .addr = info->addr,
>> +                    .vmid = smmu_domain->s2_cfg.vmid,
>> +                    .asid = info->pasid,
>> +                    .leaf = leaf,
>> +                },
>> +            };
>> +
>> +            do {
>> +                arm_smmu_cmdq_issue_cmd(smmu, &cmd);
>> +                cmd.tlbi.addr += info->granule_size;
>> +            } while (size -= info->granule_size);
>> +            arm_smmu_cmdq_issue_sync(smmu);
> 
> And this in particular I would really like to go all the way through
> io_pgtable_tlb_add_flush()/io_pgtable_sync() if at all possible. Hooking
> up range-based invalidations is going to be a massive headache if the
> abstraction isn't solid.

The concern is that the host does not "own" the s1 config asid
(smmu_domain->s1_cfg.cd.asid is not set, in practice). In our case the
asid is only passed by userspace on the CACHE_INVALIDATE ioctl call.

arm_smmu_tlb_inv_context and arm_smmu_tlb_inv_range_nosync use this field.

Jean-Philippe highlighted that fact in
https://lkml.org/lkml/2019/1/11/1062 and suggested open-coding the
commands instead, hence the current form.

Thanks

Eric
> 
> Robin.
> 
>> +        } else {
>> +            return -EINVAL;
>> +        }
>> +    }
>> +    if (inv_info->cache & IOMMU_CACHE_INV_TYPE_PASID ||
>> +        inv_info->cache & IOMMU_CACHE_INV_TYPE_DEV_IOTLB) {
>> +        return -ENOENT;
>> +    }
>> +    return 0;
>> +}
>> +
>>   static struct iommu_ops arm_smmu_ops = {
>>       .capable        = arm_smmu_capable,
>>       .domain_alloc        = arm_smmu_domain_alloc,
>> @@ -2346,6 +2405,7 @@ static struct iommu_ops arm_smmu_ops = {
>>       .put_resv_regions    = arm_smmu_put_resv_regions,
>>       .attach_pasid_table    = arm_smmu_attach_pasid_table,
>>       .detach_pasid_table    = arm_smmu_detach_pasid_table,
>> +    .cache_invalidate    = arm_smmu_cache_invalidate,
>>       .pgsize_bitmap        = -1UL, /* Restricted during device attach */
>>   };
>>
Robin Murphy May 13, 2019, 2:01 p.m. UTC | #3
On 13/05/2019 13:16, Auger Eric wrote:
> Hi Robin,
> On 5/8/19 5:01 PM, Robin Murphy wrote:
>> On 08/04/2019 13:19, Eric Auger wrote:
>>> Implement domain-selective and page-selective IOTLB invalidations.
>>>
>>> Signed-off-by: Eric Auger <eric.auger@redhat.com>
>>>
>>> ---
>>> v6 -> v7
>>> - check the uapi version
>>>
>>> v3 -> v4:
>>> - adapt to changes in the uapi
>>> - add support for leaf parameter
>>> - do not use arm_smmu_tlb_inv_range_nosync or arm_smmu_tlb_inv_context
>>>     anymore
>>>
>>> v2 -> v3:
>>> - replace __arm_smmu_tlb_sync by arm_smmu_cmdq_issue_sync
>>>
>>> v1 -> v2:
>>> - properly pass the asid
>>> ---
>>>    drivers/iommu/arm-smmu-v3.c | 60 +++++++++++++++++++++++++++++++++++++
>>>    1 file changed, 60 insertions(+)
>>>
>>> diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
>>> index 1486baf53425..4366921d8318 100644
>>> --- a/drivers/iommu/arm-smmu-v3.c
>>> +++ b/drivers/iommu/arm-smmu-v3.c
>>> @@ -2326,6 +2326,65 @@ static void arm_smmu_detach_pasid_table(struct
>>> iommu_domain *domain)
>>>        mutex_unlock(&smmu_domain->init_mutex);
>>>    }
>>>    +static int
>>> +arm_smmu_cache_invalidate(struct iommu_domain *domain, struct device
>>> *dev,
>>> +              struct iommu_cache_invalidate_info *inv_info)
>>> +{
>>> +    struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
>>> +    struct arm_smmu_device *smmu = smmu_domain->smmu;
>>> +
>>> +    if (smmu_domain->stage != ARM_SMMU_DOMAIN_NESTED)
>>> +        return -EINVAL;
>>> +
>>> +    if (!smmu)
>>> +        return -EINVAL;
>>> +
>>> +    if (inv_info->version != IOMMU_CACHE_INVALIDATE_INFO_VERSION_1)
>>> +        return -EINVAL;
>>> +
>>> +    if (inv_info->cache & IOMMU_CACHE_INV_TYPE_IOTLB) {
>>> +        if (inv_info->granularity == IOMMU_INV_GRANU_PASID) {
>>> +            struct arm_smmu_cmdq_ent cmd = {
>>> +                .opcode = CMDQ_OP_TLBI_NH_ASID,
>>> +                .tlbi = {
>>> +                    .vmid = smmu_domain->s2_cfg.vmid,
>>> +                    .asid = inv_info->pasid,
>>> +                },
>>> +            };
>>> +
>>> +            arm_smmu_cmdq_issue_cmd(smmu, &cmd);
>>> +            arm_smmu_cmdq_issue_sync(smmu);
>>
>> I'd much rather make arm_smmu_tlb_inv_context() understand nested
>> domains than open-code commands all over the place.
> 
> 
>>
>>> +
>>> +        } else if (inv_info->granularity == IOMMU_INV_GRANU_ADDR) {
>>> +            struct iommu_inv_addr_info *info = &inv_info->addr_info;
>>> +            size_t size = info->nb_granules * info->granule_size;
>>> +            bool leaf = info->flags & IOMMU_INV_ADDR_FLAGS_LEAF;
>>> +            struct arm_smmu_cmdq_ent cmd = {
>>> +                .opcode = CMDQ_OP_TLBI_NH_VA,
>>> +                .tlbi = {
>>> +                    .addr = info->addr,
>>> +                    .vmid = smmu_domain->s2_cfg.vmid,
>>> +                    .asid = info->pasid,
>>> +                    .leaf = leaf,
>>> +                },
>>> +            };
>>> +
>>> +            do {
>>> +                arm_smmu_cmdq_issue_cmd(smmu, &cmd);
>>> +                cmd.tlbi.addr += info->granule_size;
>>> +            } while (size -= info->granule_size);
>>> +            arm_smmu_cmdq_issue_sync(smmu);
>>
>> And this in particular I would really like to go all the way through
>> io_pgtable_tlb_add_flush()/io_pgtable_sync() if at all possible. Hooking
>> up range-based invalidations is going to be a massive headache if the
>> abstraction isn't solid.
> 
> The concern is that the host does not "own" the s1 config asid
> (smmu_domain->s1_cfg.cd.asid is not set, in practice). In our case the
> asid is only passed by userspace on the CACHE_INVALIDATE ioctl call.
> 
> arm_smmu_tlb_inv_context and arm_smmu_tlb_inv_range_nosync use this field.

Right, but that's not exactly hard to solve. Even just something like 
the (untested, purely illustrative) refactoring below would be beneficial.

Robin.

----->8-----
diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
index d3880010c6cf..31ef703cf671 100644
--- a/drivers/iommu/arm-smmu-v3.c
+++ b/drivers/iommu/arm-smmu-v3.c
@@ -1423,11 +1423,9 @@ static void arm_smmu_tlb_inv_context(void *cookie)
  	arm_smmu_cmdq_issue_sync(smmu);
  }

-static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
-					  size_t granule, bool leaf, void *cookie)
+static void __arm_smmu_tlb_inv_range(struct arm_smmu_domain *smmu_domain, u16 asid,
+		unsigned long iova, size_t size, size_t granule, bool leaf)
  {
-	struct arm_smmu_domain *smmu_domain = cookie;
-	struct arm_smmu_device *smmu = smmu_domain->smmu;
  	struct arm_smmu_cmdq_ent cmd = {
  		.tlbi = {
  			.leaf	= leaf,
@@ -1437,18 +1435,27 @@ static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,

  	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
  		cmd.opcode	= CMDQ_OP_TLBI_NH_VA;
-		cmd.tlbi.asid	= smmu_domain->s1_cfg.cd.asid;
+		cmd.tlbi.asid	= asid;
  	} else {
  		cmd.opcode	= CMDQ_OP_TLBI_S2_IPA;
  		cmd.tlbi.vmid	= smmu_domain->s2_cfg.vmid;
  	}

  	do {
-		arm_smmu_cmdq_issue_cmd(smmu, &cmd);
+		arm_smmu_cmdq_issue_cmd(smmu_domain->smmu, &cmd);
  		cmd.tlbi.addr += granule;
  	} while (size -= granule);
  }

+static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
+					  size_t granule, bool leaf, void *cookie)
+{
+	struct arm_smmu_domain *smmu_domain = cookie;
+
+	__arm_smmu_tlb_inv_range(smmu_domain, smmu_domain->s1_cfg.cd.asid, iova,
+			size, granule, leaf);
+}
+
  static const struct iommu_gather_ops arm_smmu_gather_ops = {
  	.tlb_flush_all	= arm_smmu_tlb_inv_context,
  	.tlb_add_flush	= arm_smmu_tlb_inv_range_nosync,
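
With something along those lines (plus teaching the helper that a NESTED
domain's stage-1 invalidations still need CMDQ_OP_TLBI_NH_VA with both the
guest ASID and the stage-2 VMID), the IOMMU_INV_GRANU_ADDR case in
arm_smmu_cache_invalidate() could reduce to roughly the following untested
sketch:

		} else if (inv_info->granularity == IOMMU_INV_GRANU_ADDR) {
			struct iommu_inv_addr_info *info = &inv_info->addr_info;
			size_t size = info->nb_granules * info->granule_size;
			bool leaf = info->flags & IOMMU_INV_ADDR_FLAGS_LEAF;

			/* the guest ASID comes from userspace, not s1_cfg */
			__arm_smmu_tlb_inv_range(smmu_domain, info->pasid,
						 info->addr, size,
						 info->granule_size, leaf);
			arm_smmu_cmdq_issue_sync(smmu);
		}
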
Eric Auger May 13, 2019, 2:04 p.m. UTC | #4
Hi Robin,

On 5/13/19 4:01 PM, Robin Murphy wrote:
> On 13/05/2019 13:16, Auger Eric wrote:
>> Hi Robin,
>> On 5/8/19 5:01 PM, Robin Murphy wrote:
>>> On 08/04/2019 13:19, Eric Auger wrote:
>>>> Implement domain-selective and page-selective IOTLB invalidations.
>>>>
>>>> Signed-off-by: Eric Auger <eric.auger@redhat.com>
>>>>
>>>> ---
>>>> v6 -> v7
>>>> - check the uapi version
>>>>
>>>> v3 -> v4:
>>>> - adapt to changes in the uapi
>>>> - add support for leaf parameter
>>>> - do not use arm_smmu_tlb_inv_range_nosync or arm_smmu_tlb_inv_context
>>>>     anymore
>>>>
>>>> v2 -> v3:
>>>> - replace __arm_smmu_tlb_sync by arm_smmu_cmdq_issue_sync
>>>>
>>>> v1 -> v2:
>>>> - properly pass the asid
>>>> ---
>>>>    drivers/iommu/arm-smmu-v3.c | 60
>>>> +++++++++++++++++++++++++++++++++++++
>>>>    1 file changed, 60 insertions(+)
>>>>
>>>> diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
>>>> index 1486baf53425..4366921d8318 100644
>>>> --- a/drivers/iommu/arm-smmu-v3.c
>>>> +++ b/drivers/iommu/arm-smmu-v3.c
>>>> @@ -2326,6 +2326,65 @@ static void arm_smmu_detach_pasid_table(struct
>>>> iommu_domain *domain)
>>>>        mutex_unlock(&smmu_domain->init_mutex);
>>>>    }
>>>>    +static int
>>>> +arm_smmu_cache_invalidate(struct iommu_domain *domain, struct device
>>>> *dev,
>>>> +              struct iommu_cache_invalidate_info *inv_info)
>>>> +{
>>>> +    struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
>>>> +    struct arm_smmu_device *smmu = smmu_domain->smmu;
>>>> +
>>>> +    if (smmu_domain->stage != ARM_SMMU_DOMAIN_NESTED)
>>>> +        return -EINVAL;
>>>> +
>>>> +    if (!smmu)
>>>> +        return -EINVAL;
>>>> +
>>>> +    if (inv_info->version != IOMMU_CACHE_INVALIDATE_INFO_VERSION_1)
>>>> +        return -EINVAL;
>>>> +
>>>> +    if (inv_info->cache & IOMMU_CACHE_INV_TYPE_IOTLB) {
>>>> +        if (inv_info->granularity == IOMMU_INV_GRANU_PASID) {
>>>> +            struct arm_smmu_cmdq_ent cmd = {
>>>> +                .opcode = CMDQ_OP_TLBI_NH_ASID,
>>>> +                .tlbi = {
>>>> +                    .vmid = smmu_domain->s2_cfg.vmid,
>>>> +                    .asid = inv_info->pasid,
>>>> +                },
>>>> +            };
>>>> +
>>>> +            arm_smmu_cmdq_issue_cmd(smmu, &cmd);
>>>> +            arm_smmu_cmdq_issue_sync(smmu);
>>>
>>> I'd much rather make arm_smmu_tlb_inv_context() understand nested
>>> domains than open-code commands all over the place.
>>
>>
>>>
>>>> +
>>>> +        } else if (inv_info->granularity == IOMMU_INV_GRANU_ADDR) {
>>>> +            struct iommu_inv_addr_info *info = &inv_info->addr_info;
>>>> +            size_t size = info->nb_granules * info->granule_size;
>>>> +            bool leaf = info->flags & IOMMU_INV_ADDR_FLAGS_LEAF;
>>>> +            struct arm_smmu_cmdq_ent cmd = {
>>>> +                .opcode = CMDQ_OP_TLBI_NH_VA,
>>>> +                .tlbi = {
>>>> +                    .addr = info->addr,
>>>> +                    .vmid = smmu_domain->s2_cfg.vmid,
>>>> +                    .asid = info->pasid,
>>>> +                    .leaf = leaf,
>>>> +                },
>>>> +            };
>>>> +
>>>> +            do {
>>>> +                arm_smmu_cmdq_issue_cmd(smmu, &cmd);
>>>> +                cmd.tlbi.addr += info->granule_size;
>>>> +            } while (size -= info->granule_size);
>>>> +            arm_smmu_cmdq_issue_sync(smmu);
>>>
>>> And this in particular I would really like to go all the way through
>>> io_pgtable_tlb_add_flush()/io_pgtable_sync() if at all possible. Hooking
>>> up range-based invalidations is going to be a massive headache if the
>>> abstraction isn't solid.
>>
>> The concern is that the host does not "own" the s1 config asid
>> (smmu_domain->s1_cfg.cd.asid is not set, in practice). In our case the
>> asid is only passed by userspace on the CACHE_INVALIDATE ioctl call.
>>
>> arm_smmu_tlb_inv_context and arm_smmu_tlb_inv_range_nosync use this field.
> 
> Right, but that's not exactly hard to solve. Even just something like
> the (untested, purely illustrative) refactoring below would be beneficial.
Sure I can go this way.

Thank you for detailing it.

Eric
> 
> Robin.
> 
> ----->8-----
> diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
> index d3880010c6cf..31ef703cf671 100644
> --- a/drivers/iommu/arm-smmu-v3.c
> +++ b/drivers/iommu/arm-smmu-v3.c
> @@ -1423,11 +1423,9 @@ static void arm_smmu_tlb_inv_context(void *cookie)
>      arm_smmu_cmdq_issue_sync(smmu);
>  }
> 
> -static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
> -                      size_t granule, bool leaf, void *cookie)
> +static void __arm_smmu_tlb_inv_range(struct arm_smmu_domain *smmu_domain, u16 asid,
> +        unsigned long iova, size_t size, size_t granule, bool leaf)
>  {
> -    struct arm_smmu_domain *smmu_domain = cookie;
> -    struct arm_smmu_device *smmu = smmu_domain->smmu;
>      struct arm_smmu_cmdq_ent cmd = {
>          .tlbi = {
>              .leaf    = leaf,
> @@ -1437,18 +1435,27 @@ static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
> 
>      if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
>          cmd.opcode    = CMDQ_OP_TLBI_NH_VA;
> -        cmd.tlbi.asid    = smmu_domain->s1_cfg.cd.asid;
> +        cmd.tlbi.asid    = asid;
>      } else {
>          cmd.opcode    = CMDQ_OP_TLBI_S2_IPA;
>          cmd.tlbi.vmid    = smmu_domain->s2_cfg.vmid;
>      }
> 
>      do {
> -        arm_smmu_cmdq_issue_cmd(smmu, &cmd);
> +        arm_smmu_cmdq_issue_cmd(smmu_domain->smmu, &cmd);
>          cmd.tlbi.addr += granule;
>      } while (size -= granule);
>  }
> 
> +static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
> +                      size_t granule, bool leaf, void *cookie)
> +{
> +    struct arm_smmu_domain *smmu_domain = cookie;
> +
> +    __arm_smmu_tlb_inv_range(smmu_domain, smmu_domain->s1_cfg.cd.asid, iova,
> +            size, granule, leaf);
> +}
> +
>  static const struct iommu_gather_ops arm_smmu_gather_ops = {
>      .tlb_flush_all    = arm_smmu_tlb_inv_context,
>      .tlb_add_flush    = arm_smmu_tlb_inv_range_nosync,

Patch

diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
index 1486baf53425..4366921d8318 100644
--- a/drivers/iommu/arm-smmu-v3.c
+++ b/drivers/iommu/arm-smmu-v3.c
@@ -2326,6 +2326,65 @@  static void arm_smmu_detach_pasid_table(struct iommu_domain *domain)
 	mutex_unlock(&smmu_domain->init_mutex);
 }
 
+static int
+arm_smmu_cache_invalidate(struct iommu_domain *domain, struct device *dev,
+			  struct iommu_cache_invalidate_info *inv_info)
+{
+	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
+	struct arm_smmu_device *smmu = smmu_domain->smmu;
+
+	if (smmu_domain->stage != ARM_SMMU_DOMAIN_NESTED)
+		return -EINVAL;
+
+	if (!smmu)
+		return -EINVAL;
+
+	if (inv_info->version != IOMMU_CACHE_INVALIDATE_INFO_VERSION_1)
+		return -EINVAL;
+
+	if (inv_info->cache & IOMMU_CACHE_INV_TYPE_IOTLB) {
+		if (inv_info->granularity == IOMMU_INV_GRANU_PASID) {
+			struct arm_smmu_cmdq_ent cmd = {
+				.opcode = CMDQ_OP_TLBI_NH_ASID,
+				.tlbi = {
+					.vmid = smmu_domain->s2_cfg.vmid,
+					.asid = inv_info->pasid,
+				},
+			};
+
+			arm_smmu_cmdq_issue_cmd(smmu, &cmd);
+			arm_smmu_cmdq_issue_sync(smmu);
+
+		} else if (inv_info->granularity == IOMMU_INV_GRANU_ADDR) {
+			struct iommu_inv_addr_info *info = &inv_info->addr_info;
+			size_t size = info->nb_granules * info->granule_size;
+			bool leaf = info->flags & IOMMU_INV_ADDR_FLAGS_LEAF;
+			struct arm_smmu_cmdq_ent cmd = {
+				.opcode = CMDQ_OP_TLBI_NH_VA,
+				.tlbi = {
+					.addr = info->addr,
+					.vmid = smmu_domain->s2_cfg.vmid,
+					.asid = info->pasid,
+					.leaf = leaf,
+				},
+			};
+
+			do {
+				arm_smmu_cmdq_issue_cmd(smmu, &cmd);
+				cmd.tlbi.addr += info->granule_size;
+			} while (size -= info->granule_size);
+			arm_smmu_cmdq_issue_sync(smmu);
+		} else {
+			return -EINVAL;
+		}
+	}
+	if (inv_info->cache & IOMMU_CACHE_INV_TYPE_PASID ||
+	    inv_info->cache & IOMMU_CACHE_INV_TYPE_DEV_IOTLB) {
+		return -ENOENT;
+	}
+	return 0;
+}
+
 static struct iommu_ops arm_smmu_ops = {
 	.capable		= arm_smmu_capable,
 	.domain_alloc		= arm_smmu_domain_alloc,
@@ -2346,6 +2405,7 @@  static struct iommu_ops arm_smmu_ops = {
 	.put_resv_regions	= arm_smmu_put_resv_regions,
 	.attach_pasid_table	= arm_smmu_attach_pasid_table,
 	.detach_pasid_table	= arm_smmu_detach_pasid_table,
+	.cache_invalidate	= arm_smmu_cache_invalidate,
 	.pgsize_bitmap		= -1UL, /* Restricted during device attach */
 };
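
For context, a caller-side sketch of the descriptor the handler above
consumes, for a single-page, leaf-only invalidation. Only the fields the
handler actually dereferences are shown; the iova/asid values and anything
else about the uapi layout are assumptions:

	struct iommu_cache_invalidate_info inv_info = {
		.version	= IOMMU_CACHE_INVALIDATE_INFO_VERSION_1,
		.cache		= IOMMU_CACHE_INV_TYPE_IOTLB,
		.granularity	= IOMMU_INV_GRANU_ADDR,
	};

	inv_info.addr_info.addr		= iova;	/* guest IOVA to invalidate */
	inv_info.addr_info.pasid	= asid;	/* guest ASID, as emulated by the vSMMU */
	inv_info.addr_info.granule_size	= 4096;
	inv_info.addr_info.nb_granules	= 1;
	inv_info.addr_info.flags	= IOMMU_INV_ADDR_FLAGS_LEAF;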