diff mbox series

[4/5] iommu/arm-smmu: Make way to add Qcom's smmu-500 errata handling

Message ID 20180814105528.20592-5-vivek.gautam@codeaurora.org (mailing list archive)
State Deferred, archived
Delegated to: Andy Gross
Headers show
Series Qcom smmu-500 TLB invalidation errata for sdm845 | expand

Commit Message

Vivek Gautam Aug. 14, 2018, 10:55 a.m. UTC
Cleanup to re-use some of the stuff

Signed-off-by: Vivek Gautam <vivek.gautam@codeaurora.org>
---
 drivers/iommu/arm-smmu.c | 32 +++++++++++++++++++++++++-------
 1 file changed, 25 insertions(+), 7 deletions(-)

Comments

Will Deacon Aug. 14, 2018, 11:40 a.m. UTC | #1
On Tue, Aug 14, 2018 at 04:25:27PM +0530, Vivek Gautam wrote:
> Cleanup to re-use some of the stuff

Maybe we should factor a few of the other bits whilst we're here.

Or just write a proper commit message ;)

Will
Vivek Gautam Aug. 14, 2018, 12:28 p.m. UTC | #2
On 8/14/2018 5:10 PM, Will Deacon wrote:
> On Tue, Aug 14, 2018 at 04:25:27PM +0530, Vivek Gautam wrote:
>> Cleanup to re-use some of the stuff
> Maybe we should factor a few of the other bits whilst we're here.

Sure, do you want me to refactor anything besides this change?

>
> Or just write a proper commit message ;)

My bad. I should have written a more descriptive commit message. :|
Will change this.

Best regards
Vivek

>
> Will
Robin Murphy Aug. 14, 2018, 4:59 p.m. UTC | #3
On 14/08/18 11:55, Vivek Gautam wrote:
> Cleanup to re-use some of the stuff
> 
> Signed-off-by: Vivek Gautam <vivek.gautam@codeaurora.org>
> ---
>   drivers/iommu/arm-smmu.c | 32 +++++++++++++++++++++++++-------
>   1 file changed, 25 insertions(+), 7 deletions(-)

I think the overall diffstat would be an awful lot smaller if the 
erratum workaround just has its own readl_poll_timeout() as it does in 
the vendor kernel. The burst-polling loop is for minimising latency in 
high-throughput situations, and if you're in a workaround which has to 
lock *every* register write and issue two firmware calls around each 
sync I think you're already well out of that game.

> diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
> index 32e86df80428..75c146751c87 100644
> --- a/drivers/iommu/arm-smmu.c
> +++ b/drivers/iommu/arm-smmu.c
> @@ -391,21 +391,31 @@ static void __arm_smmu_free_bitmap(unsigned long *map, int idx)
>   	clear_bit(idx, map);
>   }
>   
> -/* Wait for any pending TLB invalidations to complete */
> -static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu,
> -				void __iomem *sync, void __iomem *status)
> +static int __arm_smmu_tlb_sync_wait(void __iomem *status)
>   {
>   	unsigned int spin_cnt, delay;
>   
> -	writel_relaxed(0, sync);
>   	for (delay = 1; delay < TLB_LOOP_TIMEOUT; delay *= 2) {
>   		for (spin_cnt = TLB_SPIN_COUNT; spin_cnt > 0; spin_cnt--) {
>   			if (!(readl_relaxed(status) & sTLBGSTATUS_GSACTIVE))
> -				return;
> +				return 0;
>   			cpu_relax();
>   		}
>   		udelay(delay);
>   	}
> +
> +	return -EBUSY;
> +}
> +
> +/* Wait for any pending TLB invalidations to complete */
> +static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu,
> +				void __iomem *sync, void __iomem *status)
> +{
> +	writel_relaxed(0, sync);
> +
> +	if (!__arm_smmu_tlb_sync_wait(status))
> +		return;
> +
>   	dev_err_ratelimited(smmu->dev,
>   			    "TLB sync timed out -- SMMU may be deadlocked\n");
>   }
> @@ -461,8 +471,9 @@ static void arm_smmu_tlb_inv_context_s2(void *cookie)
>   	arm_smmu_tlb_sync_global(smmu);
>   }
>   
> -static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
> -					  size_t granule, bool leaf, void *cookie)
> +static void __arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
> +					    size_t granule, bool leaf,
> +					    void *cookie)
>   {
>   	struct arm_smmu_domain *smmu_domain = cookie;
>   	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
> @@ -498,6 +509,13 @@ static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
>   	}
>   }
>   
> +static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
> +					  size_t granule, bool leaf,
> +					  void *cookie)
> +{
> +	__arm_smmu_tlb_inv_range_nosync(iova, size, granule, leaf, cookie);
> +}
> +

AFAICS even after patch #5 this does absolutely nothing except make the 
code needlessly harder to read :(

Robin.

>   /*
>    * On MMU-401 at least, the cost of firing off multiple TLBIVMIDs appears
>    * almost negligible, but the benefit of getting the first one in as far ahead
>
Vivek Gautam Aug. 28, 2018, 6:59 a.m. UTC | #4
Hi Robin,


On 8/14/2018 10:29 PM, Robin Murphy wrote:
> On 14/08/18 11:55, Vivek Gautam wrote:
>> Cleanup to re-use some of the stuff
>>
>> Signed-off-by: Vivek Gautam <vivek.gautam@codeaurora.org>
>> ---
>>   drivers/iommu/arm-smmu.c | 32 +++++++++++++++++++++++++-------
>>   1 file changed, 25 insertions(+), 7 deletions(-)
>
> I think the overall diffstat would be an awful lot smaller if the 
> erratum workaround just has its own readl_poll_timeout() as it does in 
> the vendor kernel. The burst-polling loop is for minimising latency in 
> high-throughput situations, and if you're in a workaround which has to 
> lock *every* register write and issue two firmware calls around each 
> sync I think you're already well out of that game.

Sorry for the delayed response. I was on vacation.
I will fix this in my next version by adding the separate 
read_poll_timeout() for the erratum WA.

>
>> diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
>> index 32e86df80428..75c146751c87 100644
>> --- a/drivers/iommu/arm-smmu.c
>> +++ b/drivers/iommu/arm-smmu.c
>> @@ -391,21 +391,31 @@ static void __arm_smmu_free_bitmap(unsigned 
>> long *map, int idx)
>>       clear_bit(idx, map);
>>   }
>>   -/* Wait for any pending TLB invalidations to complete */
>> -static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu,
>> -                void __iomem *sync, void __iomem *status)
>> +static int __arm_smmu_tlb_sync_wait(void __iomem *status)
>>   {
>>       unsigned int spin_cnt, delay;
>>   -    writel_relaxed(0, sync);
>>       for (delay = 1; delay < TLB_LOOP_TIMEOUT; delay *= 2) {
>>           for (spin_cnt = TLB_SPIN_COUNT; spin_cnt > 0; spin_cnt--) {
>>               if (!(readl_relaxed(status) & sTLBGSTATUS_GSACTIVE))
>> -                return;
>> +                return 0;
>>               cpu_relax();
>>           }
>>           udelay(delay);
>>       }
>> +
>> +    return -EBUSY;
>> +}
>> +
>> +/* Wait for any pending TLB invalidations to complete */
>> +static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu,
>> +                void __iomem *sync, void __iomem *status)
>> +{
>> +    writel_relaxed(0, sync);
>> +
>> +    if (!__arm_smmu_tlb_sync_wait(status))
>> +        return;
>> +
>>       dev_err_ratelimited(smmu->dev,
>>                   "TLB sync timed out -- SMMU may be deadlocked\n");
>>   }
>> @@ -461,8 +471,9 @@ static void arm_smmu_tlb_inv_context_s2(void 
>> *cookie)
>>       arm_smmu_tlb_sync_global(smmu);
>>   }
>>   -static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, 
>> size_t size,
>> -                      size_t granule, bool leaf, void *cookie)
>> +static void __arm_smmu_tlb_inv_range_nosync(unsigned long iova, 
>> size_t size,
>> +                        size_t granule, bool leaf,
>> +                        void *cookie)
>>   {
>>       struct arm_smmu_domain *smmu_domain = cookie;
>>       struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
>> @@ -498,6 +509,13 @@ static void 
>> arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
>>       }
>>   }
>>   +static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, 
>> size_t size,
>> +                      size_t granule, bool leaf,
>> +                      void *cookie)
>> +{
>> +    __arm_smmu_tlb_inv_range_nosync(iova, size, granule, leaf, cookie);
>> +}
>> +
>
> AFAICS even after patch #5 this does absolutely nothing except make 
> the code needlessly harder to read :(

Sure, I will rather call arm_smmu_tlb_inv_range_nosync() from
qcom_errata_tlb_inv_range_nosync() then make this change.
Thanks for the review.

Best regards
Vivek

>
> Robin.
>
>>   /*
>>    * On MMU-401 at least, the cost of firing off multiple TLBIVMIDs 
>> appears
>>    * almost negligible, but the benefit of getting the first one in 
>> as far ahead
>>
diff mbox series

Patch

diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index 32e86df80428..75c146751c87 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -391,21 +391,31 @@  static void __arm_smmu_free_bitmap(unsigned long *map, int idx)
 	clear_bit(idx, map);
 }
 
-/* Wait for any pending TLB invalidations to complete */
-static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu,
-				void __iomem *sync, void __iomem *status)
+static int __arm_smmu_tlb_sync_wait(void __iomem *status)
 {
 	unsigned int spin_cnt, delay;
 
-	writel_relaxed(0, sync);
 	for (delay = 1; delay < TLB_LOOP_TIMEOUT; delay *= 2) {
 		for (spin_cnt = TLB_SPIN_COUNT; spin_cnt > 0; spin_cnt--) {
 			if (!(readl_relaxed(status) & sTLBGSTATUS_GSACTIVE))
-				return;
+				return 0;
 			cpu_relax();
 		}
 		udelay(delay);
 	}
+
+	return -EBUSY;
+}
+
+/* Wait for any pending TLB invalidations to complete */
+static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu,
+				void __iomem *sync, void __iomem *status)
+{
+	writel_relaxed(0, sync);
+
+	if (!__arm_smmu_tlb_sync_wait(status))
+		return;
+
 	dev_err_ratelimited(smmu->dev,
 			    "TLB sync timed out -- SMMU may be deadlocked\n");
 }
@@ -461,8 +471,9 @@  static void arm_smmu_tlb_inv_context_s2(void *cookie)
 	arm_smmu_tlb_sync_global(smmu);
 }
 
-static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
-					  size_t granule, bool leaf, void *cookie)
+static void __arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
+					    size_t granule, bool leaf,
+					    void *cookie)
 {
 	struct arm_smmu_domain *smmu_domain = cookie;
 	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
@@ -498,6 +509,13 @@  static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
 	}
 }
 
+static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
+					  size_t granule, bool leaf,
+					  void *cookie)
+{
+	__arm_smmu_tlb_inv_range_nosync(iova, size, granule, leaf, cookie);
+}
+
 /*
  * On MMU-401 at least, the cost of firing off multiple TLBIVMIDs appears
  * almost negligible, but the benefit of getting the first one in as far ahead