diff mbox series

[v2] arm64/mm: Correct the cache line size warning with non coherent device

Message ID 20190614131141.4428-1-msys.mizuma@gmail.com (mailing list archive)
State New, archived
Headers show
Series [v2] arm64/mm: Correct the cache line size warning with non coherent device | expand

Commit Message

Masayoshi Mizuma June 14, 2019, 1:11 p.m. UTC
From: Masayoshi Mizuma <m.mizuma@jp.fujitsu.com>

If the cache line size is greater than ARCH_DMA_MINALIGN (128),
a warning is shown and the kernel is tainted with TAINT_CPU_OUT_OF_SPEC.

However, this is not desirable because, as discussed in the thread [1], the
CPU cache line size is a problem only for non-coherent devices.

Since the coherent flag has already been introduced in struct device,
show the warning only if the device is non-coherent and
ARCH_DMA_MINALIGN is smaller than the CPU cache line size.

[1] https://lore.kernel.org/linux-arm-kernel/20180514145703.celnlobzn3uh5tc2@localhost/

Signed-off-by: Masayoshi Mizuma <m.mizuma@jp.fujitsu.com>
Reviewed-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Tested-by: Zhang Lei <zhang.lei@jp.fujitsu.com>
---
 arch/arm64/include/asm/cache.h |  7 +++++++
 arch/arm64/kernel/cacheinfo.c  |  4 +---
 arch/arm64/mm/dma-mapping.c    | 14 ++++++++++----
 3 files changed, 18 insertions(+), 7 deletions(-)

Comments

Shaokun Zhang June 15, 2019, 2:44 a.m. UTC | #1
Hi Masayoshi,

A few trivial comments inline.

On 2019/6/14 21:11, Masayoshi Mizuma wrote:
> From: Masayoshi Mizuma <m.mizuma@jp.fujitsu.com>
> 
> If the cache line size is greater than ARCH_DMA_MINALIGN (128),
> the warning shows and it's tainted as TAINT_CPU_OUT_OF_SPEC.
> 
> However, it's not good because as discussed in the thread [1], the cpu
> cache line size will be problem only on non-coherent devices.
> 
> Since the coherent flag is already introduced to struct device,
> show the warning only if the device is non-coherent device and
> ARCH_DMA_MINALIGN is smaller than the cpu cache size.
> 
> [1] https://lore.kernel.org/linux-arm-kernel/20180514145703.celnlobzn3uh5tc2@localhost/
> 
> Signed-off-by: Masayoshi Mizuma <m.mizuma@jp.fujitsu.com>
> Reviewed-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
> Tested-by: Zhang Lei <zhang.lei@jp.fujitsu.com>
> ---
>  arch/arm64/include/asm/cache.h |  7 +++++++
>  arch/arm64/kernel/cacheinfo.c  |  4 +---
>  arch/arm64/mm/dma-mapping.c    | 14 ++++++++++----
>  3 files changed, 18 insertions(+), 7 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/cache.h b/arch/arm64/include/asm/cache.h
> index 758af6340314..d24b7c1ecd9b 100644
> --- a/arch/arm64/include/asm/cache.h
> +++ b/arch/arm64/include/asm/cache.h
> @@ -91,6 +91,13 @@ static inline u32 cache_type_cwg(void)
>  
>  #define __read_mostly __attribute__((__section__(".data..read_mostly")))
>  
> +static inline int cache_line_size_of_cpu(void)
> +{
> +	u32 cwg = cache_type_cwg();
> +
> +	return cwg ? 4 << cwg : ARCH_DMA_MINALIGN;
> +}
> +
>  int cache_line_size(void);
>  
>  /*
> diff --git a/arch/arm64/kernel/cacheinfo.c b/arch/arm64/kernel/cacheinfo.c
> index 6eaf1c07aa4e..7fa6828bb488 100644
> --- a/arch/arm64/kernel/cacheinfo.c
> +++ b/arch/arm64/kernel/cacheinfo.c
> @@ -19,12 +19,10 @@
>  
>  int cache_line_size(void)
>  {
> -	u32 cwg = cache_type_cwg();
> -
>  	if (coherency_max_size != 0)
>  		return coherency_max_size;
>  
> -	return cwg ? 4 << cwg : ARCH_DMA_MINALIGN;
> +	return cache_line_size_of_cpu();
>  }

How about simplifying it like this?

int cache_line_size(void)
{
        return coherency_max_size ? coherency_max_size :
                cache_line_size_of_cpu();
}

>  EXPORT_SYMBOL_GPL(cache_line_size);
>  
> diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
> index 1669618db08a..379589dc7113 100644
> --- a/arch/arm64/mm/dma-mapping.c
> +++ b/arch/arm64/mm/dma-mapping.c
> @@ -38,10 +38,6 @@ void arch_dma_prep_coherent(struct page *page, size_t size)
>  
>  static int __init arm64_dma_init(void)
>  {
> -	WARN_TAINT(ARCH_DMA_MINALIGN < cache_line_size(),
> -		   TAINT_CPU_OUT_OF_SPEC,
> -		   "ARCH_DMA_MINALIGN smaller than CTR_EL0.CWG (%d < %d)",
> -		   ARCH_DMA_MINALIGN, cache_line_size());
>  	return dma_atomic_pool_init(GFP_DMA32, __pgprot(PROT_NORMAL_NC));
>  }
>  arch_initcall(arm64_dma_init);
> @@ -56,7 +52,17 @@ void arch_teardown_dma_ops(struct device *dev)
>  void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
>  			const struct iommu_ops *iommu, bool coherent)
>  {
> +	int cls = cache_line_size_of_cpu();

Do we need this local variable? How about using cache_line_size_of_cpu()
directly in WARN_TAINT, just like before?

Thanks,
Shaokun

> +
>  	dev->dma_coherent = coherent;
> +
> +	if (!coherent)
> +		WARN_TAINT(cls > ARCH_DMA_MINALIGN,
> +			TAINT_CPU_OUT_OF_SPEC,
> +			"%s %s: ARCH_DMA_MINALIGN smaller than CTR_EL0.CWG (%d < %d)",
> +			dev_driver_string(dev), dev_name(dev),
> +			ARCH_DMA_MINALIGN, cls);
> +
>  	if (iommu)
>  		iommu_setup_dma_ops(dev, dma_base, size);
>  
>
Catalin Marinas June 17, 2019, 10:45 a.m. UTC | #2
On Sat, Jun 15, 2019 at 10:44:33AM +0800, Zhangshaokun wrote:
> On 2019/6/14 21:11, Masayoshi Mizuma wrote:
> > diff --git a/arch/arm64/kernel/cacheinfo.c b/arch/arm64/kernel/cacheinfo.c
> > index 6eaf1c07aa4e..7fa6828bb488 100644
> > --- a/arch/arm64/kernel/cacheinfo.c
> > +++ b/arch/arm64/kernel/cacheinfo.c
> > @@ -19,12 +19,10 @@
> >  
> >  int cache_line_size(void)
> >  {
> > -	u32 cwg = cache_type_cwg();
> > -
> >  	if (coherency_max_size != 0)
> >  		return coherency_max_size;
> >  
> > -	return cwg ? 4 << cwg : ARCH_DMA_MINALIGN;
> > +	return cache_line_size_of_cpu();
> >  }
> 
> How about simplify it as this?
> 
> int cache_line_size(void)
> {
>         return coherency_max_size ? coherency_max_size :
>                 cache_line_size_of_cpu();
> }

I don't see this as a simplification, easier to read with explicit 'if'.

> >  EXPORT_SYMBOL_GPL(cache_line_size);
> >  
> > diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
> > index 1669618db08a..379589dc7113 100644
> > --- a/arch/arm64/mm/dma-mapping.c
> > +++ b/arch/arm64/mm/dma-mapping.c
> > @@ -38,10 +38,6 @@ void arch_dma_prep_coherent(struct page *page, size_t size)
> >  
> >  static int __init arm64_dma_init(void)
> >  {
> > -	WARN_TAINT(ARCH_DMA_MINALIGN < cache_line_size(),
> > -		   TAINT_CPU_OUT_OF_SPEC,
> > -		   "ARCH_DMA_MINALIGN smaller than CTR_EL0.CWG (%d < %d)",
> > -		   ARCH_DMA_MINALIGN, cache_line_size());
> >  	return dma_atomic_pool_init(GFP_DMA32, __pgprot(PROT_NORMAL_NC));
> >  }
> >  arch_initcall(arm64_dma_init);
> > @@ -56,7 +52,17 @@ void arch_teardown_dma_ops(struct device *dev)
> >  void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
> >  			const struct iommu_ops *iommu, bool coherent)
> >  {
> > +	int cls = cache_line_size_of_cpu();
> 
> whether we need this local variable, how about use cache_line_size_of_cpu
> directly in WARN_TAINT just like before.

The reason being?

Anyway, I'll queue v2 of this patch as is for 5.3. Thanks.
Shaokun Zhang June 17, 2019, 11 a.m. UTC | #3
Hi Catalin,

On 2019/6/17 18:45, Catalin Marinas wrote:
> On Sat, Jun 15, 2019 at 10:44:33AM +0800, Zhangshaokun wrote:
>> On 2019/6/14 21:11, Masayoshi Mizuma wrote:
>>> diff --git a/arch/arm64/kernel/cacheinfo.c b/arch/arm64/kernel/cacheinfo.c
>>> index 6eaf1c07aa4e..7fa6828bb488 100644
>>> --- a/arch/arm64/kernel/cacheinfo.c
>>> +++ b/arch/arm64/kernel/cacheinfo.c
>>> @@ -19,12 +19,10 @@
>>>  
>>>  int cache_line_size(void)
>>>  {
>>> -	u32 cwg = cache_type_cwg();
>>> -
>>>  	if (coherency_max_size != 0)
>>>  		return coherency_max_size;
>>>  
>>> -	return cwg ? 4 << cwg : ARCH_DMA_MINALIGN;
>>> +	return cache_line_size_of_cpu();
>>>  }
>>
>> How about simplify it as this?
>>
>> int cache_line_size(void)
>> {
>>         return coherency_max_size ? coherency_max_size :
>>                 cache_line_size_of_cpu();
>> }
> 
> I don't see this as a simplification, easier to read with explicit 'if'.
> 

Okay, I thought it could save some unnecessary lines :-).

>>>  EXPORT_SYMBOL_GPL(cache_line_size);
>>>  
>>> diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
>>> index 1669618db08a..379589dc7113 100644
>>> --- a/arch/arm64/mm/dma-mapping.c
>>> +++ b/arch/arm64/mm/dma-mapping.c
>>> @@ -38,10 +38,6 @@ void arch_dma_prep_coherent(struct page *page, size_t size)
>>>  
>>>  static int __init arm64_dma_init(void)
>>>  {
>>> -	WARN_TAINT(ARCH_DMA_MINALIGN < cache_line_size(),
>>> -		   TAINT_CPU_OUT_OF_SPEC,
>>> -		   "ARCH_DMA_MINALIGN smaller than CTR_EL0.CWG (%d < %d)",
>>> -		   ARCH_DMA_MINALIGN, cache_line_size());
>>>  	return dma_atomic_pool_init(GFP_DMA32, __pgprot(PROT_NORMAL_NC));
>>>  }
>>>  arch_initcall(arm64_dma_init);
>>> @@ -56,7 +52,17 @@ void arch_teardown_dma_ops(struct device *dev)
>>>  void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
>>>  			const struct iommu_ops *iommu, bool coherent)
>>>  {
>>> +	int cls = cache_line_size_of_cpu();
>>
>> whether we need this local variable, how about use cache_line_size_of_cpu
>> directly in WARN_TAINT just like before.
> 
> The reason being?
> 

Since it is an inline function, maybe it is unnecessary; it is trivial.

> Anyway, I'll queue v2 of this patch as is for 5.3. Thanks.
> 

It's fine.

Thanks,
Shaokun
Catalin Marinas June 17, 2019, 4:22 p.m. UTC | #4
On Mon, Jun 17, 2019 at 07:00:34PM +0800, Zhangshaokun wrote:
> On 2019/6/17 18:45, Catalin Marinas wrote:
> > On Sat, Jun 15, 2019 at 10:44:33AM +0800, Zhangshaokun wrote:
> >> On 2019/6/14 21:11, Masayoshi Mizuma wrote:
> >>> diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
> >>> index 1669618db08a..379589dc7113 100644
> >>> --- a/arch/arm64/mm/dma-mapping.c
> >>> +++ b/arch/arm64/mm/dma-mapping.c
> >>> @@ -38,10 +38,6 @@ void arch_dma_prep_coherent(struct page *page, size_t size)
> >>>  
> >>>  static int __init arm64_dma_init(void)
> >>>  {
> >>> -	WARN_TAINT(ARCH_DMA_MINALIGN < cache_line_size(),
> >>> -		   TAINT_CPU_OUT_OF_SPEC,
> >>> -		   "ARCH_DMA_MINALIGN smaller than CTR_EL0.CWG (%d < %d)",
> >>> -		   ARCH_DMA_MINALIGN, cache_line_size());
> >>>  	return dma_atomic_pool_init(GFP_DMA32, __pgprot(PROT_NORMAL_NC));
> >>>  }
> >>>  arch_initcall(arm64_dma_init);
> >>> @@ -56,7 +52,17 @@ void arch_teardown_dma_ops(struct device *dev)
> >>>  void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
> >>>  			const struct iommu_ops *iommu, bool coherent)
> >>>  {
> >>> +	int cls = cache_line_size_of_cpu();
> >>
> >> whether we need this local variable, how about use cache_line_size_of_cpu
> >> directly in WARN_TAINT just like before.
> > 
> > The reason being?
> 
> Since it is inline function,  maybe it is unnecessary, it is trivial.

OTOH, you end up with two reads from the CTR_EL0 register.
diff mbox series

Patch

diff --git a/arch/arm64/include/asm/cache.h b/arch/arm64/include/asm/cache.h
index 758af6340314..d24b7c1ecd9b 100644
--- a/arch/arm64/include/asm/cache.h
+++ b/arch/arm64/include/asm/cache.h
@@ -91,6 +91,13 @@  static inline u32 cache_type_cwg(void)
 
 #define __read_mostly __attribute__((__section__(".data..read_mostly")))
 
+static inline int cache_line_size_of_cpu(void)
+{
+	u32 cwg = cache_type_cwg();
+
+	return cwg ? 4 << cwg : ARCH_DMA_MINALIGN;
+}
+
 int cache_line_size(void);
 
 /*
diff --git a/arch/arm64/kernel/cacheinfo.c b/arch/arm64/kernel/cacheinfo.c
index 6eaf1c07aa4e..7fa6828bb488 100644
--- a/arch/arm64/kernel/cacheinfo.c
+++ b/arch/arm64/kernel/cacheinfo.c
@@ -19,12 +19,10 @@ 
 
 int cache_line_size(void)
 {
-	u32 cwg = cache_type_cwg();
-
 	if (coherency_max_size != 0)
 		return coherency_max_size;
 
-	return cwg ? 4 << cwg : ARCH_DMA_MINALIGN;
+	return cache_line_size_of_cpu();
 }
 EXPORT_SYMBOL_GPL(cache_line_size);
 
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index 1669618db08a..379589dc7113 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -38,10 +38,6 @@  void arch_dma_prep_coherent(struct page *page, size_t size)
 
 static int __init arm64_dma_init(void)
 {
-	WARN_TAINT(ARCH_DMA_MINALIGN < cache_line_size(),
-		   TAINT_CPU_OUT_OF_SPEC,
-		   "ARCH_DMA_MINALIGN smaller than CTR_EL0.CWG (%d < %d)",
-		   ARCH_DMA_MINALIGN, cache_line_size());
 	return dma_atomic_pool_init(GFP_DMA32, __pgprot(PROT_NORMAL_NC));
 }
 arch_initcall(arm64_dma_init);
@@ -56,7 +52,17 @@  void arch_teardown_dma_ops(struct device *dev)
 void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
 			const struct iommu_ops *iommu, bool coherent)
 {
+	int cls = cache_line_size_of_cpu();
+
 	dev->dma_coherent = coherent;
+
+	if (!coherent)
+		WARN_TAINT(cls > ARCH_DMA_MINALIGN,
+			TAINT_CPU_OUT_OF_SPEC,
+			"%s %s: ARCH_DMA_MINALIGN smaller than CTR_EL0.CWG (%d < %d)",
+			dev_driver_string(dev), dev_name(dev),
+			ARCH_DMA_MINALIGN, cls);
+
 	if (iommu)
 		iommu_setup_dma_ops(dev, dma_base, size);