[1/2] arm64/mm: check cpu cache line size with non-coherent device
diff mbox series

Message ID 20190611151731.6135-2-msys.mizuma@gmail.com
State New
Headers show
Series
  • Correct the cache line size warning
Related show

Commit Message

Masayoshi Mizuma June 11, 2019, 3:17 p.m. UTC
From: Masayoshi Mizuma <m.mizuma@jp.fujitsu.com>

As discussed in the thread [1], the CPU cache line size is a problem
only for non-coherent devices, and the coherent flag has already been
added to struct device.

Show the warning only if the device is non-coherent and
ARCH_DMA_MINALIGN is smaller than the CPU cache line size.

[1] https://lore.kernel.org/linux-arm-kernel/20180514145703.celnlobzn3uh5tc2@localhost/

Signed-off-by: Masayoshi Mizuma <m.mizuma@jp.fujitsu.com>
Reviewed-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Tested-by: Zhang Lei <zhang.lei@jp.fujitsu.com>
---
 arch/arm64/mm/dma-mapping.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

Comments

Catalin Marinas June 11, 2019, 6 p.m. UTC | #1
On Tue, Jun 11, 2019 at 11:17:30AM -0400, Masayoshi Mizuma wrote:
> --- a/arch/arm64/mm/dma-mapping.c
> +++ b/arch/arm64/mm/dma-mapping.c
> @@ -91,10 +91,6 @@ static int __swiotlb_mmap_pfn(struct vm_area_struct *vma,
>  
>  static int __init arm64_dma_init(void)
>  {
> -	WARN_TAINT(ARCH_DMA_MINALIGN < cache_line_size(),
> -		   TAINT_CPU_OUT_OF_SPEC,
> -		   "ARCH_DMA_MINALIGN smaller than CTR_EL0.CWG (%d < %d)",
> -		   ARCH_DMA_MINALIGN, cache_line_size());
>  	return dma_atomic_pool_init(GFP_DMA32, __pgprot(PROT_NORMAL_NC));
>  }
>  arch_initcall(arm64_dma_init);
> @@ -473,6 +469,11 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
>  			const struct iommu_ops *iommu, bool coherent)
>  {
>  	dev->dma_coherent = coherent;
> +
> +	if (!coherent && (cache_line_size() > ARCH_DMA_MINALIGN))
> +		dev_WARN(dev, "ARCH_DMA_MINALIGN smaller than CTR_EL0.CWG (%d < %d)",
> +				ARCH_DMA_MINALIGN, cache_line_size());

I'm ok in principle with this patch, with the minor issue that, since
commit 7b8c87b297a7 ("arm64: cacheinfo: Update cache_line_size detected
from DT or PPTT") queued for 5.3, cache_line_size() gets the information
from DT or ACPI. The reason for that change is that the information is
used for performance tuning rather than DMA coherency.

You can go for a direct cache_type_cwg() check in here, unless Robin
(cc'ed) has a better idea.
Masayoshi Mizuma June 11, 2019, 10:02 p.m. UTC | #2
On Tue, Jun 11, 2019 at 07:00:07PM +0100, Catalin Marinas wrote:
> On Tue, Jun 11, 2019 at 11:17:30AM -0400, Masayoshi Mizuma wrote:
> > --- a/arch/arm64/mm/dma-mapping.c
> > +++ b/arch/arm64/mm/dma-mapping.c
> > @@ -91,10 +91,6 @@ static int __swiotlb_mmap_pfn(struct vm_area_struct *vma,
> >  
> >  static int __init arm64_dma_init(void)
> >  {
> > -	WARN_TAINT(ARCH_DMA_MINALIGN < cache_line_size(),
> > -		   TAINT_CPU_OUT_OF_SPEC,
> > -		   "ARCH_DMA_MINALIGN smaller than CTR_EL0.CWG (%d < %d)",
> > -		   ARCH_DMA_MINALIGN, cache_line_size());
> >  	return dma_atomic_pool_init(GFP_DMA32, __pgprot(PROT_NORMAL_NC));
> >  }
> >  arch_initcall(arm64_dma_init);
> > @@ -473,6 +469,11 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
> >  			const struct iommu_ops *iommu, bool coherent)
> >  {
> >  	dev->dma_coherent = coherent;
> > +
> > +	if (!coherent && (cache_line_size() > ARCH_DMA_MINALIGN))
> > +		dev_WARN(dev, "ARCH_DMA_MINALIGN smaller than CTR_EL0.CWG (%d < %d)",
> > +				ARCH_DMA_MINALIGN, cache_line_size());
> 
> I'm ok in principle with this patch, with the minor issue that since
> commit 7b8c87b297a7 ("arm64: cacheinfo: Update cache_line_size detected
> from DT or PPTT") queued for 5.3 cache_line_size() gets the information
> from DT or ACPI. The reason for this change is that the information is
> used for performance tuning rather than DMA coherency.
> 
> You can go for a direct cache_type_cwg() check in here, unless Robin
> (cc'ed) has a better idea.

Got it, thanks.
I believe coherency_max_size is zero when coherent is false,
so I'll modify the patch as follows. Does it make sense?

@@ -57,6 +53,11 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
                        const struct iommu_ops *iommu, bool coherent)
 {
        dev->dma_coherent = coherent;
+
+       if (!coherent && (cache_line_size() > ARCH_DMA_MINALIGN))
+               dev_WARN(dev, "ARCH_DMA_MINALIGN smaller than CTR_EL0.CWG (%d < %d)",
+                               ARCH_DMA_MINALIGN, (4 << cache_type_cwg()));
+
        if (iommu)
                iommu_setup_dma_ops(dev, dma_base, size);

Thanks,
Masa
Catalin Marinas June 13, 2019, 3:54 p.m. UTC | #3
On Tue, Jun 11, 2019 at 06:02:47PM -0400, Masayoshi Mizuma wrote:
> On Tue, Jun 11, 2019 at 07:00:07PM +0100, Catalin Marinas wrote:
> > On Tue, Jun 11, 2019 at 11:17:30AM -0400, Masayoshi Mizuma wrote:
> > > --- a/arch/arm64/mm/dma-mapping.c
> > > +++ b/arch/arm64/mm/dma-mapping.c
> > > @@ -91,10 +91,6 @@ static int __swiotlb_mmap_pfn(struct vm_area_struct *vma,
> > >  
> > >  static int __init arm64_dma_init(void)
> > >  {
> > > -	WARN_TAINT(ARCH_DMA_MINALIGN < cache_line_size(),
> > > -		   TAINT_CPU_OUT_OF_SPEC,
> > > -		   "ARCH_DMA_MINALIGN smaller than CTR_EL0.CWG (%d < %d)",
> > > -		   ARCH_DMA_MINALIGN, cache_line_size());
> > >  	return dma_atomic_pool_init(GFP_DMA32, __pgprot(PROT_NORMAL_NC));
> > >  }
> > >  arch_initcall(arm64_dma_init);
> > > @@ -473,6 +469,11 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
> > >  			const struct iommu_ops *iommu, bool coherent)
> > >  {
> > >  	dev->dma_coherent = coherent;
> > > +
> > > +	if (!coherent && (cache_line_size() > ARCH_DMA_MINALIGN))
> > > +		dev_WARN(dev, "ARCH_DMA_MINALIGN smaller than CTR_EL0.CWG (%d < %d)",
> > > +				ARCH_DMA_MINALIGN, cache_line_size());
> > 
> > I'm ok in principle with this patch, with the minor issue that since
> > commit 7b8c87b297a7 ("arm64: cacheinfo: Update cache_line_size detected
> > from DT or PPTT") queued for 5.3 cache_line_size() gets the information
> > from DT or ACPI. The reason for this change is that the information is
> > used for performance tuning rather than DMA coherency.
> > 
> > You can go for a direct cache_type_cwg() check in here, unless Robin
> > (cc'ed) has a better idea.
> 
> Got it, thanks.
> I believe coherency_max_size is zero in case of coherent is false,
> so I'll modify the patch as following. Does it make sense?

The coherency_max_size gives you the largest cache line in the system,
independent of whether a device is coherent or not. You may have a
device that does not snoop L1/L2 but there is a transparent L3 (system
cache) with a larger cache line that the device may be able to snoop.
The coherency_max_size and therefore cache_line_size() would give you
this L3 value but the device would work fine since CWG <=
ARCH_DMA_MINALIGN.

> 
> @@ -57,6 +53,11 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
>                         const struct iommu_ops *iommu, bool coherent)
>  {
>         dev->dma_coherent = coherent;
> +
> +       if (!coherent && (cache_line_size() > ARCH_DMA_MINALIGN))
> +               dev_WARN(dev, "ARCH_DMA_MINALIGN smaller than CTR_EL0.CWG (%d < %d)",
> +                               ARCH_DMA_MINALIGN, (4 << cache_type_cwg()));
> +
>         if (iommu)
>                 iommu_setup_dma_ops(dev, dma_base, size);

I think the easiest here is to add a local variable:

	int cls = 4 << cache_type_cwg();

and check it against ARCH_DMA_MINALIGN.
Robin Murphy June 13, 2019, 5:10 p.m. UTC | #4
On 13/06/2019 16:54, Catalin Marinas wrote:
> On Tue, Jun 11, 2019 at 06:02:47PM -0400, Masayoshi Mizuma wrote:
>> On Tue, Jun 11, 2019 at 07:00:07PM +0100, Catalin Marinas wrote:
>>> On Tue, Jun 11, 2019 at 11:17:30AM -0400, Masayoshi Mizuma wrote:
>>>> --- a/arch/arm64/mm/dma-mapping.c
>>>> +++ b/arch/arm64/mm/dma-mapping.c
>>>> @@ -91,10 +91,6 @@ static int __swiotlb_mmap_pfn(struct vm_area_struct *vma,
>>>>   
>>>>   static int __init arm64_dma_init(void)
>>>>   {
>>>> -	WARN_TAINT(ARCH_DMA_MINALIGN < cache_line_size(),
>>>> -		   TAINT_CPU_OUT_OF_SPEC,
>>>> -		   "ARCH_DMA_MINALIGN smaller than CTR_EL0.CWG (%d < %d)",
>>>> -		   ARCH_DMA_MINALIGN, cache_line_size());
>>>>   	return dma_atomic_pool_init(GFP_DMA32, __pgprot(PROT_NORMAL_NC));
>>>>   }
>>>>   arch_initcall(arm64_dma_init);
>>>> @@ -473,6 +469,11 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
>>>>   			const struct iommu_ops *iommu, bool coherent)
>>>>   {
>>>>   	dev->dma_coherent = coherent;
>>>> +
>>>> +	if (!coherent && (cache_line_size() > ARCH_DMA_MINALIGN))
>>>> +		dev_WARN(dev, "ARCH_DMA_MINALIGN smaller than CTR_EL0.CWG (%d < %d)",
>>>> +				ARCH_DMA_MINALIGN, cache_line_size());
>>>
>>> I'm ok in principle with this patch, with the minor issue that since
>>> commit 7b8c87b297a7 ("arm64: cacheinfo: Update cache_line_size detected
>>> from DT or PPTT") queued for 5.3 cache_line_size() gets the information
>>> from DT or ACPI. The reason for this change is that the information is
>>> used for performance tuning rather than DMA coherency.
>>>
>>> You can go for a direct cache_type_cwg() check in here, unless Robin
>>> (cc'ed) has a better idea.
>>
>> Got it, thanks.
>> I believe coherency_max_size is zero in case of coherent is false,
>> so I'll modify the patch as following. Does it make sense?
> 
> The coherency_max_size gives you the largest cache line in the system,
> independent of whether a device is coherent or not. You may have a
> device that does not snoop L1/L2 but there is a transparent L3 (system
> cache) with a larger cache line that the device may be able to snoop.
> The coherency_max_size and therefore cache_line_size() would give you
> this L3 value but the device would work fine since CWG <=
> ARCH_DMA_MINALIGN.
> 
>>
>> @@ -57,6 +53,11 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
>>                          const struct iommu_ops *iommu, bool coherent)
>>   {
>>          dev->dma_coherent = coherent;
>> +
>> +       if (!coherent && (cache_line_size() > ARCH_DMA_MINALIGN))
>> +               dev_WARN(dev, "ARCH_DMA_MINALIGN smaller than CTR_EL0.CWG (%d < %d)",
>> +                               ARCH_DMA_MINALIGN, (4 << cache_type_cwg()));
>> +
>>          if (iommu)
>>                  iommu_setup_dma_ops(dev, dma_base, size);
> 
> I think the easiest here is to add a local variable:
> 
> 	int cls = 4 << cache_type_cwg();
> 
> and check it against ARCH_DMA_MINALIGN.
> 

Agreed, and I'd say we should keep the taint too, since if this 
situation ever was hit the potential crashes would be weird and random 
and not obviously DMA-related.

Robin.

Patch
diff mbox series

diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index 674860e3e478..c0c09890c845 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -91,10 +91,6 @@  static int __swiotlb_mmap_pfn(struct vm_area_struct *vma,
 
 static int __init arm64_dma_init(void)
 {
-	WARN_TAINT(ARCH_DMA_MINALIGN < cache_line_size(),
-		   TAINT_CPU_OUT_OF_SPEC,
-		   "ARCH_DMA_MINALIGN smaller than CTR_EL0.CWG (%d < %d)",
-		   ARCH_DMA_MINALIGN, cache_line_size());
 	return dma_atomic_pool_init(GFP_DMA32, __pgprot(PROT_NORMAL_NC));
 }
 arch_initcall(arm64_dma_init);
@@ -473,6 +469,11 @@  void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
 			const struct iommu_ops *iommu, bool coherent)
 {
 	dev->dma_coherent = coherent;
+
+	if (!coherent && (cache_line_size() > ARCH_DMA_MINALIGN))
+		dev_WARN(dev, "ARCH_DMA_MINALIGN smaller than CTR_EL0.CWG (%d < %d)",
+				ARCH_DMA_MINALIGN, cache_line_size());
+
 	__iommu_setup_dma_ops(dev, dma_base, size, iommu);
 
 #ifdef CONFIG_XEN