
xen/arm and swiotlb-xen: possible data corruption

Message ID alpine.DEB.2.10.1703011643300.13077@sstabellini-ThinkPad-X260 (mailing list archive)
State New, archived

Commit Message

Stefano Stabellini March 2, 2017, 1:05 a.m. UTC
Hi all,

Edgar reported data corruption on network packets in dom0 when
swiotlb-xen is in use. He also reported that the following patch "fixes"
the problem for him:

 static void __xen_dma_page_cpu_to_dev(struct device *hwdev, dma_addr_t handle,
                size_t size, enum dma_data_direction dir)
 {
-       dma_cache_maint(handle & PAGE_MASK, handle & ~PAGE_MASK, size, dir, DMA_MAP);
+       printk("%s: addr=%lx size=%zd\n", __func__, handle, size);
+       dma_cache_maint(handle & PAGE_MASK, handle & ~PAGE_MASK, size + 64, dir, DMA_MAP);
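For reference, here is a quick model (not from Edgar's report; the
address and length below are made up) of why padding the size by 64
bytes, one cache line on this platform, would paper over a missed tail
line:

#include <stdio.h>

int main(void)
{
    unsigned long line = 64;              /* assumed cacheline_bytes */
    unsigned long p = 0x2a, size = 1500;  /* hypothetical unaligned buffer */
    unsigned long end = p + size;
    unsigned long spanned = (end + line - 1) / line - p / line;
    unsigned long cleaned = 0;
    unsigned long v;

    /* A loop that starts at the unaligned address and steps by one
     * line issues one cache op per step regardless of alignment. */
    for (v = p; v < end; v += line)
        cleaned++;

    /* Prints "spanned 25, cleaned 24": the tail line is never cleaned,
     * and padding size by one line (+64) happens to cover it. */
    printf("spanned %lu, cleaned %lu\n", spanned, cleaned);
    return 0;
}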

I suspect the problem has to do with cacheline alignment on the Xen
side (xen/common/grant_table.c:__gnttab_cache_flush).

If op == GNTTAB_CACHE_INVAL, we call invalidate_dcache_va_range; if op
== GNTTAB_CACHE_CLEAN, we call clean_dcache_va_range instead. The
parameter, v, may not be cacheline-aligned.

invalidate_dcache_va_range is capable of handling an unaligned address,
while clean_dcache_va_range is not.
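
The usual fix is to round the start of the range down and the end up to
a line boundary before walking it. A minimal sketch against the helpers
already used in asm-arm/page.h, assuming cacheline_bytes is a power of
two (the appended patch implements the same idea by cleaning the
partial head and tail lines explicitly):

/* Sketch only, not the patch itself: round down/up to a line
 * boundary, assuming cacheline_bytes is a power of two. */
static inline void clean_range_rounded(const void *p, unsigned long size)
{
    unsigned long va  = (unsigned long)p & ~(cacheline_bytes - 1);
    unsigned long end = ((unsigned long)p + size + cacheline_bytes - 1)
                        & ~(cacheline_bytes - 1);

    dsb(sy);           /* So the CPU issues all writes to the range */
    for ( ; va < end; va += cacheline_bytes )
        asm volatile (__clean_dcache_one(0) : : "r" (va));
    dsb(sy);           /* So the cleans complete before continuing */
}

Every line in [va, end) then covers the requested range, including the
partially used head and tail lines.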

Edgar, does the appended patch fix the problem for you?

---

Comments

Edgar E. Iglesias March 2, 2017, 8:38 a.m. UTC | #1
On Wed, Mar 01, 2017 at 05:05:21PM -0800, Stefano Stabellini wrote:
> Hi all,
> 
> Edgar reported data corruption on network packets in dom0 when
> swiotlb-xen is in use. He also reported that the following patch "fixes"
> the problem for him:
> 
>  static void __xen_dma_page_cpu_to_dev(struct device *hwdev, dma_addr_t handle,
>                 size_t size, enum dma_data_direction dir)
>  {
> -       dma_cache_maint(handle & PAGE_MASK, handle & ~PAGE_MASK, size, dir, DMA_MAP);
> +       printk("%s: addr=%lx size=%zd\n", __func__, handle, size);
> +       dma_cache_maint(handle & PAGE_MASK, handle & ~PAGE_MASK, size + 64, dir, DMA_MAP);
> 
> I suspect the problem has to do with cacheline alignment on the Xen
> side (xen/common/grant_table.c:__gnttab_cache_flush).
> 
> If op == GNTTAB_CACHE_INVAL, we call invalidate_dcache_va_range; if op
> == GNTTAB_CACHE_CLEAN, we call clean_dcache_va_range instead. The
> parameter, v, may not be cacheline-aligned.
> 
> invalidate_dcache_va_range is capable of handling an unaligned address,
> while clean_dcache_va_range is not.
> 
> Edgar, does the appended patch fix the problem for you?


Thanks Stefano,

This does indeed fix the issue for me.

Cheers,
Edgar



Patch

diff --git a/xen/include/asm-arm/page.h b/xen/include/asm-arm/page.h
index 86de0b6..9cdf2fb 100644
--- a/xen/include/asm-arm/page.h
+++ b/xen/include/asm-arm/page.h
@@ -322,10 +322,30 @@ static inline int invalidate_dcache_va_range(const void *p, unsigned long size)
 
 static inline int clean_dcache_va_range(const void *p, unsigned long size)
 {
-    const void *end;
+    size_t off;
+    const void *end = p + size;
+
     dsb(sy);           /* So the CPU issues all writes to the range */
-    for ( end = p + size; p < end; p += cacheline_bytes )
+
+    off = (unsigned long)p % cacheline_bytes;
+    if ( off )
+    {
+        p -= off;
         asm volatile (__clean_dcache_one(0) : : "r" (p));
+        p += cacheline_bytes;
+        size -= cacheline_bytes - off;
+    }
+    off = (unsigned long)end % cacheline_bytes;
+    if ( off )
+    {
+        end -= off;
+        size -= off;
+        asm volatile (__clean_dcache_one(0) : : "r" (end));
+    }
+
+    for ( ; p < end; p += cacheline_bytes )
+        asm volatile (__clean_dcache_one(0) : : "r" (p));
+
     dsb(sy);           /* So we know the flushes happen before continuing */
     /* ARM callers assume that dcache_* functions cannot fail. */
     return 0;
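
For anyone who wants to sanity-check the loop bounds outside of Xen,
the following small model mirrors the patched control flow with the
cache op replaced by a printf; the 64-byte line size and the test range
are hypothetical. The patch's size bookkeeping is omitted here because
the loop bounds only depend on p and end:

#include <stdio.h>

#define CACHELINE_BYTES 64UL   /* assumed line size for the model */

/* Print the line that a DC CVAC on this VA would clean. */
static void clean_line(unsigned long va)
{
    printf("clean line %#lx\n", va & ~(CACHELINE_BYTES - 1));
}

/* Mirrors the control flow of the patched clean_dcache_va_range. */
static void model_clean_range(unsigned long p, unsigned long size)
{
    unsigned long end = p + size;
    unsigned long off = p % CACHELINE_BYTES;

    if (off) {                  /* partial head line */
        p -= off;
        clean_line(p);
        p += CACHELINE_BYTES;
    }
    off = end % CACHELINE_BYTES;
    if (off) {                  /* partial tail line */
        end -= off;
        clean_line(end);
    }
    for (; p < end; p += CACHELINE_BYTES)   /* aligned middle */
        clean_line(p);
}

int main(void)
{
    /* 200 bytes starting 42 bytes into a line: expect 4 lines,
     * 0x1000 (head), 0x10c0 (tail), then 0x1040 and 0x1080. */
    model_clean_range(0x102a, 200);
    return 0;
}

Note that a range contained entirely within one line cleans that line
twice (once as head, once as tail), which is redundant but harmless;
the patch behaves the same way.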