Message ID | 1412348006-27884-2-git-send-email-stefano.stabellini@eu.citrix.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On 03/10/14 15:53, Stefano Stabellini wrote: > Introduce support for new hypercall GNTTABOP_cache_flush. > Use it to perform cache flashing on pages used for dma when necessary. [..] > /* functions called by SWIOTLB */ > @@ -22,16 +25,31 @@ static void dma_cache_maint(dma_addr_t handle, unsigned long offset, > size_t len = left; > void *vaddr; > > + if (len + offset > PAGE_SIZE) > + len = PAGE_SIZE - offset; > + > if (!pfn_valid(pfn)) > { > - /* TODO: cache flush */ > + struct gnttab_cache_flush cflush; > + > + cflush.op = 0; > + cflush.a.dev_bus_addr = pfn << PAGE_SHIFT; > + cflush.offset = offset; > + cflush.size = len; > + > + if (op == dmac_unmap_area && dir != DMA_TO_DEVICE) > + cflush.op = GNTTAB_CACHE_INVAL; > + if (op == dmac_map_area) { > + cflush.op = GNTTAB_CACHE_CLEAN; > + if (dir == DMA_FROM_DEVICE) > + cflush.op |= GNTTAB_CACHE_INVAL; > + } Are all these cache operations needed? You do a clean on map regardless of the direction and INVAL on map seems unnecessary. I would have thought it would be: map && (TO_DEVICE || BOTH) op = CLEAN unmap && (FROM_DEVICE || BOTH) op = INVAL > --- a/include/xen/interface/grant_table.h > +++ b/include/xen/interface/grant_table.h > @@ -479,6 +479,24 @@ struct gnttab_get_version { > DEFINE_GUEST_HANDLE_STRUCT(gnttab_get_version); > > /* > + * Issue one or more cache maintenance operations on a portion of a > + * page granted to the calling domain by a foreign domain. > + */ > +#define GNTTABOP_cache_flush 12 > +struct gnttab_cache_flush { > + union { > + uint64_t dev_bus_addr; > + grant_ref_t ref; > + } a; > + uint32_t offset; /* offset from start of grant */ > + uint32_t size; /* size within the grant */ > +#define GNTTAB_CACHE_CLEAN (1<<0) > +#define GNTTAB_CACHE_INVAL (1<<1) > + uint32_t op; > +}; > +DEFINE_GUEST_HANDLE_STRUCT(gnttab_cache_flush); > + > +/* > * Bitfield values for update_pin_status.flags. > */ > /* Map the grant entry for access by I/O devices. */ If the ARM bits are correct, have a Reviewed-by: David Vrabel <david.vrabel@citrix.com> for the common header. David
On Fri, 3 Oct 2014, David Vrabel wrote: > On 03/10/14 15:53, Stefano Stabellini wrote: > > Introduce support for new hypercall GNTTABOP_cache_flush. > > Use it to perform cache flashing on pages used for dma when necessary. > [..] > > /* functions called by SWIOTLB */ > > @@ -22,16 +25,31 @@ static void dma_cache_maint(dma_addr_t handle, unsigned long offset, > > size_t len = left; > > void *vaddr; > > > > + if (len + offset > PAGE_SIZE) > > + len = PAGE_SIZE - offset; > > + > > if (!pfn_valid(pfn)) > > { > > - /* TODO: cache flush */ > > + struct gnttab_cache_flush cflush; > > + > > + cflush.op = 0; > > + cflush.a.dev_bus_addr = pfn << PAGE_SHIFT; > > + cflush.offset = offset; > > + cflush.size = len; > > + > > + if (op == dmac_unmap_area && dir != DMA_TO_DEVICE) > > + cflush.op = GNTTAB_CACHE_INVAL; > > + if (op == dmac_map_area) { > > + cflush.op = GNTTAB_CACHE_CLEAN; > > + if (dir == DMA_FROM_DEVICE) > > + cflush.op |= GNTTAB_CACHE_INVAL; > > + } > > Are all these cache operations needed? You do a clean on map regardless > of the direction and INVAL on map seems unnecessary. > > I would have thought it would be: > > map && (TO_DEVICE || BOTH) > op = CLEAN > > unmap && (FROM_DEVICE || BOTH) > op = INVAL I was trying to do the same thing Linux is already doing on native to stay on the safe side. See arch/arm/mm/cache-v7.S:v7_dma_map_area and arch/arm/mm/cache-v7.S:v7_dma_unmap_area. Unless I misread the assembly they should match. > > --- a/include/xen/interface/grant_table.h > > +++ b/include/xen/interface/grant_table.h > > @@ -479,6 +479,24 @@ struct gnttab_get_version { > > DEFINE_GUEST_HANDLE_STRUCT(gnttab_get_version); > > > > /* > > + * Issue one or more cache maintenance operations on a portion of a > > + * page granted to the calling domain by a foreign domain. > > + */ > > +#define GNTTABOP_cache_flush 12 > > +struct gnttab_cache_flush { > > + union { > > + uint64_t dev_bus_addr; > > + grant_ref_t ref; > > + } a; > > + uint32_t offset; /* offset from start of grant */ > > + uint32_t size; /* size within the grant */ > > +#define GNTTAB_CACHE_CLEAN (1<<0) > > +#define GNTTAB_CACHE_INVAL (1<<1) > > + uint32_t op; > > +}; > > +DEFINE_GUEST_HANDLE_STRUCT(gnttab_cache_flush); > > + > > +/* > > * Bitfield values for update_pin_status.flags. > > */ > > /* Map the grant entry for access by I/O devices. */ > > If the ARM bits are correct, have a > > Reviewed-by: David Vrabel <david.vrabel@citrix.com> > > for the common header. > > David >
On Fri, 2014-10-03 at 17:20 +0100, Stefano Stabellini wrote: > On Fri, 3 Oct 2014, David Vrabel wrote: > > On 03/10/14 15:53, Stefano Stabellini wrote: > > > Introduce support for new hypercall GNTTABOP_cache_flush. > > > Use it to perform cache flashing on pages used for dma when necessary. > > [..] > > > /* functions called by SWIOTLB */ > > > @@ -22,16 +25,31 @@ static void dma_cache_maint(dma_addr_t handle, unsigned long offset, > > > size_t len = left; > > > void *vaddr; > > > > > > + if (len + offset > PAGE_SIZE) > > > + len = PAGE_SIZE - offset; > > > + > > > if (!pfn_valid(pfn)) > > > { > > > - /* TODO: cache flush */ > > > + struct gnttab_cache_flush cflush; > > > + > > > + cflush.op = 0; > > > + cflush.a.dev_bus_addr = pfn << PAGE_SHIFT; > > > + cflush.offset = offset; > > > + cflush.size = len; > > > + > > > + if (op == dmac_unmap_area && dir != DMA_TO_DEVICE) > > > + cflush.op = GNTTAB_CACHE_INVAL; > > > + if (op == dmac_map_area) { > > > + cflush.op = GNTTAB_CACHE_CLEAN; > > > + if (dir == DMA_FROM_DEVICE) > > > + cflush.op |= GNTTAB_CACHE_INVAL; > > > + } > > > > Are all these cache operations needed? You do a clean on map regardless > > of the direction and INVAL on map seems unnecessary. Isn't the inval on map so that the processor doesn't decide to evict/clean the cache line all over your newly DMA'd data? > > I would have thought it would be: > > > > map && (TO_DEVICE || BOTH) > > op = CLEAN > > > > unmap && (FROM_DEVICE || BOTH) > > op = INVAL > > I was trying to do the same thing Linux is already doing on native to > stay on the safe side. > > See arch/arm/mm/cache-v7.S:v7_dma_map_area and > arch/arm/mm/cache-v7.S:v7_dma_unmap_area. > > Unless I misread the assembly they should match. I think you have, beq doesn't set lr, so the called function will return to its "grandparent". i.e. the caller of v7_dma_map_area in this case (which will have used bl), so: ENTRY(v7_dma_map_area) add r1, r1, r0 teq r2, #DMA_FROM_DEVICE beq v7_dma_inv_range b v7_dma_clean_range ENDPROC(v7_dma_map_area) Is actually if (dir == from device) inv else clean which makes much more sense I think. > > > > > --- a/include/xen/interface/grant_table.h > > > +++ b/include/xen/interface/grant_table.h > > > @@ -479,6 +479,24 @@ struct gnttab_get_version { > > > DEFINE_GUEST_HANDLE_STRUCT(gnttab_get_version); > > > > > > /* > > > + * Issue one or more cache maintenance operations on a portion of a > > > + * page granted to the calling domain by a foreign domain. > > > + */ > > > +#define GNTTABOP_cache_flush 12 > > > +struct gnttab_cache_flush { > > > + union { > > > + uint64_t dev_bus_addr; > > > + grant_ref_t ref; > > > + } a; > > > + uint32_t offset; /* offset from start of grant */ > > > + uint32_t size; /* size within the grant */ > > > +#define GNTTAB_CACHE_CLEAN (1<<0) > > > +#define GNTTAB_CACHE_INVAL (1<<1) > > > + uint32_t op; > > > +}; > > > +DEFINE_GUEST_HANDLE_STRUCT(gnttab_cache_flush); > > > + > > > +/* > > > * Bitfield values for update_pin_status.flags. > > > */ > > > /* Map the grant entry for access by I/O devices. */ > > > > If the ARM bits are correct, have a > > > > Reviewed-by: David Vrabel <david.vrabel@citrix.com> > > > > for the common header. > > > > David > >
On 03/10/14 17:34, Ian Campbell wrote: > On Fri, 2014-10-03 at 17:20 +0100, Stefano Stabellini wrote: >> On Fri, 3 Oct 2014, David Vrabel wrote: >>> On 03/10/14 15:53, Stefano Stabellini wrote: >>>> Introduce support for new hypercall GNTTABOP_cache_flush. >>>> Use it to perform cache flashing on pages used for dma when necessary. >>> [..] >>>> /* functions called by SWIOTLB */ >>>> @@ -22,16 +25,31 @@ static void dma_cache_maint(dma_addr_t handle, unsigned long offset, >>>> size_t len = left; >>>> void *vaddr; >>>> >>>> + if (len + offset > PAGE_SIZE) >>>> + len = PAGE_SIZE - offset; >>>> + >>>> if (!pfn_valid(pfn)) >>>> { >>>> - /* TODO: cache flush */ >>>> + struct gnttab_cache_flush cflush; >>>> + >>>> + cflush.op = 0; >>>> + cflush.a.dev_bus_addr = pfn << PAGE_SHIFT; >>>> + cflush.offset = offset; >>>> + cflush.size = len; >>>> + >>>> + if (op == dmac_unmap_area && dir != DMA_TO_DEVICE) >>>> + cflush.op = GNTTAB_CACHE_INVAL; >>>> + if (op == dmac_map_area) { >>>> + cflush.op = GNTTAB_CACHE_CLEAN; >>>> + if (dir == DMA_FROM_DEVICE) >>>> + cflush.op |= GNTTAB_CACHE_INVAL; >>>> + } >>> >>> Are all these cache operations needed? You do a clean on map regardless >>> of the direction and INVAL on map seems unnecessary. > > Isn't the inval on map so that the processor doesn't decide to > evict/clean the cache line all over your newly DMA'd data? Ah, yes that makes sense. >>> I would have thought it would be: >>> >>> map && (TO_DEVICE || BOTH) >>> op = CLEAN >>> >>> unmap && (FROM_DEVICE || BOTH) >>> op = INVAL >> >> I was trying to do the same thing Linux is already doing on native to >> stay on the safe side. >> >> See arch/arm/mm/cache-v7.S:v7_dma_map_area and >> arch/arm/mm/cache-v7.S:v7_dma_unmap_area. >> >> Unless I misread the assembly they should match. > > I think you have, beq doesn't set lr, so the called function will return > to its "grandparent". i.e. the caller of v7_dma_map_area in this case > (which will have used bl), so: > ENTRY(v7_dma_map_area) > add r1, r1, r0 > teq r2, #DMA_FROM_DEVICE > beq v7_dma_inv_range > b v7_dma_clean_range > ENDPROC(v7_dma_map_area) > > Is actually > if (dir == from device) > inv > else > clean > > which makes much more sense I think. This is how I read the assembler too. David
On Fri, Oct 03, 2014 at 04:05:14PM +0100, David Vrabel wrote: > Are all these cache operations needed? You do a clean on map regardless > of the direction and INVAL on map seems unnecessary. > > I would have thought it would be: > > map && (TO_DEVICE || BOTH) > op = CLEAN > > unmap && (FROM_DEVICE || BOTH) > op = INVAL This is wrong. You've failed to consider the case where you have dirty cache lines in the cache before the DMA operation, and the DMA operation is going to write to memory. So, let's take this case, and work it through. At the map point, we do nothing. This leaves dirty cache lines in the cache. DMA commences, and starts writing data into the memory. Meanwhile, the CPU goes off and does something else. The cache then wants to evict a line, and picks one of the dirty lines associated with this region. The dirty data is written back to memory, overwriting the newly DMA'd data. Meanwhile, the CPU may speculatively load cache lines corresponding with this memory, possibly from the part which has not yet been updated by the DMA activity. At the end of the DMA, we unmap, and at this point we invalidate the cache, getting rid of any remaining dirty lines, and the speculatively loaded lines. We now have corrupted data in the memory region. So, maybe you'll be saying bye bye to your filesystem at this point... You could always clean at map time, but in the case of DMA from the device, this is a waste of bus cycles since you don't need the dirty lines in the cache written back to memory. If you can invalidate, you might as well do that for this case and save writing back data to memory which you're about to overwrite with DMA.
diff --git a/arch/arm/xen/mm32.c b/arch/arm/xen/mm32.c index a5a93fc..6ca09c2 100644 --- a/arch/arm/xen/mm32.c +++ b/arch/arm/xen/mm32.c @@ -4,6 +4,9 @@ #include <linux/highmem.h> #include <xen/features.h> +#include <xen/interface/grant_table.h> + +#include <asm/xen/hypercall.h> /* functions called by SWIOTLB */ @@ -22,16 +25,31 @@ static void dma_cache_maint(dma_addr_t handle, unsigned long offset, size_t len = left; void *vaddr; + if (len + offset > PAGE_SIZE) + len = PAGE_SIZE - offset; + if (!pfn_valid(pfn)) { - /* TODO: cache flush */ + struct gnttab_cache_flush cflush; + + cflush.op = 0; + cflush.a.dev_bus_addr = pfn << PAGE_SHIFT; + cflush.offset = offset; + cflush.size = len; + + if (op == dmac_unmap_area && dir != DMA_TO_DEVICE) + cflush.op = GNTTAB_CACHE_INVAL; + if (op == dmac_map_area) { + cflush.op = GNTTAB_CACHE_CLEAN; + if (dir == DMA_FROM_DEVICE) + cflush.op |= GNTTAB_CACHE_INVAL; + } + if (cflush.op) + HYPERVISOR_grant_table_op(GNTTABOP_cache_flush, &cflush, 1); } else { struct page *page = pfn_to_page(pfn); if (PageHighMem(page)) { - if (len + offset > PAGE_SIZE) - len = PAGE_SIZE - offset; - if (cache_is_vipt_nonaliasing()) { vaddr = kmap_atomic(page); op(vaddr + offset, len, dir); diff --git a/include/xen/interface/grant_table.h b/include/xen/interface/grant_table.h index e40fae9..28ff207 100644 --- a/include/xen/interface/grant_table.h +++ b/include/xen/interface/grant_table.h @@ -479,6 +479,24 @@ struct gnttab_get_version { DEFINE_GUEST_HANDLE_STRUCT(gnttab_get_version); /* + * Issue one or more cache maintenance operations on a portion of a + * page granted to the calling domain by a foreign domain. + */ +#define GNTTABOP_cache_flush 12 +struct gnttab_cache_flush { + union { + uint64_t dev_bus_addr; + grant_ref_t ref; + } a; + uint32_t offset; /* offset from start of grant */ + uint32_t size; /* size within the grant */ +#define GNTTAB_CACHE_CLEAN (1<<0) +#define GNTTAB_CACHE_INVAL (1<<1) + uint32_t op; +}; +DEFINE_GUEST_HANDLE_STRUCT(gnttab_cache_flush); + +/* * Bitfield values for update_pin_status.flags. */ /* Map the grant entry for access by I/O devices. */
Introduce support for new hypercall GNTTABOP_cache_flush. Use it to perform cache flashing on pages used for dma when necessary. Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com> --- Changes in v2: - update the hypercall interface to match Xen; - call the interface on a single page at a time. --- arch/arm/xen/mm32.c | 26 ++++++++++++++++++++++---- include/xen/interface/grant_table.h | 18 ++++++++++++++++++ 2 files changed, 40 insertions(+), 4 deletions(-)