Message ID | 20240214162201.4168778-2-aleksander.lobakin@intel.com (mailing list archive)
---|---
State | Changes Requested
Delegated to | Netdev Maintainers
Series | dma: skip calling no-op sync ops when possible
On 2024-02-14 4:21 pm, Alexander Lobakin wrote:
> Some platforms do have DMA, but DMA there is always direct and coherent.
> Currently, even on such platforms DMA sync operations are compiled and
> called.
> Add a new hidden Kconfig symbol, DMA_NEED_SYNC, and set it only when
> either sync operations are needed or there is DMA ops or swiotlb
> enabled. Set dma_need_sync() and dma_skip_sync() depending on this
> symbol state and don't call sync ops when dma_skip_sync() is true.
> The change allows for future optimizations of DMA sync calls depending
> on compile-time or runtime conditions.
>
> Signed-off-by: Alexander Lobakin <aleksander.lobakin@intel.com>
> ---
>  kernel/dma/Kconfig          |  4 ++
>  include/linux/dma-mapping.h | 80 +++++++++++++++++++++++++++++++------
>  kernel/dma/mapping.c        | 20 +++++-----
>  3 files changed, 81 insertions(+), 23 deletions(-)
>
> diff --git a/kernel/dma/Kconfig b/kernel/dma/Kconfig
> index d62f5957f36b..1c9ff05b1ecb 100644
> --- a/kernel/dma/Kconfig
> +++ b/kernel/dma/Kconfig
> @@ -107,6 +107,10 @@ config DMA_BOUNCE_UNALIGNED_KMALLOC
>  	bool
>  	depends on SWIOTLB
>
> +config DMA_NEED_SYNC
> +	def_bool ARCH_HAS_SYNC_DMA_FOR_DEVICE || ARCH_HAS_SYNC_DMA_FOR_CPU || \
> +		 ARCH_HAS_SYNC_DMA_FOR_CPU_ALL || DMA_OPS || SWIOTLB

I'm not sure DMA_OPS belongs here - several architectures have
non-trivial ops without syncs, e.g. Alpha.

> +
>  config DMA_RESTRICTED_POOL
>  	bool "DMA Restricted Pool"
>  	depends on OF && OF_RESERVED_MEM && SWIOTLB
> diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
> index 4a658de44ee9..6c7640441214 100644
> --- a/include/linux/dma-mapping.h
> +++ b/include/linux/dma-mapping.h
> @@ -117,13 +117,13 @@ dma_addr_t dma_map_resource(struct device *dev, phys_addr_t phys_addr,
>  		size_t size, enum dma_data_direction dir, unsigned long attrs);
>  void dma_unmap_resource(struct device *dev, dma_addr_t addr, size_t size,
>  		enum dma_data_direction dir, unsigned long attrs);
> -void dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr, size_t size,
> +void __dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr, size_t size,
>  		enum dma_data_direction dir);
> -void dma_sync_single_for_device(struct device *dev, dma_addr_t addr,
> +void __dma_sync_single_for_device(struct device *dev, dma_addr_t addr,
>  		size_t size, enum dma_data_direction dir);
> -void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
> +void __dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
>  		int nelems, enum dma_data_direction dir);
> -void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
> +void __dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
>  		int nelems, enum dma_data_direction dir);
>  void *dma_alloc_attrs(struct device *dev, size_t size, dma_addr_t *dma_handle,
>  		gfp_t flag, unsigned long attrs);
> @@ -147,7 +147,7 @@ u64 dma_get_required_mask(struct device *dev);
>  bool dma_addressing_limited(struct device *dev);
>  size_t dma_max_mapping_size(struct device *dev);
>  size_t dma_opt_mapping_size(struct device *dev);
> -bool dma_need_sync(struct device *dev, dma_addr_t dma_addr);
> +bool __dma_need_sync(struct device *dev, dma_addr_t dma_addr);
>  unsigned long dma_get_merge_boundary(struct device *dev);
>  struct sg_table *dma_alloc_noncontiguous(struct device *dev, size_t size,
>  		enum dma_data_direction dir, gfp_t gfp, unsigned long attrs);
> @@ -195,19 +195,19 @@ static inline void dma_unmap_resource(struct device *dev, dma_addr_t addr,
>  		size_t size, enum dma_data_direction dir, unsigned long attrs)
>  {
>  }
> -static inline void dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr,
> -		size_t size, enum dma_data_direction dir)
> +static inline void __dma_sync_single_for_cpu(struct device *dev,
> +		dma_addr_t addr, size_t size, enum dma_data_direction dir)

To me it would feel more logical to put all the wrappers inside the
#ifdef CONFIG_HAS_DMA and not touch these stubs at all (what does it
mean to skip an inline no-op?). Or in fact, if dma_skip_sync() is
constant false for !HAS_DMA, then we could also just make the external
function declarations unconditional and remove the stubs. Not a
critical matter though, and I defer to whatever Christoph thinks is
most maintainable.

>  {
>  }
> -static inline void dma_sync_single_for_device(struct device *dev,
> +static inline void __dma_sync_single_for_device(struct device *dev,
>  		dma_addr_t addr, size_t size, enum dma_data_direction dir)
>  {
>  }
> -static inline void dma_sync_sg_for_cpu(struct device *dev,
> +static inline void __dma_sync_sg_for_cpu(struct device *dev,
>  		struct scatterlist *sg, int nelems, enum dma_data_direction dir)
>  {
>  }
> -static inline void dma_sync_sg_for_device(struct device *dev,
> +static inline void __dma_sync_sg_for_device(struct device *dev,
>  		struct scatterlist *sg, int nelems, enum dma_data_direction dir)
>  {
>  }
> @@ -277,7 +277,7 @@ static inline size_t dma_opt_mapping_size(struct device *dev)
>  {
>  	return 0;
>  }
> -static inline bool dma_need_sync(struct device *dev, dma_addr_t dma_addr)
> +static inline bool __dma_need_sync(struct device *dev, dma_addr_t dma_addr)
>  {
>  	return false;
>  }
> @@ -348,18 +348,72 @@ static inline void dma_unmap_single_attrs(struct device *dev, dma_addr_t addr,
>  	return dma_unmap_page_attrs(dev, addr, size, dir, attrs);
>  }
>
> +static inline void __dma_sync_single_range_for_cpu(struct device *dev,
> +		dma_addr_t addr, unsigned long offset, size_t size,
> +		enum dma_data_direction dir)
> +{
> +	__dma_sync_single_for_cpu(dev, addr + offset, size, dir);
> +}
> +
> +static inline void __dma_sync_single_range_for_device(struct device *dev,
> +		dma_addr_t addr, unsigned long offset, size_t size,
> +		enum dma_data_direction dir)
> +{
> +	__dma_sync_single_for_device(dev, addr + offset, size, dir);
> +}

There is no need to introduce these two.

> +
> +static inline bool dma_skip_sync(const struct device *dev)
> +{
> +	return !IS_ENABLED(CONFIG_DMA_NEED_SYNC);
> +}
> +
> +static inline bool dma_need_sync(struct device *dev, dma_addr_t dma_addr)
> +{
> +	return !dma_skip_sync(dev) ? __dma_need_sync(dev, dma_addr) : false;
> +}

That's a bit of a mind-bender... is it actually just

	return !dma_skip_sync(dev) && __dma_need_sync(dev, dma_addr);

?

(I do still think the negative flag makes it all a little harder to
follow in general than a positive "device needs to consider syncs" flag
would.)

> +static inline void dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr,
> +		size_t size, enum dma_data_direction dir)
> +{
> +	if (!dma_skip_sync(dev))
> +		__dma_sync_single_for_cpu(dev, addr, size, dir);
> +}
> +
> +static inline void dma_sync_single_for_device(struct device *dev,
> +		dma_addr_t addr, size_t size, enum dma_data_direction dir)
> +{
> +	if (!dma_skip_sync(dev))
> +		__dma_sync_single_for_device(dev, addr, size, dir);
> +}
> +
> +static inline void dma_sync_sg_for_cpu(struct device *dev,
> +		struct scatterlist *sg, int nelems, enum dma_data_direction dir)
> +{
> +	if (!dma_skip_sync(dev))
> +		__dma_sync_sg_for_cpu(dev, sg, nelems, dir);
> +}
> +
> +static inline void dma_sync_sg_for_device(struct device *dev,
> +		struct scatterlist *sg, int nelems, enum dma_data_direction dir)
> +{
> +	if (!dma_skip_sync(dev))
> +		__dma_sync_sg_for_device(dev, sg, nelems, dir);
> +}
> +
>  static inline void dma_sync_single_range_for_cpu(struct device *dev,
>  		dma_addr_t addr, unsigned long offset, size_t size,
>  		enum dma_data_direction dir)
>  {
> -	return dma_sync_single_for_cpu(dev, addr + offset, size, dir);
> +	if (!dma_skip_sync(dev))
> +		__dma_sync_single_for_cpu(dev, addr + offset, size, dir);
>  }
>
>  static inline void dma_sync_single_range_for_device(struct device *dev,
>  		dma_addr_t addr, unsigned long offset, size_t size,
>  		enum dma_data_direction dir)
>  {
> -	return dma_sync_single_for_device(dev, addr + offset, size, dir);
> +	if (!dma_skip_sync(dev))
> +		__dma_sync_single_for_device(dev, addr + offset, size, dir);
>  }

These two don't need changing either, since the dma_sync_single_*
wrappers have already taken care of it.

Thanks,
Robin.
On 2024-02-14 4:21 pm, Alexander Lobakin wrote:
[...]
> +static inline bool dma_skip_sync(const struct device *dev)
> +{
> +	return !IS_ENABLED(CONFIG_DMA_NEED_SYNC);
> +}

One more thing, could we please also make this conditional on
!CONFIG_DMA_API_DEBUG so that that doesn't lose coverage for validating
syncs?

Thanks,
Robin.
On Wed, Feb 14, 2024 at 05:20:50PM +0000, Robin Murphy wrote:
>> +config DMA_NEED_SYNC
>> +	def_bool ARCH_HAS_SYNC_DMA_FOR_DEVICE || ARCH_HAS_SYNC_DMA_FOR_CPU || \
>> +		 ARCH_HAS_SYNC_DMA_FOR_CPU_ALL || DMA_OPS || SWIOTLB
>
> I'm not sure DMA_OPS belongs here - several architectures have non-trivial
> ops without syncs, e.g. Alpha.

True, but peeking through the ops is a bit hard.  And I don't think
it's worth optimizing the dma sync performance on Alpha :)

>> +static inline void __dma_sync_single_for_cpu(struct device *dev,
>> +	dma_addr_t addr, size_t size, enum dma_data_direction dir)
>
> To me it would feel more logical to put all the wrappers inside the #ifdef
> CONFIG_HAS_DMA and not touch these stubs at all (what does it mean to skip
> an inline no-op?). Or in fact, if dma_skip_sync() is constant false for
> !HAS_DMA, then we could also just make the external function declarations
> unconditional and remove the stubs. Not a critical matter though, and I
> defer to whatever Christoph thinks is most maintainable.

Your idea sounds reasonable to me, but I don't have a strong preference.

>> +static inline bool dma_need_sync(struct device *dev, dma_addr_t dma_addr)
>> +{
>> +	return !dma_skip_sync(dev) ? __dma_need_sync(dev, dma_addr) : false;
>> +}
>
> That's a bit of a mind-bender... is it actually just
>
> 	return !dma_skip_sync(dev) && __dma_need_sync(dev, dma_addr);
>
> ?

That looks a lot more readable for sure.

> (I do still think the negative flag makes it all a little harder to follow
> in general than a positive "device needs to consider syncs" flag would.)

Probably.
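For reference, this is roughly what the helper would read like with the
&&-form agreed on above folded back in; a minimal sketch only, not a
respin of the patch:

	/* Skip the check entirely when CONFIG_DMA_NEED_SYNC is disabled,
	 * otherwise defer to the real __dma_need_sync() lookup.
	 */
	static inline bool dma_need_sync(struct device *dev, dma_addr_t dma_addr)
	{
		return !dma_skip_sync(dev) && __dma_need_sync(dev, dma_addr);
	}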
On Wed, Feb 14, 2024 at 06:09:08PM +0000, Robin Murphy wrote:
> On 2024-02-14 4:21 pm, Alexander Lobakin wrote:
> [...]
>> +static inline bool dma_skip_sync(const struct device *dev)
>> +{
>> +	return !IS_ENABLED(CONFIG_DMA_NEED_SYNC);
>> +}
>
> One more thing, could we please also make this conditional on
> !CONFIG_DMA_API_DEBUG so that that doesn't lose coverage for validating
> syncs?

Agreed.
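A possible shape for the change agreed on here, shown only as a sketch:
never skip the sync wrappers while DMA API debugging is built in, so
debug_dma_sync_*() keeps validating every sync call even on platforms
where DMA_NEED_SYNC would otherwise be disabled.

	static inline bool dma_skip_sync(const struct device *dev)
	{
		/* Keep the syncs (and their debug checks) when CONFIG_DMA_API_DEBUG
		 * is enabled; only skip when neither option needs them.
		 */
		return !IS_ENABLED(CONFIG_DMA_NEED_SYNC) &&
		       !IS_ENABLED(CONFIG_DMA_API_DEBUG);
	}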
From: Robin Murphy <robin.murphy@arm.com>
Date: Wed, 14 Feb 2024 17:20:50 +0000

> On 2024-02-14 4:21 pm, Alexander Lobakin wrote:

[...]

>> -static inline void dma_sync_single_for_cpu(struct device *dev,
>> dma_addr_t addr,
>> -		size_t size, enum dma_data_direction dir)
>> +static inline void __dma_sync_single_for_cpu(struct device *dev,
>> +		dma_addr_t addr, size_t size, enum dma_data_direction dir)
>
> To me it would feel more logical to put all the wrappers inside the
> #ifdef CONFIG_HAS_DMA and not touch these stubs at all (what does it
> mean to skip an inline no-op?). Or in fact, if dma_skip_sync() is
> constant false for !HAS_DMA, then we could also just make the external
> function declarations unconditional and remove the stubs. Not a critical
> matter though, and I defer to whatever Christoph thinks is most
> maintainable.

It's done like that because I'm adding a runtime check in the second
patch. I don't feel like touching this twice makes sense.

[...]

>> @@ -348,18 +348,72 @@ static inline void dma_unmap_single_attrs(struct
>> device *dev, dma_addr_t addr,
>>  	return dma_unmap_page_attrs(dev, addr, size, dir, attrs);
>>  }
>> +static inline void __dma_sync_single_range_for_cpu(struct device *dev,
>> +		dma_addr_t addr, unsigned long offset, size_t size,
>> +		enum dma_data_direction dir)
>> +{
>> +	__dma_sync_single_for_cpu(dev, addr + offset, size, dir);
>> +}
>> +
>> +static inline void __dma_sync_single_range_for_device(struct device
>> *dev,
>> +		dma_addr_t addr, unsigned long offset, size_t size,
>> +		enum dma_data_direction dir)
>> +{
>> +	__dma_sync_single_for_device(dev, addr + offset, size, dir);
>> +}
>
> There is no need to introduce these two.

I already replied to this in the previous thread. Some subsystems may
want to check for the shortcut earlier to avoid call ladders in their
own functions. See patch 6 for example where I use this one.

>
>> +
>> +static inline bool dma_skip_sync(const struct device *dev)
>> +{
>> +	return !IS_ENABLED(CONFIG_DMA_NEED_SYNC);
>> +}
>> +
>> +static inline bool dma_need_sync(struct device *dev, dma_addr_t
>> dma_addr)
>> +{
>> +	return !dma_skip_sync(dev) ? __dma_need_sync(dev, dma_addr) : false;
>> +}
>
> That's a bit of a mind-bender... is it actually just
>
> 	return !dma_skip_sync(dev) && __dma_need_sync(dev, dma_addr);

Oh, indeed ._.

>
> ?
>
> (I do still think the negative flag makes it all a little harder to
> follow in general than a positive "device needs to consider syncs" flag
> would.)

I think it was like that in Eric's original idea and I kept it. I'm fine
with inverting it.

[...]

> Thanks,
> Robin.

Thanks,
Olek
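For illustration, a positive-polarity variant as discussed above might
look like the sketch below; the dma_dev_need_sync() name is hypothetical
and not something posted in this series:

	static inline bool dma_dev_need_sync(const struct device *dev)
	{
		/* Compile-time-only version of the positive flag */
		return IS_ENABLED(CONFIG_DMA_NEED_SYNC);
	}

	static inline void dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr,
			size_t size, enum dma_data_direction dir)
	{
		/* Positive check: only reach the real sync when the device may need it */
		if (dma_dev_need_sync(dev))
			__dma_sync_single_for_cpu(dev, addr, size, dir);
	}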
On 19/02/2024 12:53 pm, Alexander Lobakin wrote:
> From: Robin Murphy <robin.murphy@arm.com>
> Date: Wed, 14 Feb 2024 17:20:50 +0000
>
>> On 2024-02-14 4:21 pm, Alexander Lobakin wrote:
>
> [...]
>
>>> -static inline void dma_sync_single_for_cpu(struct device *dev,
>>> dma_addr_t addr,
>>> -		size_t size, enum dma_data_direction dir)
>>> +static inline void __dma_sync_single_for_cpu(struct device *dev,
>>> +		dma_addr_t addr, size_t size, enum dma_data_direction dir)
>>
>> To me it would feel more logical to put all the wrappers inside the
>> #ifdef CONFIG_HAS_DMA and not touch these stubs at all (what does it
>> mean to skip an inline no-op?). Or in fact, if dma_skip_sync() is
>> constant false for !HAS_DMA, then we could also just make the external
>> function declarations unconditional and remove the stubs. Not a critical
>> matter though, and I defer to whatever Christoph thinks is most
>> maintainable.
>
> It's done like that because I'm adding a runtime check in the second
> patch. I don't feel like touching this twice makes sense.

Huh? Why would anything need touching twice? All I'm saying is that it's
pretty pointless to add any invocations of dma_skip_sync() in !HAS_DMA
paths where we already know the whole API is stubbed out anyway. The
only cases which are worth differentiating here are HAS_DMA +
DMA_NEED_SYNC vs. HAS_DMA + !DMA_NEED_SYNC (with the subsequent runtime
check then just subdividing the former).

>
> [...]
>
>>> @@ -348,18 +348,72 @@ static inline void dma_unmap_single_attrs(struct
>>> device *dev, dma_addr_t addr,
>>>  	return dma_unmap_page_attrs(dev, addr, size, dir, attrs);
>>>  }
>>> +static inline void __dma_sync_single_range_for_cpu(struct device *dev,
>>> +		dma_addr_t addr, unsigned long offset, size_t size,
>>> +		enum dma_data_direction dir)
>>> +{
>>> +	__dma_sync_single_for_cpu(dev, addr + offset, size, dir);
>>> +}
>>> +
>>> +static inline void __dma_sync_single_range_for_device(struct device
>>> *dev,
>>> +		dma_addr_t addr, unsigned long offset, size_t size,
>>> +		enum dma_data_direction dir)
>>> +{
>>> +	__dma_sync_single_for_device(dev, addr + offset, size, dir);
>>> +}
>>
>> There is no need to introduce these two.
>
> I already replied to this in the previous thread. Some subsystems may
> want to check for the shortcut earlier to avoid call ladders in their
> own functions. See patch 6 for example where I use this one.

Ugh, no. If the page pool code wants to be clever poking around and
sidestepping parts of the documented API, it can flippin' well open-code
a single addition to call __dma_sync_single_for_device() directly
itself. I'm not at all keen on having to maintain "common" APIs for such
niche trickery.

Thanks,
Robin.
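To illustrate the open-coded alternative Robin describes, a rough sketch
of how page_pool could add the offset at the call site instead of relying
on a new __dma_sync_single_range_for_device() wrapper; the field names
follow the existing net/core/page_pool.c helper and should be treated as
assumptions, not as code from this series:

	static void page_pool_dma_sync_for_device(const struct page_pool *pool,
						  const struct page *page,
						  u32 dma_sync_size)
	{
		dma_addr_t dma_addr = page_pool_get_dma_addr(page);

		dma_sync_size = min(dma_sync_size, pool->p.max_len);
		/* Open-code the "+ offset" and call the bare sync helper directly */
		__dma_sync_single_for_device(pool->p.dev, dma_addr + pool->p.offset,
					     dma_sync_size, pool->p.dma_dir);
	}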
diff --git a/kernel/dma/Kconfig b/kernel/dma/Kconfig
index d62f5957f36b..1c9ff05b1ecb 100644
--- a/kernel/dma/Kconfig
+++ b/kernel/dma/Kconfig
@@ -107,6 +107,10 @@ config DMA_BOUNCE_UNALIGNED_KMALLOC
 	bool
 	depends on SWIOTLB
 
+config DMA_NEED_SYNC
+	def_bool ARCH_HAS_SYNC_DMA_FOR_DEVICE || ARCH_HAS_SYNC_DMA_FOR_CPU || \
+		 ARCH_HAS_SYNC_DMA_FOR_CPU_ALL || DMA_OPS || SWIOTLB
+
 config DMA_RESTRICTED_POOL
 	bool "DMA Restricted Pool"
 	depends on OF && OF_RESERVED_MEM && SWIOTLB
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index 4a658de44ee9..6c7640441214 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -117,13 +117,13 @@ dma_addr_t dma_map_resource(struct device *dev, phys_addr_t phys_addr,
 		size_t size, enum dma_data_direction dir, unsigned long attrs);
 void dma_unmap_resource(struct device *dev, dma_addr_t addr, size_t size,
 		enum dma_data_direction dir, unsigned long attrs);
-void dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr, size_t size,
+void __dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr, size_t size,
 		enum dma_data_direction dir);
-void dma_sync_single_for_device(struct device *dev, dma_addr_t addr,
+void __dma_sync_single_for_device(struct device *dev, dma_addr_t addr,
 		size_t size, enum dma_data_direction dir);
-void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
+void __dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
 		int nelems, enum dma_data_direction dir);
-void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
+void __dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
 		int nelems, enum dma_data_direction dir);
 void *dma_alloc_attrs(struct device *dev, size_t size, dma_addr_t *dma_handle,
 		gfp_t flag, unsigned long attrs);
@@ -147,7 +147,7 @@ u64 dma_get_required_mask(struct device *dev);
 bool dma_addressing_limited(struct device *dev);
 size_t dma_max_mapping_size(struct device *dev);
 size_t dma_opt_mapping_size(struct device *dev);
-bool dma_need_sync(struct device *dev, dma_addr_t dma_addr);
+bool __dma_need_sync(struct device *dev, dma_addr_t dma_addr);
 unsigned long dma_get_merge_boundary(struct device *dev);
 struct sg_table *dma_alloc_noncontiguous(struct device *dev, size_t size,
 		enum dma_data_direction dir, gfp_t gfp, unsigned long attrs);
@@ -195,19 +195,19 @@ static inline void dma_unmap_resource(struct device *dev, dma_addr_t addr,
 		size_t size, enum dma_data_direction dir, unsigned long attrs)
 {
 }
-static inline void dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr,
-		size_t size, enum dma_data_direction dir)
+static inline void __dma_sync_single_for_cpu(struct device *dev,
+		dma_addr_t addr, size_t size, enum dma_data_direction dir)
 {
 }
-static inline void dma_sync_single_for_device(struct device *dev,
+static inline void __dma_sync_single_for_device(struct device *dev,
 		dma_addr_t addr, size_t size, enum dma_data_direction dir)
 {
 }
-static inline void dma_sync_sg_for_cpu(struct device *dev,
+static inline void __dma_sync_sg_for_cpu(struct device *dev,
 		struct scatterlist *sg, int nelems, enum dma_data_direction dir)
 {
 }
-static inline void dma_sync_sg_for_device(struct device *dev,
+static inline void __dma_sync_sg_for_device(struct device *dev,
 		struct scatterlist *sg, int nelems, enum dma_data_direction dir)
 {
 }
@@ -277,7 +277,7 @@ static inline size_t dma_opt_mapping_size(struct device *dev)
 {
 	return 0;
 }
-static inline bool dma_need_sync(struct device *dev, dma_addr_t dma_addr)
+static inline bool __dma_need_sync(struct device *dev, dma_addr_t dma_addr)
 {
 	return false;
 }
@@ -348,18 +348,72 @@ static inline void dma_unmap_single_attrs(struct device *dev, dma_addr_t addr,
 	return dma_unmap_page_attrs(dev, addr, size, dir, attrs);
 }
 
+static inline void __dma_sync_single_range_for_cpu(struct device *dev,
+		dma_addr_t addr, unsigned long offset, size_t size,
+		enum dma_data_direction dir)
+{
+	__dma_sync_single_for_cpu(dev, addr + offset, size, dir);
+}
+
+static inline void __dma_sync_single_range_for_device(struct device *dev,
+		dma_addr_t addr, unsigned long offset, size_t size,
+		enum dma_data_direction dir)
+{
+	__dma_sync_single_for_device(dev, addr + offset, size, dir);
+}
+
+static inline bool dma_skip_sync(const struct device *dev)
+{
+	return !IS_ENABLED(CONFIG_DMA_NEED_SYNC);
+}
+
+static inline bool dma_need_sync(struct device *dev, dma_addr_t dma_addr)
+{
+	return !dma_skip_sync(dev) ? __dma_need_sync(dev, dma_addr) : false;
+}
+
+static inline void dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr,
+		size_t size, enum dma_data_direction dir)
+{
+	if (!dma_skip_sync(dev))
+		__dma_sync_single_for_cpu(dev, addr, size, dir);
+}
+
+static inline void dma_sync_single_for_device(struct device *dev,
+		dma_addr_t addr, size_t size, enum dma_data_direction dir)
+{
+	if (!dma_skip_sync(dev))
+		__dma_sync_single_for_device(dev, addr, size, dir);
+}
+
+static inline void dma_sync_sg_for_cpu(struct device *dev,
+		struct scatterlist *sg, int nelems, enum dma_data_direction dir)
+{
+	if (!dma_skip_sync(dev))
+		__dma_sync_sg_for_cpu(dev, sg, nelems, dir);
+}
+
+static inline void dma_sync_sg_for_device(struct device *dev,
+		struct scatterlist *sg, int nelems, enum dma_data_direction dir)
+{
+	if (!dma_skip_sync(dev))
+		__dma_sync_sg_for_device(dev, sg, nelems, dir);
+}
+
 static inline void dma_sync_single_range_for_cpu(struct device *dev,
 		dma_addr_t addr, unsigned long offset, size_t size,
 		enum dma_data_direction dir)
 {
-	return dma_sync_single_for_cpu(dev, addr + offset, size, dir);
+	if (!dma_skip_sync(dev))
+		__dma_sync_single_for_cpu(dev, addr + offset, size, dir);
 }
 
 static inline void dma_sync_single_range_for_device(struct device *dev,
 		dma_addr_t addr, unsigned long offset, size_t size,
 		enum dma_data_direction dir)
 {
-	return dma_sync_single_for_device(dev, addr + offset, size, dir);
+	if (!dma_skip_sync(dev))
+		__dma_sync_single_for_device(dev, addr + offset, size, dir);
 }
 
 /**
diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c
index 58db8fd70471..85feaa0e008c 100644
--- a/kernel/dma/mapping.c
+++ b/kernel/dma/mapping.c
@@ -329,7 +329,7 @@ void dma_unmap_resource(struct device *dev, dma_addr_t addr, size_t size,
 }
 EXPORT_SYMBOL(dma_unmap_resource);
 
-void dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr, size_t size,
+void __dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr, size_t size,
 		enum dma_data_direction dir)
 {
 	const struct dma_map_ops *ops = get_dma_ops(dev);
@@ -341,9 +341,9 @@ void dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr, size_t size,
 		ops->sync_single_for_cpu(dev, addr, size, dir);
 	debug_dma_sync_single_for_cpu(dev, addr, size, dir);
 }
-EXPORT_SYMBOL(dma_sync_single_for_cpu);
+EXPORT_SYMBOL(__dma_sync_single_for_cpu);
 
-void dma_sync_single_for_device(struct device *dev, dma_addr_t addr,
+void __dma_sync_single_for_device(struct device *dev, dma_addr_t addr,
 		size_t size, enum dma_data_direction dir)
 {
 	const struct dma_map_ops *ops = get_dma_ops(dev);
@@ -355,9 +355,9 @@ void dma_sync_single_for_device(struct device *dev, dma_addr_t addr,
 		ops->sync_single_for_device(dev, addr, size, dir);
 	debug_dma_sync_single_for_device(dev, addr, size, dir);
 }
-EXPORT_SYMBOL(dma_sync_single_for_device);
+EXPORT_SYMBOL(__dma_sync_single_for_device);
 
-void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
+void __dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
 		int nelems, enum dma_data_direction dir)
 {
 	const struct dma_map_ops *ops = get_dma_ops(dev);
@@ -369,9 +369,9 @@ void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
 		ops->sync_sg_for_cpu(dev, sg, nelems, dir);
 	debug_dma_sync_sg_for_cpu(dev, sg, nelems, dir);
 }
-EXPORT_SYMBOL(dma_sync_sg_for_cpu);
+EXPORT_SYMBOL(__dma_sync_sg_for_cpu);
 
-void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
+void __dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
 		int nelems, enum dma_data_direction dir)
 {
 	const struct dma_map_ops *ops = get_dma_ops(dev);
@@ -383,7 +383,7 @@ void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
 		ops->sync_sg_for_device(dev, sg, nelems, dir);
 	debug_dma_sync_sg_for_device(dev, sg, nelems, dir);
 }
-EXPORT_SYMBOL(dma_sync_sg_for_device);
+EXPORT_SYMBOL(__dma_sync_sg_for_device);
 
 /*
  * The whole dma_get_sgtable() idea is fundamentally unsafe - it seems
@@ -841,7 +841,7 @@ size_t dma_opt_mapping_size(struct device *dev)
 }
 EXPORT_SYMBOL_GPL(dma_opt_mapping_size);
 
-bool dma_need_sync(struct device *dev, dma_addr_t dma_addr)
+bool __dma_need_sync(struct device *dev, dma_addr_t dma_addr)
 {
 	const struct dma_map_ops *ops = get_dma_ops(dev);
 
@@ -849,7 +849,7 @@ bool dma_need_sync(struct device *dev, dma_addr_t dma_addr)
 		return dma_direct_need_sync(dev, dma_addr);
 	return ops->sync_single_for_cpu || ops->sync_single_for_device;
 }
-EXPORT_SYMBOL_GPL(dma_need_sync);
+EXPORT_SYMBOL_GPL(__dma_need_sync);
 
 unsigned long dma_get_merge_boundary(struct device *dev)
 {
Some platforms do have DMA, but DMA there is always direct and coherent.
Currently, even on such platforms DMA sync operations are compiled and
called.
Add a new hidden Kconfig symbol, DMA_NEED_SYNC, and set it only when
either sync operations are needed or there is DMA ops or swiotlb
enabled. Set dma_need_sync() and dma_skip_sync() depending on this
symbol state and don't call sync ops when dma_skip_sync() is true.
The change allows for future optimizations of DMA sync calls depending
on compile-time or runtime conditions.

Signed-off-by: Alexander Lobakin <aleksander.lobakin@intel.com>
---
 kernel/dma/Kconfig          |  4 ++
 include/linux/dma-mapping.h | 80 +++++++++++++++++++++++++++++++------
 kernel/dma/mapping.c        | 20 +++++-----
 3 files changed, 81 insertions(+), 23 deletions(-)
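As a usage illustration of the optimization described above (hypothetical
driver snippet, not from the series): on a platform where
CONFIG_DMA_NEED_SYNC ends up disabled, the inline wrapper sees
dma_skip_sync() as constant true and the whole call below is compiled out.

	static void example_rx_complete(struct device *dev, dma_addr_t buf_dma,
					size_t len)
	{
		/* No-op with CONFIG_DMA_NEED_SYNC=n; unchanged behaviour otherwise */
		dma_sync_single_for_cpu(dev, buf_dma, len, DMA_FROM_DEVICE);
	}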