Message ID | 20210521153253.518037-9-thomas.hellstrom@linux.intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | drm/i915: Move LMEM (VRAM) management over to TTM | expand |
On Fri, 21 May 2021 at 16:33, Thomas Hellström <thomas.hellstrom@linux.intel.com> wrote: > > Use fast wc memcpy for reading out of wc memory for TTM bo moves. > > Cc: Dave Airlie <airlied@gmail.com> > Cc: Christian König <christian.koenig@amd.com> > Cc: Daniel Vetter <daniel.vetter@ffwll.ch> > Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com> > --- > drivers/gpu/drm/ttm/ttm_bo_util.c | 9 ++++++++- > 1 file changed, 8 insertions(+), 1 deletion(-) > > diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c > index 912cbe8e60a2..4a7d3d672f9a 100644 > --- a/drivers/gpu/drm/ttm/ttm_bo_util.c > +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c > @@ -31,6 +31,7 @@ > > #include <drm/ttm/ttm_bo_driver.h> > #include <drm/ttm/ttm_placement.h> > +#include <drm/drm_memcpy.h> > #include <drm/drm_vma_manager.h> > #include <linux/dma-buf-map.h> > #include <linux/io.h> > @@ -91,6 +92,7 @@ void ttm_move_memcpy(struct ttm_buffer_object *bo, > const struct ttm_kmap_iter_ops *src_ops = src_iter->ops; > struct ttm_tt *ttm = bo->ttm; > struct dma_buf_map src_map, dst_map; > + bool wc_memcpy; > pgoff_t i; > > /* Single TTM move. NOP */ > @@ -114,11 +116,16 @@ void ttm_move_memcpy(struct ttm_buffer_object *bo, > return; > } > > + wc_memcpy = ((!src_ops->maps_tt || ttm->caching != ttm_cached) && Why do we only consider the caching value for the maps_tt case? Or am I misreading this? > + drm_has_memcpy_from_wc()); > + > for (i = 0; i < dst_mem->num_pages; ++i) { > dst_ops->map_local(dst_iter, &dst_map, i); > src_ops->map_local(src_iter, &src_map, i); > > - if (!src_map.is_iomem && !dst_map.is_iomem) { > + if (wc_memcpy) { > + drm_memcpy_from_wc_dbm(&dst_map, &src_map, PAGE_SIZE); Do we need to check the return value here? memcpy_from_wc expects certain address alignment, or is that always guaranteed here? Maybe throw a warning just for paranoia? > + } else if (!src_map.is_iomem && !dst_map.is_iomem) { > memcpy(dst_map.vaddr, src_map.vaddr, PAGE_SIZE); > } else if (!src_map.is_iomem) { > dma_buf_map_memcpy_to(&dst_map, src_map.vaddr, > -- > 2.31.1 >
On Mon, 2021-05-24 at 19:16 +0100, Matthew Auld wrote: > On Fri, 21 May 2021 at 16:33, Thomas Hellström > <thomas.hellstrom@linux.intel.com> wrote: > > > > Use fast wc memcpy for reading out of wc memory for TTM bo moves. > > > > Cc: Dave Airlie <airlied@gmail.com> > > Cc: Christian König <christian.koenig@amd.com> > > Cc: Daniel Vetter <daniel.vetter@ffwll.ch> > > Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com> > > --- > > drivers/gpu/drm/ttm/ttm_bo_util.c | 9 ++++++++- > > 1 file changed, 8 insertions(+), 1 deletion(-) > > > > diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c > > b/drivers/gpu/drm/ttm/ttm_bo_util.c > > index 912cbe8e60a2..4a7d3d672f9a 100644 > > --- a/drivers/gpu/drm/ttm/ttm_bo_util.c > > +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c > > @@ -31,6 +31,7 @@ > > > > #include <drm/ttm/ttm_bo_driver.h> > > #include <drm/ttm/ttm_placement.h> > > +#include <drm/drm_memcpy.h> > > #include <drm/drm_vma_manager.h> > > #include <linux/dma-buf-map.h> > > #include <linux/io.h> > > @@ -91,6 +92,7 @@ void ttm_move_memcpy(struct ttm_buffer_object > > *bo, > > const struct ttm_kmap_iter_ops *src_ops = src_iter->ops; > > struct ttm_tt *ttm = bo->ttm; > > struct dma_buf_map src_map, dst_map; > > + bool wc_memcpy; > > pgoff_t i; > > > > /* Single TTM move. NOP */ > > @@ -114,11 +116,16 @@ void ttm_move_memcpy(struct ttm_buffer_object > > *bo, > > return; > > } > > > > + wc_memcpy = ((!src_ops->maps_tt || ttm->caching != > > ttm_cached) && > > Why do we only consider the caching value for the maps_tt case? Or am > I misreading this? Hmm, you're right we should probably check also the iomem caching or ignore the tt caching. Sometimes these special memcpys tend to be less cpu cache thrashing and should be used wherever possible, but I guess we should start out with only using it when source is WC. > > > + drm_has_memcpy_from_wc()); > > + > > for (i = 0; i < dst_mem->num_pages; ++i) { > > dst_ops->map_local(dst_iter, &dst_map, i); > > src_ops->map_local(src_iter, &src_map, i); > > > > - if (!src_map.is_iomem && !dst_map.is_iomem) { > > + if (wc_memcpy) { > > + drm_memcpy_from_wc_dbm(&dst_map, &src_map, > > PAGE_SIZE); > > Do we need to check the return value here? memcpy_from_wc expects > certain address alignment, or is that always guaranteed here? Maybe > throw a warning just for paranoia? It should always be PAGE_SIZE aligned. But I guess it doesn't hurt to do if (wc_memcpy && drm_memcpy_from_wc_dbm()) ; > > > + } else if (!src_map.is_iomem && !dst_map.is_iomem) > > { > > memcpy(dst_map.vaddr, src_map.vaddr, > > PAGE_SIZE); > > } else if (!src_map.is_iomem) { > > dma_buf_map_memcpy_to(&dst_map, > > src_map.vaddr, > > -- > > 2.31.1 > >
Am 21.05.21 um 17:32 schrieb Thomas Hellström: > Use fast wc memcpy for reading out of wc memory for TTM bo moves. > > Cc: Dave Airlie <airlied@gmail.com> > Cc: Christian König <christian.koenig@amd.com> > Cc: Daniel Vetter <daniel.vetter@ffwll.ch> > Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com> Reviewed-by: Christian König <christian.koenig@amd.com> > --- > drivers/gpu/drm/ttm/ttm_bo_util.c | 9 ++++++++- > 1 file changed, 8 insertions(+), 1 deletion(-) > > diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c > index 912cbe8e60a2..4a7d3d672f9a 100644 > --- a/drivers/gpu/drm/ttm/ttm_bo_util.c > +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c > @@ -31,6 +31,7 @@ > > #include <drm/ttm/ttm_bo_driver.h> > #include <drm/ttm/ttm_placement.h> > +#include <drm/drm_memcpy.h> > #include <drm/drm_vma_manager.h> > #include <linux/dma-buf-map.h> > #include <linux/io.h> > @@ -91,6 +92,7 @@ void ttm_move_memcpy(struct ttm_buffer_object *bo, > const struct ttm_kmap_iter_ops *src_ops = src_iter->ops; > struct ttm_tt *ttm = bo->ttm; > struct dma_buf_map src_map, dst_map; > + bool wc_memcpy; > pgoff_t i; > > /* Single TTM move. NOP */ > @@ -114,11 +116,16 @@ void ttm_move_memcpy(struct ttm_buffer_object *bo, > return; > } > > + wc_memcpy = ((!src_ops->maps_tt || ttm->caching != ttm_cached) && > + drm_has_memcpy_from_wc()); > + > for (i = 0; i < dst_mem->num_pages; ++i) { > dst_ops->map_local(dst_iter, &dst_map, i); > src_ops->map_local(src_iter, &src_map, i); > > - if (!src_map.is_iomem && !dst_map.is_iomem) { > + if (wc_memcpy) { > + drm_memcpy_from_wc_dbm(&dst_map, &src_map, PAGE_SIZE); > + } else if (!src_map.is_iomem && !dst_map.is_iomem) { > memcpy(dst_map.vaddr, src_map.vaddr, PAGE_SIZE); > } else if (!src_map.is_iomem) { > dma_buf_map_memcpy_to(&dst_map, src_map.vaddr,
diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c index 912cbe8e60a2..4a7d3d672f9a 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_util.c +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c @@ -31,6 +31,7 @@ #include <drm/ttm/ttm_bo_driver.h> #include <drm/ttm/ttm_placement.h> +#include <drm/drm_memcpy.h> #include <drm/drm_vma_manager.h> #include <linux/dma-buf-map.h> #include <linux/io.h> @@ -91,6 +92,7 @@ void ttm_move_memcpy(struct ttm_buffer_object *bo, const struct ttm_kmap_iter_ops *src_ops = src_iter->ops; struct ttm_tt *ttm = bo->ttm; struct dma_buf_map src_map, dst_map; + bool wc_memcpy; pgoff_t i; /* Single TTM move. NOP */ @@ -114,11 +116,16 @@ void ttm_move_memcpy(struct ttm_buffer_object *bo, return; } + wc_memcpy = ((!src_ops->maps_tt || ttm->caching != ttm_cached) && + drm_has_memcpy_from_wc()); + for (i = 0; i < dst_mem->num_pages; ++i) { dst_ops->map_local(dst_iter, &dst_map, i); src_ops->map_local(src_iter, &src_map, i); - if (!src_map.is_iomem && !dst_map.is_iomem) { + if (wc_memcpy) { + drm_memcpy_from_wc_dbm(&dst_map, &src_map, PAGE_SIZE); + } else if (!src_map.is_iomem && !dst_map.is_iomem) { memcpy(dst_map.vaddr, src_map.vaddr, PAGE_SIZE); } else if (!src_map.is_iomem) { dma_buf_map_memcpy_to(&dst_map, src_map.vaddr,
Use fast wc memcpy for reading out of wc memory for TTM bo moves. Cc: Dave Airlie <airlied@gmail.com> Cc: Christian König <christian.koenig@amd.com> Cc: Daniel Vetter <daniel.vetter@ffwll.ch> Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com> --- drivers/gpu/drm/ttm/ttm_bo_util.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-)