diff mbox series

[v3,08/12] drm/ttm: Use drm_memcpy_from_wc_dbm for TTM bo moves

Message ID 20210521153253.518037-9-thomas.hellstrom@linux.intel.com (mailing list archive)
State New, archived
Headers show
Series drm/i915: Move LMEM (VRAM) management over to TTM | expand

Commit Message

Thomas Hellström May 21, 2021, 3:32 p.m. UTC
Use fast wc memcpy for reading out of wc memory for TTM bo moves.

Cc: Dave Airlie <airlied@gmail.com>
Cc: Christian König <christian.koenig@amd.com>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
---
 drivers/gpu/drm/ttm/ttm_bo_util.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

Comments

Matthew Auld May 24, 2021, 6:16 p.m. UTC | #1
On Fri, 21 May 2021 at 16:33, Thomas Hellström
<thomas.hellstrom@linux.intel.com> wrote:
>
> Use fast wc memcpy for reading out of wc memory for TTM bo moves.
>
> Cc: Dave Airlie <airlied@gmail.com>
> Cc: Christian König <christian.koenig@amd.com>
> Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
> Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
> ---
>  drivers/gpu/drm/ttm/ttm_bo_util.c | 9 ++++++++-
>  1 file changed, 8 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c
> index 912cbe8e60a2..4a7d3d672f9a 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo_util.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
> @@ -31,6 +31,7 @@
>
>  #include <drm/ttm/ttm_bo_driver.h>
>  #include <drm/ttm/ttm_placement.h>
> +#include <drm/drm_memcpy.h>
>  #include <drm/drm_vma_manager.h>
>  #include <linux/dma-buf-map.h>
>  #include <linux/io.h>
> @@ -91,6 +92,7 @@ void ttm_move_memcpy(struct ttm_buffer_object *bo,
>         const struct ttm_kmap_iter_ops *src_ops = src_iter->ops;
>         struct ttm_tt *ttm = bo->ttm;
>         struct dma_buf_map src_map, dst_map;
> +       bool wc_memcpy;
>         pgoff_t i;
>
>         /* Single TTM move. NOP */
> @@ -114,11 +116,16 @@ void ttm_move_memcpy(struct ttm_buffer_object *bo,
>                 return;
>         }
>
> +       wc_memcpy = ((!src_ops->maps_tt || ttm->caching != ttm_cached) &&

Why do we only consider the caching value for the maps_tt case? Or am
I misreading this?

> +                    drm_has_memcpy_from_wc());
> +
>         for (i = 0; i < dst_mem->num_pages; ++i) {
>                 dst_ops->map_local(dst_iter, &dst_map, i);
>                 src_ops->map_local(src_iter, &src_map, i);
>
> -               if (!src_map.is_iomem && !dst_map.is_iomem) {
> +               if (wc_memcpy) {
> +                       drm_memcpy_from_wc_dbm(&dst_map, &src_map, PAGE_SIZE);

Do we need to check the return value here? memcpy_from_wc expects
certain address alignment, or is that always guaranteed here? Maybe
throw a warning just for paranoia?

> +               } else if (!src_map.is_iomem && !dst_map.is_iomem) {
>                         memcpy(dst_map.vaddr, src_map.vaddr, PAGE_SIZE);
>                 } else if (!src_map.is_iomem) {
>                         dma_buf_map_memcpy_to(&dst_map, src_map.vaddr,
> --
> 2.31.1
>
Thomas Hellström May 24, 2021, 6:47 p.m. UTC | #2
On Mon, 2021-05-24 at 19:16 +0100, Matthew Auld wrote:
> On Fri, 21 May 2021 at 16:33, Thomas Hellström
> <thomas.hellstrom@linux.intel.com> wrote:
> > 
> > Use fast wc memcpy for reading out of wc memory for TTM bo moves.
> > 
> > Cc: Dave Airlie <airlied@gmail.com>
> > Cc: Christian König <christian.koenig@amd.com>
> > Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
> > Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
> > ---
> >  drivers/gpu/drm/ttm/ttm_bo_util.c | 9 ++++++++-
> >  1 file changed, 8 insertions(+), 1 deletion(-)
> > 
> > diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c
> > b/drivers/gpu/drm/ttm/ttm_bo_util.c
> > index 912cbe8e60a2..4a7d3d672f9a 100644
> > --- a/drivers/gpu/drm/ttm/ttm_bo_util.c
> > +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
> > @@ -31,6 +31,7 @@
> > 
> >  #include <drm/ttm/ttm_bo_driver.h>
> >  #include <drm/ttm/ttm_placement.h>
> > +#include <drm/drm_memcpy.h>
> >  #include <drm/drm_vma_manager.h>
> >  #include <linux/dma-buf-map.h>
> >  #include <linux/io.h>
> > @@ -91,6 +92,7 @@ void ttm_move_memcpy(struct ttm_buffer_object
> > *bo,
> >         const struct ttm_kmap_iter_ops *src_ops = src_iter->ops;
> >         struct ttm_tt *ttm = bo->ttm;
> >         struct dma_buf_map src_map, dst_map;
> > +       bool wc_memcpy;
> >         pgoff_t i;
> > 
> >         /* Single TTM move. NOP */
> > @@ -114,11 +116,16 @@ void ttm_move_memcpy(struct ttm_buffer_object
> > *bo,
> >                 return;
> >         }
> > 
> > +       wc_memcpy = ((!src_ops->maps_tt || ttm->caching !=
> > ttm_cached) &&
> 
> Why do we only consider the caching value for the maps_tt case? Or am
> I misreading this?

Hmm, you're right we should probably check also the iomem caching or
ignore the tt caching. Sometimes these special memcpys tend to be less
cpu cache thrashing and should be used wherever possible, but I guess
we should start out with only using it when source is WC.

> 
> > +                    drm_has_memcpy_from_wc());
> > +
> >         for (i = 0; i < dst_mem->num_pages; ++i) {
> >                 dst_ops->map_local(dst_iter, &dst_map, i);
> >                 src_ops->map_local(src_iter, &src_map, i);
> > 
> > -               if (!src_map.is_iomem && !dst_map.is_iomem) {
> > +               if (wc_memcpy) {
> > +                       drm_memcpy_from_wc_dbm(&dst_map, &src_map,
> > PAGE_SIZE);
> 
> Do we need to check the return value here? memcpy_from_wc expects
> certain address alignment, or is that always guaranteed here? Maybe
> throw a warning just for paranoia?

It should always be PAGE_SIZE aligned. But I guess it doesn't hurt to
do 
if (wc_memcpy && drm_memcpy_from_wc_dbm())
;

> 
> > +               } else if (!src_map.is_iomem && !dst_map.is_iomem)
> > {
> >                         memcpy(dst_map.vaddr, src_map.vaddr,
> > PAGE_SIZE);
> >                 } else if (!src_map.is_iomem) {
> >                         dma_buf_map_memcpy_to(&dst_map,
> > src_map.vaddr,
> > --
> > 2.31.1
> >
Christian König May 26, 2021, 12:48 p.m. UTC | #3
Am 21.05.21 um 17:32 schrieb Thomas Hellström:
> Use fast wc memcpy for reading out of wc memory for TTM bo moves.
>
> Cc: Dave Airlie <airlied@gmail.com>
> Cc: Christian König <christian.koenig@amd.com>
> Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
> Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>

Reviewed-by: Christian König <christian.koenig@amd.com>

> ---
>   drivers/gpu/drm/ttm/ttm_bo_util.c | 9 ++++++++-
>   1 file changed, 8 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c
> index 912cbe8e60a2..4a7d3d672f9a 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo_util.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
> @@ -31,6 +31,7 @@
>   
>   #include <drm/ttm/ttm_bo_driver.h>
>   #include <drm/ttm/ttm_placement.h>
> +#include <drm/drm_memcpy.h>
>   #include <drm/drm_vma_manager.h>
>   #include <linux/dma-buf-map.h>
>   #include <linux/io.h>
> @@ -91,6 +92,7 @@ void ttm_move_memcpy(struct ttm_buffer_object *bo,
>   	const struct ttm_kmap_iter_ops *src_ops = src_iter->ops;
>   	struct ttm_tt *ttm = bo->ttm;
>   	struct dma_buf_map src_map, dst_map;
> +	bool wc_memcpy;
>   	pgoff_t i;
>   
>   	/* Single TTM move. NOP */
> @@ -114,11 +116,16 @@ void ttm_move_memcpy(struct ttm_buffer_object *bo,
>   		return;
>   	}
>   
> +	wc_memcpy = ((!src_ops->maps_tt || ttm->caching != ttm_cached) &&
> +		     drm_has_memcpy_from_wc());
> +
>   	for (i = 0; i < dst_mem->num_pages; ++i) {
>   		dst_ops->map_local(dst_iter, &dst_map, i);
>   		src_ops->map_local(src_iter, &src_map, i);
>   
> -		if (!src_map.is_iomem && !dst_map.is_iomem) {
> +		if (wc_memcpy) {
> +			drm_memcpy_from_wc_dbm(&dst_map, &src_map, PAGE_SIZE);
> +		} else if (!src_map.is_iomem && !dst_map.is_iomem) {
>   			memcpy(dst_map.vaddr, src_map.vaddr, PAGE_SIZE);
>   		} else if (!src_map.is_iomem) {
>   			dma_buf_map_memcpy_to(&dst_map, src_map.vaddr,
diff mbox series

Patch

diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c
index 912cbe8e60a2..4a7d3d672f9a 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -31,6 +31,7 @@ 
 
 #include <drm/ttm/ttm_bo_driver.h>
 #include <drm/ttm/ttm_placement.h>
+#include <drm/drm_memcpy.h>
 #include <drm/drm_vma_manager.h>
 #include <linux/dma-buf-map.h>
 #include <linux/io.h>
@@ -91,6 +92,7 @@  void ttm_move_memcpy(struct ttm_buffer_object *bo,
 	const struct ttm_kmap_iter_ops *src_ops = src_iter->ops;
 	struct ttm_tt *ttm = bo->ttm;
 	struct dma_buf_map src_map, dst_map;
+	bool wc_memcpy;
 	pgoff_t i;
 
 	/* Single TTM move. NOP */
@@ -114,11 +116,16 @@  void ttm_move_memcpy(struct ttm_buffer_object *bo,
 		return;
 	}
 
+	wc_memcpy = ((!src_ops->maps_tt || ttm->caching != ttm_cached) &&
+		     drm_has_memcpy_from_wc());
+
 	for (i = 0; i < dst_mem->num_pages; ++i) {
 		dst_ops->map_local(dst_iter, &dst_map, i);
 		src_ops->map_local(src_iter, &src_map, i);
 
-		if (!src_map.is_iomem && !dst_map.is_iomem) {
+		if (wc_memcpy) {
+			drm_memcpy_from_wc_dbm(&dst_map, &src_map, PAGE_SIZE);
+		} else if (!src_map.is_iomem && !dst_map.is_iomem) {
 			memcpy(dst_map.vaddr, src_map.vaddr, PAGE_SIZE);
 		} else if (!src_map.is_iomem) {
 			dma_buf_map_memcpy_to(&dst_map, src_map.vaddr,