[4/6] drm/nouveau: introduce NOUVEAU_GEM_TILE_WCUS

Message ID	1377648050-6649-5-git-send-email-dev@lynxeye.de (mailing list archive)
State	New, archived
Headers	show Return-Path: <dri-devel-bounces+patchwork-dri-devel=patchwork.kernel.org@lists.freedesktop.org> From: Lucas Stach <dev@lynxeye.de> To: nouveau@lists.freedesktop.org Subject: [PATCH 4/6] drm/nouveau: introduce NOUVEAU_GEM_TILE_WCUS Date: Wed, 28 Aug 2013 02:00:48 +0200 Message-Id: <1377648050-6649-5-git-send-email-dev@lynxeye.de> In-Reply-To: <1377648050-6649-1-git-send-email-dev@lynxeye.de> References: <1377648050-6649-1-git-send-email-dev@lynxeye.de> Cc: dri-devel@lists.freedesktop.org Precedence: list MIME-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Sender: dri-devel-bounces+patchwork-dri-devel=patchwork.kernel.org@lists.freedesktop.org Errors-To: dri-devel-bounces+patchwork-dri-devel=patchwork.kernel.org@lists.freedesktop.org

Message ID

1377648050-6649-5-git-send-email-dev@lynxeye.de (mailing list archive)

State

New, archived

Headers

From: Lucas Stach <dev@lynxeye.de>
To: nouveau@lists.freedesktop.org
Subject: [PATCH 4/6] drm/nouveau: introduce NOUVEAU_GEM_TILE_WCUS
Date: Wed, 28 Aug 2013 02:00:48 +0200
Message-Id: <1377648050-6649-5-git-send-email-dev@lynxeye.de>
In-Reply-To: <1377648050-6649-1-git-send-email-dev@lynxeye.de>
References: <1377648050-6649-1-git-send-email-dev@lynxeye.de>
Cc: dri-devel@lists.freedesktop.org
Precedence: list
MIME-Version: 1.0
Content-Type: text/plain; charset="us-ascii"
Content-Transfer-Encoding: 7bit
Sender: dri-devel-bounces+patchwork-dri-devel=patchwork.kernel.org@lists.freedesktop.org
Errors-To: dri-devel-bounces+patchwork-dri-devel=patchwork.kernel.org@lists.freedesktop.org

Commit Message

Lucas Stach Aug. 28, 2013, midnight UTC

This flag allows userspace to give the kernel a hint that it should use
a non-snooped resource. To guarantee coherency at all times mappings
into userspace are done write combined, so userspace should avoid
reading back from those resources.

Signed-off-by: Lucas Stach <dev@lynxeye.de>
---
On x86 an optimized userspace can save up on snoop traffic in the
system, on ARM the benefits are potentially much larger, as we can save
the manual cache flush/invalidate.
---
 drivers/gpu/drm/nouveau/nouveau_bo.c | 11 ++++++++++-
 drivers/gpu/drm/nouveau/nouveau_bo.h |  1 +
 include/uapi/drm/nouveau_drm.h       |  1 +
 3 files changed, 12 insertions(+), 1 deletion(-)

Comments

Ben Skeggs Aug. 28, 2013, 7:11 a.m. UTC | #1

On Wed, Aug 28, 2013 at 10:00 AM, Lucas Stach <dev@lynxeye.de> wrote:
> This flag allows userspace to give the kernel a hint that it should use
> a non-snooped resource. To guarantee coherency at all times mappings
> into userspace are done write combined, so userspace should avoid
> reading back from those resources.
Do any other combinations of cached/uncached and snooped/non-snooped
make any sense?  If so, perhaps we want to split the flags.

>
> Signed-off-by: Lucas Stach <dev@lynxeye.de>
> ---
> On x86 an optimized userspace can save up on snoop traffic in the
> system, on ARM the benefits are potentially much larger, as we can save
> the manual cache flush/invalidate.
> ---
>  drivers/gpu/drm/nouveau/nouveau_bo.c | 11 ++++++++++-
>  drivers/gpu/drm/nouveau/nouveau_bo.h |  1 +
>  include/uapi/drm/nouveau_drm.h       |  1 +
>  3 files changed, 12 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
> index f4a2eb9..c5fcbcc 100644
> --- a/drivers/gpu/drm/nouveau/nouveau_bo.c
> +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
> @@ -231,6 +231,12 @@ nouveau_bo_new(struct drm_device *dev, int size, int align,
>
>         nouveau_bo_fixup_align(nvbo, flags, &align, &size);
>         nvbo->bo.mem.num_pages = size >> PAGE_SHIFT;
> +
> +       if (tile_flags & NOUVEAU_GEM_TILE_WCUS)
> +               nvbo->valid_caching = TTM_PL_FLAG_UNCACHED | TTM_PL_FLAG_WC;
> +       else
> +               nvbo->valid_caching = TTM_PL_MASK_CACHING;
> +
>         nouveau_bo_placement_set(nvbo, flags, 0);
>
>         acc_size = ttm_bo_dma_acc_size(&drm->ttm.bdev, size,
> @@ -292,7 +298,7 @@ void
>  nouveau_bo_placement_set(struct nouveau_bo *nvbo, uint32_t type, uint32_t busy)
>  {
>         struct ttm_placement *pl = &nvbo->placement;
> -       uint32_t flags = TTM_PL_MASK_CACHING |
> +       uint32_t flags = nvbo->valid_caching |
>                 (nvbo->pin_refcnt ? TTM_PL_FLAG_NO_EVICT : 0);
>
>         pl->placement = nvbo->placements;
> @@ -1554,6 +1560,9 @@ nouveau_bo_vma_add(struct nouveau_bo *nvbo, struct nouveau_vm *vm,
>         if (nvbo->bo.mem.mem_type == TTM_PL_VRAM)
>                 nouveau_vm_map(vma, nvbo->bo.mem.mm_node);
>         else if (nvbo->bo.mem.mem_type == TTM_PL_TT) {
> +               if (!(nvbo->valid_caching & TTM_PL_FLAG_CACHED))
> +                       vma->access |= NV_MEM_ACCESS_NOSNOOP;
> +
>                 if (node->sg)
>                         nouveau_vm_map_sg_table(vma, 0, size, node);
>                 else
> diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.h b/drivers/gpu/drm/nouveau/nouveau_bo.h
> index 653dbbb..2ecf8b7 100644
> --- a/drivers/gpu/drm/nouveau/nouveau_bo.h
> +++ b/drivers/gpu/drm/nouveau/nouveau_bo.h
> @@ -9,6 +9,7 @@ struct nouveau_bo {
>         struct ttm_buffer_object bo;
>         struct ttm_placement placement;
>         u32 valid_domains;
> +       u32 valid_caching;
>         u32 placements[3];
>         u32 busy_placements[3];
>         struct ttm_bo_kmap_obj kmap;
> diff --git a/include/uapi/drm/nouveau_drm.h b/include/uapi/drm/nouveau_drm.h
> index 2a5769f..4948eee2 100644
> --- a/include/uapi/drm/nouveau_drm.h
> +++ b/include/uapi/drm/nouveau_drm.h
> @@ -36,6 +36,7 @@
>  #define NOUVEAU_GEM_TILE_32BPP       0x00000002
>  #define NOUVEAU_GEM_TILE_ZETA        0x00000004
>  #define NOUVEAU_GEM_TILE_NONCONTIG   0x00000008
> +#define NOUVEAU_GEM_TILE_WCUS        0x00000010 /* write-combined, unsnooped */
>
>  struct drm_nouveau_gem_info {
>         uint32_t handle;
> --
> 1.8.3.1
>

Lucas Stach Aug. 28, 2013, 7:39 a.m. UTC | #2

Am Mittwoch, den 28.08.2013, 17:11 +1000 schrieb Ben Skeggs:
> On Wed, Aug 28, 2013 at 10:00 AM, Lucas Stach <dev@lynxeye.de> wrote:
> > This flag allows userspace to give the kernel a hint that it should use
> > a non-snooped resource. To guarantee coherency at all times mappings
> > into userspace are done write combined, so userspace should avoid
> > reading back from those resources.
> Do any other combinations of cached/uncached and snooped/non-snooped
> make any sense?  If so, perhaps we want to split the flags.
> 
Thought about that and I came to the conclusion that it isn't worth the
hassle. If we split it then things get more complicated on x86, where we
would have to invalidate caches manually with all the related
performance implications.

So I think it's a lot easier for userspace writers to just set the WCUS
flag on resources where they can promise not to touch the resource for
reading (AFAIR Christoph wanted this flag mostly for resources that the
driver isn't going to touch ever), or where they can happily live with
uncached reading.

> >
> > Signed-off-by: Lucas Stach <dev@lynxeye.de>
> > ---
> > On x86 an optimized userspace can save up on snoop traffic in the
> > system, on ARM the benefits are potentially much larger, as we can save
> > the manual cache flush/invalidate.
> > ---
> >  drivers/gpu/drm/nouveau/nouveau_bo.c | 11 ++++++++++-
> >  drivers/gpu/drm/nouveau/nouveau_bo.h |  1 +
> >  include/uapi/drm/nouveau_drm.h       |  1 +
> >  3 files changed, 12 insertions(+), 1 deletion(-)
> >
> > diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
> > index f4a2eb9..c5fcbcc 100644
> > --- a/drivers/gpu/drm/nouveau/nouveau_bo.c
> > +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
> > @@ -231,6 +231,12 @@ nouveau_bo_new(struct drm_device *dev, int size, int align,
> >
> >         nouveau_bo_fixup_align(nvbo, flags, &align, &size);
> >         nvbo->bo.mem.num_pages = size >> PAGE_SHIFT;
> > +
> > +       if (tile_flags & NOUVEAU_GEM_TILE_WCUS)
> > +               nvbo->valid_caching = TTM_PL_FLAG_UNCACHED | TTM_PL_FLAG_WC;
> > +       else
> > +               nvbo->valid_caching = TTM_PL_MASK_CACHING;
> > +
> >         nouveau_bo_placement_set(nvbo, flags, 0);
> >
> >         acc_size = ttm_bo_dma_acc_size(&drm->ttm.bdev, size,
> > @@ -292,7 +298,7 @@ void
> >  nouveau_bo_placement_set(struct nouveau_bo *nvbo, uint32_t type, uint32_t busy)
> >  {
> >         struct ttm_placement *pl = &nvbo->placement;
> > -       uint32_t flags = TTM_PL_MASK_CACHING |
> > +       uint32_t flags = nvbo->valid_caching |
> >                 (nvbo->pin_refcnt ? TTM_PL_FLAG_NO_EVICT : 0);
> >
> >         pl->placement = nvbo->placements;
> > @@ -1554,6 +1560,9 @@ nouveau_bo_vma_add(struct nouveau_bo *nvbo, struct nouveau_vm *vm,
> >         if (nvbo->bo.mem.mem_type == TTM_PL_VRAM)
> >                 nouveau_vm_map(vma, nvbo->bo.mem.mm_node);
> >         else if (nvbo->bo.mem.mem_type == TTM_PL_TT) {
> > +               if (!(nvbo->valid_caching & TTM_PL_FLAG_CACHED))
> > +                       vma->access |= NV_MEM_ACCESS_NOSNOOP;
> > +
> >                 if (node->sg)
> >                         nouveau_vm_map_sg_table(vma, 0, size, node);
> >                 else
> > diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.h b/drivers/gpu/drm/nouveau/nouveau_bo.h
> > index 653dbbb..2ecf8b7 100644
> > --- a/drivers/gpu/drm/nouveau/nouveau_bo.h
> > +++ b/drivers/gpu/drm/nouveau/nouveau_bo.h
> > @@ -9,6 +9,7 @@ struct nouveau_bo {
> >         struct ttm_buffer_object bo;
> >         struct ttm_placement placement;
> >         u32 valid_domains;
> > +       u32 valid_caching;
> >         u32 placements[3];
> >         u32 busy_placements[3];
> >         struct ttm_bo_kmap_obj kmap;
> > diff --git a/include/uapi/drm/nouveau_drm.h b/include/uapi/drm/nouveau_drm.h
> > index 2a5769f..4948eee2 100644
> > --- a/include/uapi/drm/nouveau_drm.h
> > +++ b/include/uapi/drm/nouveau_drm.h
> > @@ -36,6 +36,7 @@
> >  #define NOUVEAU_GEM_TILE_32BPP       0x00000002
> >  #define NOUVEAU_GEM_TILE_ZETA        0x00000004
> >  #define NOUVEAU_GEM_TILE_NONCONTIG   0x00000008
> > +#define NOUVEAU_GEM_TILE_WCUS        0x00000010 /* write-combined, unsnooped */
> >
> >  struct drm_nouveau_gem_info {
> >         uint32_t handle;
> > --
> > 1.8.3.1
> >

diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
index f4a2eb9..c5fcbcc 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -231,6 +231,12 @@  nouveau_bo_new(struct drm_device *dev, int size, int align,
 
 	nouveau_bo_fixup_align(nvbo, flags, &align, &size);
 	nvbo->bo.mem.num_pages = size >> PAGE_SHIFT;
+
+	if (tile_flags & NOUVEAU_GEM_TILE_WCUS)
+		nvbo->valid_caching = TTM_PL_FLAG_UNCACHED | TTM_PL_FLAG_WC;
+	else
+		nvbo->valid_caching = TTM_PL_MASK_CACHING;
+
 	nouveau_bo_placement_set(nvbo, flags, 0);
 
 	acc_size = ttm_bo_dma_acc_size(&drm->ttm.bdev, size,
@@ -292,7 +298,7 @@  void
 nouveau_bo_placement_set(struct nouveau_bo *nvbo, uint32_t type, uint32_t busy)
 {
 	struct ttm_placement *pl = &nvbo->placement;
-	uint32_t flags = TTM_PL_MASK_CACHING |
+	uint32_t flags = nvbo->valid_caching |
 		(nvbo->pin_refcnt ? TTM_PL_FLAG_NO_EVICT : 0);
 
 	pl->placement = nvbo->placements;
@@ -1554,6 +1560,9 @@  nouveau_bo_vma_add(struct nouveau_bo *nvbo, struct nouveau_vm *vm,
 	if (nvbo->bo.mem.mem_type == TTM_PL_VRAM)
 		nouveau_vm_map(vma, nvbo->bo.mem.mm_node);
 	else if (nvbo->bo.mem.mem_type == TTM_PL_TT) {
+		if (!(nvbo->valid_caching & TTM_PL_FLAG_CACHED))
+			vma->access |= NV_MEM_ACCESS_NOSNOOP;
+
 		if (node->sg)
 			nouveau_vm_map_sg_table(vma, 0, size, node);
 		else
diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.h b/drivers/gpu/drm/nouveau/nouveau_bo.h
index 653dbbb..2ecf8b7 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.h
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.h
@@ -9,6 +9,7 @@  struct nouveau_bo {
 	struct ttm_buffer_object bo;
 	struct ttm_placement placement;
 	u32 valid_domains;
+	u32 valid_caching;
 	u32 placements[3];
 	u32 busy_placements[3];
 	struct ttm_bo_kmap_obj kmap;
diff --git a/include/uapi/drm/nouveau_drm.h b/include/uapi/drm/nouveau_drm.h
index 2a5769f..4948eee2 100644
--- a/include/uapi/drm/nouveau_drm.h
+++ b/include/uapi/drm/nouveau_drm.h
@@ -36,6 +36,7 @@ 
 #define NOUVEAU_GEM_TILE_32BPP       0x00000002
 #define NOUVEAU_GEM_TILE_ZETA        0x00000004
 #define NOUVEAU_GEM_TILE_NONCONTIG   0x00000008
+#define NOUVEAU_GEM_TILE_WCUS        0x00000010 /* write-combined, unsnooped */
 
 struct drm_nouveau_gem_info {
 	uint32_t handle;

[4/6] drm/nouveau: introduce NOUVEAU_GEM_TILE_WCUS

Commit Message

Comments

Patch