Message ID | 1377648050-6649-5-git-send-email-dev@lynxeye.de (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Wed, Aug 28, 2013 at 10:00 AM, Lucas Stach <dev@lynxeye.de> wrote: > This flag allows userspace to give the kernel a hint that it should use > a non-snooped resource. To guarantee coherency at all times mappings > into userspace are done write combined, so userspace should avoid > reading back from those resources. Do any other combinations of cached/uncached and snooped/non-snooped make any sense? If so, perhaps we want to split the flags. > > Signed-off-by: Lucas Stach <dev@lynxeye.de> > --- > On x86 an optimized userspace can save up on snoop traffic in the > system, on ARM the benefits are potentially much larger, as we can save > the manual cache flush/invalidate. > --- > drivers/gpu/drm/nouveau/nouveau_bo.c | 11 ++++++++++- > drivers/gpu/drm/nouveau/nouveau_bo.h | 1 + > include/uapi/drm/nouveau_drm.h | 1 + > 3 files changed, 12 insertions(+), 1 deletion(-) > > diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c > index f4a2eb9..c5fcbcc 100644 > --- a/drivers/gpu/drm/nouveau/nouveau_bo.c > +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c > @@ -231,6 +231,12 @@ nouveau_bo_new(struct drm_device *dev, int size, int align, > > nouveau_bo_fixup_align(nvbo, flags, &align, &size); > nvbo->bo.mem.num_pages = size >> PAGE_SHIFT; > + > + if (tile_flags & NOUVEAU_GEM_TILE_WCUS) > + nvbo->valid_caching = TTM_PL_FLAG_UNCACHED | TTM_PL_FLAG_WC; > + else > + nvbo->valid_caching = TTM_PL_MASK_CACHING; > + > nouveau_bo_placement_set(nvbo, flags, 0); > > acc_size = ttm_bo_dma_acc_size(&drm->ttm.bdev, size, > @@ -292,7 +298,7 @@ void > nouveau_bo_placement_set(struct nouveau_bo *nvbo, uint32_t type, uint32_t busy) > { > struct ttm_placement *pl = &nvbo->placement; > - uint32_t flags = TTM_PL_MASK_CACHING | > + uint32_t flags = nvbo->valid_caching | > (nvbo->pin_refcnt ? 
TTM_PL_FLAG_NO_EVICT : 0); > > pl->placement = nvbo->placements; > @@ -1554,6 +1560,9 @@ nouveau_bo_vma_add(struct nouveau_bo *nvbo, struct nouveau_vm *vm, > if (nvbo->bo.mem.mem_type == TTM_PL_VRAM) > nouveau_vm_map(vma, nvbo->bo.mem.mm_node); > else if (nvbo->bo.mem.mem_type == TTM_PL_TT) { > + if (!(nvbo->valid_caching & TTM_PL_FLAG_CACHED)) > + vma->access |= NV_MEM_ACCESS_NOSNOOP; > + > if (node->sg) > nouveau_vm_map_sg_table(vma, 0, size, node); > else > diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.h b/drivers/gpu/drm/nouveau/nouveau_bo.h > index 653dbbb..2ecf8b7 100644 > --- a/drivers/gpu/drm/nouveau/nouveau_bo.h > +++ b/drivers/gpu/drm/nouveau/nouveau_bo.h > @@ -9,6 +9,7 @@ struct nouveau_bo { > struct ttm_buffer_object bo; > struct ttm_placement placement; > u32 valid_domains; > + u32 valid_caching; > u32 placements[3]; > u32 busy_placements[3]; > struct ttm_bo_kmap_obj kmap; > diff --git a/include/uapi/drm/nouveau_drm.h b/include/uapi/drm/nouveau_drm.h > index 2a5769f..4948eee2 100644 > --- a/include/uapi/drm/nouveau_drm.h > +++ b/include/uapi/drm/nouveau_drm.h > @@ -36,6 +36,7 @@ > #define NOUVEAU_GEM_TILE_32BPP 0x00000002 > #define NOUVEAU_GEM_TILE_ZETA 0x00000004 > #define NOUVEAU_GEM_TILE_NONCONTIG 0x00000008 > +#define NOUVEAU_GEM_TILE_WCUS 0x00000010 /* write-combined, unsnooped */ > > struct drm_nouveau_gem_info { > uint32_t handle; > -- > 1.8.3.1 >
Am Mittwoch, den 28.08.2013, 17:11 +1000 schrieb Ben Skeggs: > On Wed, Aug 28, 2013 at 10:00 AM, Lucas Stach <dev@lynxeye.de> wrote: > > This flag allows userspace to give the kernel a hint that it should use > > a non-snooped resource. To guarantee coherency at all times mappings > > into userspace are done write combined, so userspace should avoid > > reading back from those resources. > Do any other combinations of cached/uncached and snooped/non-snooped > make any sense? If so, perhaps we want to split the flags. > Thought about that and I came to the conclusion that it isn't worth the hassle. If we split it then things get more complicated on x86, where we would have to invalidate caches manually with all the related performance implications. So I think it's a lot easier for userspace writers to just set the WCUS flag on resources where they can promise not to touch the resource for reading (AFAIR Christoph wanted this flag mostly for resources that the driver isn't going to touch ever), or where it can happily live with uncached reading. > > > > Signed-off-by: Lucas Stach <dev@lynxeye.de> > > --- > > On x86 an optimized userspace can save up on snoop traffic in the > > system, on ARM the benefits are potentially much larger, as we can save > > the manual cache flush/invalidate. 
> > --- > > drivers/gpu/drm/nouveau/nouveau_bo.c | 11 ++++++++++- > > drivers/gpu/drm/nouveau/nouveau_bo.h | 1 + > > include/uapi/drm/nouveau_drm.h | 1 + > > 3 files changed, 12 insertions(+), 1 deletion(-) > > > > diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c > > index f4a2eb9..c5fcbcc 100644 > > --- a/drivers/gpu/drm/nouveau/nouveau_bo.c > > +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c > > @@ -231,6 +231,12 @@ nouveau_bo_new(struct drm_device *dev, int size, int align, > > > > nouveau_bo_fixup_align(nvbo, flags, &align, &size); > > nvbo->bo.mem.num_pages = size >> PAGE_SHIFT; > > + > > + if (tile_flags & NOUVEAU_GEM_TILE_WCUS) > > + nvbo->valid_caching = TTM_PL_FLAG_UNCACHED | TTM_PL_FLAG_WC; > > + else > > + nvbo->valid_caching = TTM_PL_MASK_CACHING; > > + > > nouveau_bo_placement_set(nvbo, flags, 0); > > > > acc_size = ttm_bo_dma_acc_size(&drm->ttm.bdev, size, > > @@ -292,7 +298,7 @@ void > > nouveau_bo_placement_set(struct nouveau_bo *nvbo, uint32_t type, uint32_t busy) > > { > > struct ttm_placement *pl = &nvbo->placement; > > - uint32_t flags = TTM_PL_MASK_CACHING | > > + uint32_t flags = nvbo->valid_caching | > > (nvbo->pin_refcnt ? 
TTM_PL_FLAG_NO_EVICT : 0); > > > > pl->placement = nvbo->placements; > > @@ -1554,6 +1560,9 @@ nouveau_bo_vma_add(struct nouveau_bo *nvbo, struct nouveau_vm *vm, > > if (nvbo->bo.mem.mem_type == TTM_PL_VRAM) > > nouveau_vm_map(vma, nvbo->bo.mem.mm_node); > > else if (nvbo->bo.mem.mem_type == TTM_PL_TT) { > > + if (!(nvbo->valid_caching & TTM_PL_FLAG_CACHED)) > > + vma->access |= NV_MEM_ACCESS_NOSNOOP; > > + > > if (node->sg) > > nouveau_vm_map_sg_table(vma, 0, size, node); > > else > > diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.h b/drivers/gpu/drm/nouveau/nouveau_bo.h > > index 653dbbb..2ecf8b7 100644 > > --- a/drivers/gpu/drm/nouveau/nouveau_bo.h > > +++ b/drivers/gpu/drm/nouveau/nouveau_bo.h > > @@ -9,6 +9,7 @@ struct nouveau_bo { > > struct ttm_buffer_object bo; > > struct ttm_placement placement; > > u32 valid_domains; > > + u32 valid_caching; > > u32 placements[3]; > > u32 busy_placements[3]; > > struct ttm_bo_kmap_obj kmap; > > diff --git a/include/uapi/drm/nouveau_drm.h b/include/uapi/drm/nouveau_drm.h > > index 2a5769f..4948eee2 100644 > > --- a/include/uapi/drm/nouveau_drm.h > > +++ b/include/uapi/drm/nouveau_drm.h > > @@ -36,6 +36,7 @@ > > #define NOUVEAU_GEM_TILE_32BPP 0x00000002 > > #define NOUVEAU_GEM_TILE_ZETA 0x00000004 > > #define NOUVEAU_GEM_TILE_NONCONTIG 0x00000008 > > +#define NOUVEAU_GEM_TILE_WCUS 0x00000010 /* write-combined, unsnooped */ > > > > struct drm_nouveau_gem_info { > > uint32_t handle; > > -- > > 1.8.3.1 > >
diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c index f4a2eb9..c5fcbcc 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c @@ -231,6 +231,12 @@ nouveau_bo_new(struct drm_device *dev, int size, int align, nouveau_bo_fixup_align(nvbo, flags, &align, &size); nvbo->bo.mem.num_pages = size >> PAGE_SHIFT; + + if (tile_flags & NOUVEAU_GEM_TILE_WCUS) + nvbo->valid_caching = TTM_PL_FLAG_UNCACHED | TTM_PL_FLAG_WC; + else + nvbo->valid_caching = TTM_PL_MASK_CACHING; + nouveau_bo_placement_set(nvbo, flags, 0); acc_size = ttm_bo_dma_acc_size(&drm->ttm.bdev, size, @@ -292,7 +298,7 @@ void nouveau_bo_placement_set(struct nouveau_bo *nvbo, uint32_t type, uint32_t busy) { struct ttm_placement *pl = &nvbo->placement; - uint32_t flags = TTM_PL_MASK_CACHING | + uint32_t flags = nvbo->valid_caching | (nvbo->pin_refcnt ? TTM_PL_FLAG_NO_EVICT : 0); pl->placement = nvbo->placements; @@ -1554,6 +1560,9 @@ nouveau_bo_vma_add(struct nouveau_bo *nvbo, struct nouveau_vm *vm, if (nvbo->bo.mem.mem_type == TTM_PL_VRAM) nouveau_vm_map(vma, nvbo->bo.mem.mm_node); else if (nvbo->bo.mem.mem_type == TTM_PL_TT) { + if (!(nvbo->valid_caching & TTM_PL_FLAG_CACHED)) + vma->access |= NV_MEM_ACCESS_NOSNOOP; + if (node->sg) nouveau_vm_map_sg_table(vma, 0, size, node); else diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.h b/drivers/gpu/drm/nouveau/nouveau_bo.h index 653dbbb..2ecf8b7 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.h +++ b/drivers/gpu/drm/nouveau/nouveau_bo.h @@ -9,6 +9,7 @@ struct nouveau_bo { struct ttm_buffer_object bo; struct ttm_placement placement; u32 valid_domains; + u32 valid_caching; u32 placements[3]; u32 busy_placements[3]; struct ttm_bo_kmap_obj kmap; diff --git a/include/uapi/drm/nouveau_drm.h b/include/uapi/drm/nouveau_drm.h index 2a5769f..4948eee2 100644 --- a/include/uapi/drm/nouveau_drm.h +++ b/include/uapi/drm/nouveau_drm.h @@ -36,6 +36,7 @@ #define NOUVEAU_GEM_TILE_32BPP 0x00000002 #define 
NOUVEAU_GEM_TILE_ZETA 0x00000004 #define NOUVEAU_GEM_TILE_NONCONTIG 0x00000008 +#define NOUVEAU_GEM_TILE_WCUS 0x00000010 /* write-combined, unsnooped */ struct drm_nouveau_gem_info { uint32_t handle;
This flag allows userspace to give the kernel a hint that it should use a non-snooped resource. To guarantee coherency at all times mappings into userspace are done write combined, so userspace should avoid reading back from those resources. Signed-off-by: Lucas Stach <dev@lynxeye.de> --- On x86 an optimized userspace can save up on snoop traffic in the system, on ARM the benefits are potentially much larger, as we can save the manual cache flush/invalidate. --- drivers/gpu/drm/nouveau/nouveau_bo.c | 11 ++++++++++- drivers/gpu/drm/nouveau/nouveau_bo.h | 1 + include/uapi/drm/nouveau_drm.h | 1 + 3 files changed, 12 insertions(+), 1 deletion(-)